#
tokens: 48936/50000 54/497 files (page 3/16)
lines: off (toggle) GitHub
raw markdown copy
This is page 3 of 16. Use http://codebase.md/trycua/cua?page={x} to view the full context.

# Directory Structure

```
├── .all-contributorsrc
├── .cursorignore
├── .devcontainer
│   ├── devcontainer.json
│   ├── post-install.sh
│   └── README.md
├── .dockerignore
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── ci-lume.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-pylume.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       └── test-validation-script.yml
├── .gitignore
├── .vscode
│   ├── docs.code-workspace
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   └── py.code-workspace
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── composite-agents.md
│   ├── cua-hackathon.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .gitignore
│   ├── .prettierrc
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   └── meta.json
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── computer-sdk
│   │       │   ├── commands.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── meta.json
│   │       │   └── sandboxed-python.mdx
│   │       ├── index.mdx
│   │       ├── libraries
│   │       │   ├── agent
│   │       │   │   └── index.mdx
│   │       │   ├── computer
│   │       │   │   └── index.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── core
│   │       │   │   └── index.mdx
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   └── som
│   │       │       ├── configuration.mdx
│   │       │       └── index.mdx
│   │       ├── meta.json
│   │       ├── quickstart-cli.mdx
│   │       ├── quickstart-devs.mdx
│   │       └── telemetry.mdx
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   └── llms.txt
│   │   │       └── route.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── iou.tsx
│   │   │   └── mermaid.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   └── mdx-components.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── .prettierrc
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   └── uitars.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer-server
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   └── test_connection.py
│   │   ├── core
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── mcp-server
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       ├── install_mcp_server.sh
│   │   │       └── start_mcp_server.sh
│   │   ├── pylume
│   │   │   ├── __init__.py
│   │   │   ├── pylume
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── lume
│   │   │   │   ├── models.py
│   │   │   │   ├── pylume.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   └── som
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           └── test_omniparser.py
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── biome.json
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Dockerfile
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── pylume_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── pdm.lock
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── samples
│   └── community
│       ├── global-online
│       │   └── README.md
│       └── hack-the-north
│           └── README.md
├── scripts
│   ├── build-uv.sh
│   ├── build.ps1
│   ├── build.sh
│   ├── cleanup.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   └── run-docker-dev.sh
└── tests
    ├── pytest.ini
    ├── shell_cmd.py
    ├── test_files.py
    ├── test_shell_bash.py
    ├── test_telemetry.py
    ├── test_venv.py
    └── test_watchdog.py
```

# Files

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/callbacks/logging.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Logging
description: Agent logging and custom logger implementation
---

# Logging Callback

This page covers the built-in logging callback and how to create a custom logger for monitoring agent activity.

## Callbacks Example

```python
from agent.callbacks import LoggingCallback
import logging

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[
        LoggingCallback(
            logger=logging.getLogger("cua"), 
            level=logging.INFO
        )
    ]
)
```

## Shorthand

```python
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    verbosity=logging.INFO  # Auto-adds LoggingCallback
)
```

## Custom Logger

Create custom loggers by extending AsyncCallbackHandler:

```python
from agent.callbacks.base import AsyncCallbackHandler
import logging

class CustomLogger(AsyncCallbackHandler):
    def __init__(self, logger_name="agent"):
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.INFO)
        
        # Add console handler
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
    
    async def on_run_start(self, kwargs, old_items):
        self.logger.info(f"Agent run started with model: {kwargs.get('model')}")
    
    async def on_computer_call_start(self, item):
        action = item.get('action', {})
        self.logger.info(f"Computer action: {action.get('type')}")
    
    async def on_usage(self, usage):
        cost = usage.get('response_cost', 0)
        self.logger.info(f"API call cost: ${cost:.4f}")
    
    async def on_run_end(self, kwargs, old_items, new_items):
        self.logger.info("Agent run completed")

# Use custom logger
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[CustomLogger("my_agent")]
)
```

## Available Hooks

Log any agent event using these callback methods:
- `on_run_start/end` - Run lifecycle
- `on_computer_call_start/end` - Computer actions
- `on_api_start/end` - LLM API calls
- `on_usage` - Cost tracking
- `on_screenshot` - Screenshot events

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/chat-history.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Chat History
description: Managing conversation history and message arrays
---

Managing conversation history is essential for multi-turn agent interactions. The agent maintains a messages array that tracks the entire conversation flow.

## Managing History

### Continuous Conversation

```python
history = []

while True:
    user_input = input("> ")
    history.append({"role": "user", "content": user_input})

    async for result in agent.run(history, stream=False):
        history += result["output"]
```

## Message Array Structure

The messages array contains different types of messages that represent the conversation state:

```python
messages = [
    # user input
    {
        "role": "user",
        "content": "go to trycua on gh"
    },
    # first agent turn adds the model output to the history
    {
        "summary": [
            {
                "text": "Searching Firefox for Trycua GitHub",
                "type": "summary_text"
            }
        ],
        "type": "reasoning"
    },
    {
        "action": {
            "text": "Trycua GitHub",
            "type": "type"
        },
        "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
        "status": "completed",
        "type": "computer_call"
    },
    # second agent turn adds the computer output to the history
    {
        "type": "computer_call_output",
        "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
        "output": {
            "type": "input_image",
            "image_url": "data:image/png;base64,..."
        }
    },
    # final agent turn adds the agent output text to the history
    {
        "type": "message",
        "role": "assistant",
        "content": [
          {
            "text": "Success! The Trycua GitHub page has been opened.",
            "type": "output_text"
          }
        ]
    }
]
```

## Message Types

See the complete schema in [Message Format](./message-format).

### Memory Management

For long conversations, consider using the `only_n_most_recent_images` parameter to manage memory:

```python
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    only_n_most_recent_images=3
)
```

This automatically removes old images from the conversation history to prevent context window overflow.

```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-computer-server.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish Computer Server Package

on:
  push:
    tags:
      - "computer-server-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.prepare.outputs.version }}

# Write access to repo contents is required by the reusable publish workflow
# (tag/release operations).
permissions:
  contents: write

jobs:
  # Resolves the version string from whichever trigger fired (tag push,
  # manual dispatch, or workflow_call) and exposes it as a job output.
  prepare:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        run: |
          if [ "${{ github.event_name }}" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "${{ github.ref }}" =~ ^refs/tags/computer-server-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for computer-server"
              exit 1
            fi
          elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            # Quote the expansion: this is free-text user input, and an
            # unquoted value containing spaces or shell metacharacters
            # would break (or inject into) this script.
            VERSION="${{ github.event.inputs.version }}"
          else
            # Use version from workflow_call (quoted for the same reason).
            VERSION="${{ inputs.version }}"
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      # NOTE(review): nothing in this job uses Python after this step; it
      # appears to be leftover scaffolding and is a candidate for removal.
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

  # Delegates the actual build and PyPI upload to the shared reusable workflow.
  publish:
    needs: prepare
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "computer-server"
      package_dir: "libs/python/computer-server"
      version: ${{ needs.prepare.outputs.version }}
      is_lume_package: false
      base_package_name: "cua-computer-server"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

  set-env-variables:
    needs: [prepare, publish]
    runs-on: macos-latest
    steps:
      # NOTE(review): $GITHUB_ENV only persists for later steps of the SAME
      # job — this value is not visible to other jobs or workflows, so this
      # job currently has no observable effect. If other jobs need the
      # version, expose it as a job output instead; confirm intent.
      - name: Set environment variables for use in other jobs
        run: |
          echo "COMPUTER_VERSION=${{ needs.prepare.outputs.version }}" >> $GITHUB_ENV

```

--------------------------------------------------------------------------------
/libs/lume/src/VNC/PassphraseGenerator.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import CryptoKit

final class PassphraseGenerator {
    private let words: [String]

    /// Creates a generator over the given word list (defaults to the built-in list).
    init(words: [String] = PassphraseGenerator.defaultWords) {
        self.words = words
    }

    /// Returns `count` words drawn uniformly at random from the word list.
    ///
    /// Uses `Int.random(in:)`, which draws from the system's secure random
    /// number generator and is bias-free. The previous implementation built a
    /// `UInt32` from raw bytes and reduced it with `%`, which over-weights
    /// low-index words (modulo bias) and crashes on an empty word list.
    ///
    /// - Parameter count: Number of words to produce; non-positive counts yield `[]`.
    /// - Returns: An array of `count` randomly chosen words, or `[]` if the
    ///   word list is empty.
    func prefix(_ count: Int) -> [String] {
        guard count > 0, !words.isEmpty else { return [] }
        return (0..<count).map { _ in words[Int.random(in: 0..<words.count)] }
    }

    // A much larger set of common, easy-to-type words.
    // Note: a duplicate "orange" entry was removed so every word is equally likely.
    private static let defaultWords = [
        "alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel",
        "india", "juliet", "kilo", "lima", "mike", "november", "oscar", "papa",
        "quebec", "romeo", "sierra", "tango", "uniform", "victor", "whiskey", "xray",
        "yankee", "zulu", "zero", "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine", "apple", "banana", "cherry", "date",
        "elder", "fig", "grape", "honey", "iris", "jade", "kiwi", "lemon",
        "mango", "nectarine", "orange", "peach", "quince", "raspberry", "strawberry", "tangerine",
        "red", "blue", "green", "yellow", "purple", "pink", "brown",
        "black", "white", "gray", "silver", "gold", "copper", "bronze", "steel",
        "north", "south", "east", "west", "spring", "summer", "autumn", "winter",
        "river", "ocean", "mountain", "valley", "forest", "desert", "island", "beach",
        "sun", "moon", "star", "cloud", "rain", "snow", "wind", "storm",
        "happy", "brave", "calm", "swift", "wise", "kind", "bold", "free",
        "safe", "strong", "bright", "clear", "light", "soft", "warm", "cool",
        "eagle", "falcon", "hawk", "owl", "robin", "sparrow", "swan", "dove",
        "tiger", "lion", "bear", "wolf", "deer", "horse", "dolphin", "whale",
        "maple", "oak", "pine", "birch", "cedar", "fir", "palm", "willow",
        "rose", "lily", "daisy", "tulip", "lotus", "orchid", "violet", "jasmine"
    ]
}
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/benchmarks/introduction.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Introduction
description: Overview of benchmarking in the c/ua agent framework
---

The c/ua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks.

## Benchmark Types

Computer-Agent benchmarks evaluate two key capabilities:
- **Plan Generation**: Breaking down complex tasks into a sequence of actions
- **Coordinate Generation**: Predicting precise click locations on GUI elements

## Using State-of-the-Art Models

Let's see how to use the SOTA vision-language models in the c/ua agent framework.

### Plan Generation + Coordinate Generation

**[OS-World](https://os-world.github.io/)** - Benchmark for complete computer-use agents

This leaderboard tests models that can understand instructions and automatically perform the full sequence of actions needed to complete tasks.

```python
# UI-TARS-1.5 is a SOTA unified plan generation + coordinate generation VLM
# This makes it suitable for agentic loops for computer-use
agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer])
agent.run("Open Firefox and go to github.com")
# Success! 🎉
```

### Coordinate Generation Only

**[GUI Agent Grounding Leaderboard](https://gui-agent.github.io/grounding-leaderboard/)** - Benchmark for click prediction accuracy  

This leaderboard tests models that specialize in finding exactly where to click on screen elements, but need to be told what specific action to take.

```python
# GTA1-7B is a SOTA coordinate generation VLM
# It can only generate coordinates, not plan:
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer])
agent.predict_click("find the button to open the settings") # (27, 450)
# This will raise an error:
# agent.run("Open Firefox and go to github.com") 
```

### Composed Agent

The c/ua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model.

```python
# It can be paired with any LLM to form a composed agent:
# "gemini/gemini-1.5-pro" will be used as the plan generation LLM
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro", tools=[computer])
agent.run("Open Firefox and go to github.com")
# Success! 🎉
```

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/base.py:
--------------------------------------------------------------------------------

```python
"""
Base protocol for async agent configurations
"""

from typing import Protocol, List, Dict, Any, Optional, Tuple, Union
from abc import abstractmethod
from ..types import AgentCapability

class AsyncAgentConfig(Protocol):
    """Protocol defining the interface for async agent configurations.

    Implementations provide a full agentic step loop (``predict_step``),
    direct click-coordinate prediction (``predict_click``), or both; the
    supported subset is reported by ``get_capabilities``.
    """

    @abstractmethod
    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler: Optional[Any] = None,
        _on_api_start: Optional[Any] = None,
        _on_api_end: Optional[Any] = None,
        _on_usage: Optional[Any] = None,
        _on_screenshot: Optional[Any] = None,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Predict the next step based on input items.

        Args:
            messages: Input items following Responses format (message, function_call, computer_call)
            model: Model name to use
            tools: Optional list of tool schemas
            max_retries: Maximum number of retries for failed API calls
            stream: Whether to stream responses
            computer_handler: Computer handler instance
            _on_api_start: Callback for API start
            _on_api_end: Callback for API end
            _on_usage: Callback for usage tracking
            _on_screenshot: Callback for screenshot events
            **kwargs: Additional arguments

        Returns:
            Dictionary with "output" (output items) and "usage" array
        """
        ...

    @abstractmethod
    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates based on image and instruction.

        Args:
            model: Model name to use
            image_b64: Base64 encoded image
            instruction: Instruction for where to click

        Returns:
            None or tuple with (x, y) coordinates
        """
        ...

    @abstractmethod
    def get_capabilities(self) -> List[AgentCapability]:
        """
        Get list of capabilities supported by this agent config.

        Returns:
            List of capability strings (e.g., ["step", "click"])
        """
        ...
```

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/cloud.ts:
--------------------------------------------------------------------------------

```typescript
import pino from 'pino';
import {
  type BaseComputerInterface,
  InterfaceFactory,
} from '../../interface/index';
import type { CloudComputerConfig, VMProviderType } from '../types';
import { BaseComputer } from './base';

/**
 * Cloud-specific computer implementation
 */
export class CloudComputer extends BaseComputer {
  protected static vmProviderType: VMProviderType.CLOUD;
  protected apiKey: string;
  private iface?: BaseComputerInterface;
  private initialized = false;

  protected logger = pino({ name: 'computer.provider_cloud' });

  constructor(config: CloudComputerConfig) {
    super(config);
    this.apiKey = config.apiKey;
  }

  get ip() {
    return `${this.name}.containers.cloud.trycua.com`;
  }

  /**
   * Initialize the cloud VM and interface
   */
  async run(): Promise<void> {
    if (this.initialized) {
      this.logger.info('Computer already initialized, skipping initialization');
      return;
    }

    try {
      // For cloud provider, the VM is already running, we just need to connect
      const ipAddress = this.ip;
      this.logger.info(`Connecting to cloud VM at ${ipAddress}`);

      // Create the interface with API key authentication
      this.iface = InterfaceFactory.createInterfaceForOS(
        this.osType,
        ipAddress,
        this.apiKey,
        this.name
      );

      // Wait for the interface to be ready
      this.logger.info('Waiting for interface to be ready...');
      await this.iface.waitForReady();

      this.initialized = true;
      this.logger.info('Cloud computer ready');
    } catch (error) {
      this.logger.error(`Failed to initialize cloud computer: ${error}`);
      throw new Error(`Failed to initialize cloud computer: ${error}`);
    }
  }

  /**
   * Stop the cloud computer (disconnect interface)
   */
  async stop(): Promise<void> {
    this.logger.info('Disconnecting from cloud computer...');

    if (this.iface) {
      this.iface.disconnect();
      this.iface = undefined;
    }

    this.initialized = false;
    this.logger.info('Disconnected from cloud computer');
  }

  /**
   * Get the computer interface
   */
  get interface(): BaseComputerInterface {
    if (!this.iface) {
      throw new Error('Computer not initialized. Call run() first.');
    }
    return this.iface;
  }

  /**
   * Disconnect from the cloud computer
   */
  async disconnect(): Promise<void> {
    await this.stop();
  }
}

```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Push.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation

struct Push: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Push a macOS VM to GitHub Container Registry"
    )

    @Argument(help: "Name of the VM to push")
    var name: String

    @Argument(help: "Image tag to push (format: name:tag)")
    var image: String

    @Option(parsing: .upToNextOption, help: "Additional tags to push the same image to")
    var additionalTags: [String] = []

    @Option(help: "Github Container Registry to push to. Defaults to ghcr.io")
    var registry: String = "ghcr.io"

    @Option(help: "Organization to push to. Defaults to trycua")
    var organization: String = "trycua"

    @Option(name: .customLong("storage"), help: "VM storage location to use")
    var storage: String?

    @Option(help: "Chunk size for large files in MB. Defaults to 512.")
    var chunkSizeMb: Int = 512

    @Flag(name: .long, help: "Enable verbose logging")
    var verbose: Bool = false

    @Flag(name: .long, help: "Prepare files without uploading to registry")
    var dryRun: Bool = false

    // Fixed: a plain @Flag defaulting to `true` could never be switched off.
    // `.prefixedNo` inversion adds --no-reassemble while --reassemble stays valid.
    @Flag(name: .long, inversion: .prefixedNo, help: "In dry-run mode, also reassemble chunks to verify integrity")
    var reassemble: Bool = true

    init() {}

    /// Parses the primary image reference, collects the unique tag set, and
    /// delegates the actual push to `LumeController.pushImage`.
    @MainActor
    func run() async throws {
        let controller = LumeController()

        // Parse primary image name and tag (strict "name:tag" form).
        let components = image.split(separator: ":")
        guard components.count == 2, let primaryTag = components.last else {
            throw ValidationError("Invalid primary image format. Expected format: name:tag")
        }
        let imageName = String(components.first!)

        // Combine primary and additional tags, ensuring uniqueness.
        // The set always contains the primary tag, so it is never empty.
        var allTags: Swift.Set<String> = []
        allTags.insert(String(primaryTag))
        allTags.formUnion(additionalTags)

        try await controller.pushImage(
            name: name,
            imageName: imageName, // Pass base image name
            tags: Array(allTags), // Pass array of all unique tags
            registry: registry,
            organization: organization,
            storage: storage,
            chunkSizeMb: chunkSizeMb,
            verbose: verbose,
            dryRun: dryRun,
            reassemble: reassemble
        )
    }
}
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: All‑in‑one CUA Models
description: Models that support full computer-use agent capabilities with ComputerAgent.run()
---

These models support complete computer-use agent functionality through `ComputerAgent.run()`. They can understand natural language instructions and autonomously perform sequences of actions to complete tasks.

All agent loops are compatible with any LLM provider supported by LiteLLM.

See [Running Models Locally](../local-models) for how to use Hugging Face and MLX models on your own machine.

## Anthropic CUAs

Claude models with computer-use capabilities:

- Claude 4.5: `claude-sonnet-4-5-20250929`
- Claude 4.1: `claude-opus-4-1-20250805`
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
- Claude 3.7: `claude-3-7-sonnet-20250219`
- Claude 3.5: `claude-3-5-sonnet-20241022`

```python
agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])
async for _ in agent.run("Open Firefox and navigate to github.com"):
    pass
```

## OpenAI CUA Preview

OpenAI's computer-use preview model:

- Computer-use-preview: `computer-use-preview`

```python
agent = ComputerAgent("openai/computer-use-preview", tools=[computer])
async for _ in agent.run("Take a screenshot and describe what you see"):
    pass
```

## GLM-4.5V

Zhipu AI's GLM-4.5V vision-language model with computer-use capabilities:

- `openrouter/z-ai/glm-4.5v`
- `huggingface-local/zai-org/GLM-4.5V`

```python
agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer])
async for _ in agent.run("Click on the search bar and type 'hello world'"):
    pass
```

## InternVL 3.5

InternVL 3.5 family:
- `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`

```python
agent = ComputerAgent("huggingface-local/OpenGVLab/InternVL3_5-1B", tools=[computer])
async for _ in agent.run("Open Firefox and navigate to github.com"):
    pass
```

## UI-TARS 1.5

Unified vision-language model for computer-use:

- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)

```python
agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer])
async for _ in agent.run("Open the settings menu and change the theme to dark mode"):
    pass
```

---

CUAs also support direct click prediction. See [Grounding Models](./grounding-models) for details on `predict_click()`.

For details on agent loop behavior and usage, see [Agent Loops](../agent-loops).

```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-pylume.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish Pylume Package

on:
  push:
    tags:
      - "pylume-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.determine-version.outputs.version }}

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  # Resolve the version to publish from the triggering event.
  determine-version:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        run: |
          if [ "${{ github.event_name }}" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "${{ github.ref }}" =~ ^refs/tags/pylume-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for pylume"
              exit 1
            fi
          elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            # Use version from workflow dispatch
            VERSION=${{ github.event.inputs.version }}
          else
            # Use version from workflow_call
            VERSION=${{ inputs.version }}
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> $GITHUB_OUTPUT

  # Ensure the requested version matches __version__ in the source tree.
  validate-version:
    runs-on: macos-latest
    needs: determine-version
    steps:
      - uses: actions/checkout@v4
      - name: Validate version
        id: validate-version
        run: |
          CODE_VERSION=$(grep '__version__' libs/python/pylume/pylume/__init__.py | cut -d'"' -f2)
          if [ "${{ needs.determine-version.outputs.version }}" != "$CODE_VERSION" ]; then
            echo "Version mismatch: expected $CODE_VERSION, got ${{ needs.determine-version.outputs.version }}"
            exit 1
          fi
          echo "Version validated: $CODE_VERSION"

  publish:
    # Fixed: previously only depended on determine-version, so a failed
    # validate-version job did NOT block publishing. Gate on both jobs.
    needs: [determine-version, validate-version]
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "pylume"
      package_dir: "libs/python/pylume"
      version: ${{ needs.determine-version.outputs.version }}
      is_lume_package: true
      base_package_name: "pylume"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
```

--------------------------------------------------------------------------------
/libs/lume/src/FileSystem/VMLocation.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// A named filesystem location in which VMs are stored.
struct VMLocation: Codable, Equatable, Sendable {
    let name: String
    let path: String

    /// The storage path with a leading tilde expanded to the user's home.
    var expandedPath: String {
        (path as NSString).expandingTildeInPath
    }

    /// Ensures the location is usable: either an existing writable directory,
    /// or a path at which a directory can be created.
    ///
    /// - Throws: `VMLocationError.notADirectory` if the path exists but is a
    ///   file, `VMLocationError.directoryNotWritable` if it is not writable,
    ///   or `VMLocationError.directoryCreationFailed` if creation fails.
    func validate() throws {
        let resolved = expandedPath
        let fm = FileManager.default
        var isDirectory: ObjCBool = false

        guard fm.fileExists(atPath: resolved, isDirectory: &isDirectory) else {
            // Path does not exist yet: attempt to create it (with intermediates).
            do {
                try fm.createDirectory(
                    atPath: resolved,
                    withIntermediateDirectories: true
                )
            } catch {
                throw VMLocationError.directoryCreationFailed(path: resolved, error: error)
            }
            return
        }

        guard isDirectory.boolValue else {
            throw VMLocationError.notADirectory(path: resolved)
        }
        guard fm.isWritableFile(atPath: resolved) else {
            throw VMLocationError.directoryNotWritable(path: resolved)
        }
    }
}

// MARK: - Errors

/// Errors raised while validating or managing VM storage locations.
enum VMLocationError: Error, LocalizedError {
    /// The path exists but is a regular file, not a directory.
    case notADirectory(path: String)
    /// The directory exists but the current user cannot write to it.
    case directoryNotWritable(path: String)
    /// Creating the directory failed; wraps the underlying error.
    case directoryCreationFailed(path: String, error: Error)
    /// No location is registered under the given name.
    case locationNotFound(name: String)
    /// A location with the given name already exists.
    case duplicateLocationName(name: String)
    /// The name contains characters outside alphanumerics/underscores/dashes.
    case invalidLocationName(name: String)
    /// The default location must be replaced before it can be removed.
    case defaultLocationCannotBeRemoved(name: String)

    // Human-readable message for each case (LocalizedError conformance).
    var errorDescription: String? {
        switch self {
        case .notADirectory(let path):
            return "Path is not a directory: \(path)"
        case .directoryNotWritable(let path):
            return "Directory is not writable: \(path)"
        case .directoryCreationFailed(let path, let error):
            return "Failed to create directory at \(path): \(error.localizedDescription)"
        case .locationNotFound(let name):
            return "VM location not found: \(name)"
        case .duplicateLocationName(let name):
            return "VM location with name '\(name)' already exists"
        case .invalidLocationName(let name):
            return
                "Invalid location name: \(name). Names should be alphanumeric with underscores or dashes."
        case .defaultLocationCannotBeRemoved(let name):
            return "Cannot remove the default location '\(name)'. Set a new default location first."
        }
    }
}

```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/computer-ui.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Computer UI
---

The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.

```bash
# Install with UI support
pip install "cua-computer[ui]"
```

<Callout title="Note">
For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
</Callout>

### Building and Sharing Demonstrations with Huggingface

Follow these steps to contribute your own demonstrations:

#### 1. Set up Huggingface Access

Set your HF_TOKEN in a .env file or in your environment variables:

```bash
# In .env file
HF_TOKEN=your_huggingface_token
```

#### 2. Launch the Computer UI

```python
# launch_ui.py
from computer.ui.gradio.app import create_gradio_ui
from dotenv import load_dotenv
load_dotenv('.env')

app = create_gradio_ui()
app.launch(share=False)
```

For examples, see [Computer UI Examples](https://github.com/trycua/cua/tree/main/examples/computer_ui_examples.py)

#### 3. Record Your Tasks

<details open>
<summary>View demonstration video</summary>
<video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
</details>

Record yourself performing various computer tasks using the UI.

#### 4. Save Your Demonstrations

<details open>
<summary>View demonstration video</summary>
<video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
</details>

Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").

#### 5. Record Additional Demonstrations

Repeat steps 3 and 4 until you have a good number of demonstrations covering different tasks and scenarios.

#### 6. Upload to Huggingface

<details open>
<summary>View demonstration video</summary>
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
</details>

Upload your dataset to Huggingface by:
- Naming it as `{your_username}/{dataset_name}`
- Choosing public or private visibility
- Optionally selecting specific tags to upload only tasks with certain tags

#### Examples and Resources

- Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
- Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
```

--------------------------------------------------------------------------------
/libs/xfce/src/xfce-config/xfce4-session.xml:
--------------------------------------------------------------------------------

```
<?xml version="1.0" encoding="UTF-8"?>
<!-- xfce4-session channel: defines the failsafe session used by the container
     (window manager, panel, desktop, settings daemon, notifications) and
     disables shutdown/suspend options and the screensaver. -->
<channel name="xfce4-session" version="1.0">
  <property name="general" type="empty">
    <property name="FailsafeSessionName" type="string" value="Failsafe"/>
    <property name="SessionName" type="string" value="Default"/>
    <!-- Do not persist session state between runs -->
    <property name="SaveOnExit" type="bool" value="false"/>
  </property>
  <property name="sessions" type="empty">
    <!-- Five clients, started in ascending Priority order -->
    <property name="Failsafe" type="empty">
      <property name="IsFailsafe" type="bool" value="true"/>
      <property name="Count" type="int" value="5"/>
      <property name="Client0_Command" type="array">
        <value type="string" value="xfwm4"/>
      </property>
      <property name="Client0_Priority" type="int" value="15"/>
      <property name="Client0_PerScreen" type="bool" value="false"/>
      <property name="Client1_Command" type="array">
        <value type="string" value="xfce4-panel"/>
      </property>
      <property name="Client1_Priority" type="int" value="25"/>
      <property name="Client1_PerScreen" type="bool" value="false"/>
      <property name="Client2_Command" type="array">
        <value type="string" value="xfdesktop"/>
      </property>
      <property name="Client2_Priority" type="int" value="35"/>
      <property name="Client2_PerScreen" type="bool" value="false"/>
      <property name="Client3_Command" type="array">
        <value type="string" value="xfsettingsd"/>
      </property>
      <property name="Client3_Priority" type="int" value="10"/>
      <property name="Client3_PerScreen" type="bool" value="false"/>
      <property name="Client4_Command" type="array">
        <value type="string" value="xfce4-notifyd"/>
      </property>
      <property name="Client4_Priority" type="int" value="20"/>
      <property name="Client4_PerScreen" type="bool" value="false"/>
    </property>
  </property>
  <property name="splash" type="empty">
    <!-- Empty engine disables the startup splash screen -->
    <property name="Engine" type="string" value=""/>
  </property>
  <property name="compat" type="empty">
    <property name="LaunchGNOME" type="bool" value="false"/>
  </property>
  <!-- Hide power-management actions that make no sense in a container -->
  <property name="shutdown" type="empty">
    <property name="ShowSuspend" type="bool" value="false"/>
    <property name="ShowHibernate" type="bool" value="false"/>
    <property name="ShowHybridSleep" type="bool" value="false"/>
    <property name="ShowSwitchUser" type="bool" value="false"/>
  </property>
  <property name="screensaver" type="empty">
    <property name="enabled" type="bool" value="false"/>
    <property name="lock-enabled" type="bool" value="false"/>
  </property>
</channel>

```

--------------------------------------------------------------------------------
/examples/pylume_examples.py:
--------------------------------------------------------------------------------

```python
import asyncio
from pylume import PyLume, ImageRef, VMRunOpts, SharedDirectory, VMConfig, VMUpdateOpts


async def main():
    """Example usage of PyLume.

    Walks through the main operations: querying the latest IPSW URL, creating,
    listing, updating, pulling, running, cloning, stopping, and deleting VMs.

    NOTE(review): the get/update/run/clone/stop steps target a VM named
    "lume-vm", which this script does not create — it is assumed to already
    exist. Confirm against your local lume setup.
    """
    async with PyLume(port=7777, use_existing_server=False, debug=True) as pylume:

        # Get latest IPSW URL (usable when creating a VM with ipsw="latest").
        # Fixed: this section previously appeared twice verbatim; the duplicate
        # call has been removed.
        print("\n=== Getting Latest IPSW URL ===")
        url = await pylume.get_latest_ipsw_url()
        print("Latest IPSW URL:", url)

        # Create a new VM
        print("\n=== Creating a new VM ===")
        vm_config = VMConfig(
            name="lume-vm-new",
            os="macOS",
            cpu=2,
            memory="4GB",
            disk_size="64GB",  # type: ignore
            display="1024x768",
            ipsw="latest",
        )
        await pylume.create_vm(vm_config)

        # List available images
        print("\n=== Listing Available Images ===")
        images = await pylume.get_images()
        print("Available Images:", images)

        # List all VMs to verify creation
        print("\n=== Listing All VMs ===")
        vms = await pylume.list_vms()
        print("VMs:", vms)

        # Get specific VM details
        print("\n=== Getting VM Details ===")
        vm = await pylume.get_vm("lume-vm")
        print("VM Details:", vm)

        # Update VM settings
        print("\n=== Updating VM Settings ===")
        update_opts = VMUpdateOpts(cpu=8, memory="4GB")
        await pylume.update_vm("lume-vm", update_opts)

        # Pull a prebuilt image from the registry into a new VM
        image_ref = ImageRef(
            image="macos-sequoia-vanilla", tag="latest", registry="ghcr.io", organization="trycua"
        )
        await pylume.pull_image(image_ref, name="lume-vm-pulled")

        # Run with shared directory
        run_opts = VMRunOpts(
            no_display=False,  # type: ignore
            shared_directories=[  # type: ignore
                SharedDirectory(host_path="~/shared", read_only=False)  # type: ignore
            ],
        )
        await pylume.run_vm("lume-vm", run_opts)

        # Or simpler (default options, no shared directories):
        await pylume.run_vm("lume-vm")

        # Clone VM
        print("\n=== Cloning VM ===")
        await pylume.clone_vm("lume-vm", "lume-vm-cloned")

        # Stop VM
        print("\n=== Stopping VM ===")
        await pylume.stop_vm("lume-vm")

        # Delete VM
        print("\n=== Deleting VM ===")
        await pylume.delete_vm("lume-vm-cloned")


if __name__ == "__main__":
    asyncio.run(main())

```

--------------------------------------------------------------------------------
/scripts/cleanup.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
# cleanup.sh — remove all caches, virtual environments, build artifacts, and
# temporary files from the project tree. Destructive: everything matched by
# the find patterns below is deleted without confirmation.

# Exit on error
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print step information
print_step() {
    echo -e "${BLUE}==> $1${NC}"
}

# Function to print success message
print_success() {
    echo -e "${GREEN}==> Success: $1${NC}"
}

# Function to print error message
print_error() {
    echo -e "${RED}==> Error: $1${NC}" >&2
}

# Get the script's directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$SCRIPT_DIR/.."

# Change to project root so all find/rm commands operate on the repo tree
cd "$PROJECT_ROOT"

print_step "Starting cleanup of all caches and virtual environments..."

# Remove all virtual environments
print_step "Removing virtual environments..."
find . -type d -name ".venv" -exec rm -rf {} +
print_success "Virtual environments removed"

# Remove all Python cache files and directories
print_step "Removing Python cache files and directories..."
find . -type d -name "__pycache__" -exec rm -rf {} +
find . -type d -name ".pytest_cache" -exec rm -rf {} +
find . -type d -name ".mypy_cache" -exec rm -rf {} +
find . -type d -name ".ruff_cache" -exec rm -rf {} +
find . -name "*.pyc" -delete
find . -name "*.pyo" -delete
find . -name "*.pyd" -delete
print_success "Python cache files removed"

# Remove all build artifacts
print_step "Removing build artifacts..."
find . -type d -name "build" -exec rm -rf {} +
find . -type d -name "dist" -exec rm -rf {} +
find . -type d -name "*.egg-info" -exec rm -rf {} +
find . -type d -name "*.egg" -exec rm -rf {} +
print_success "Build artifacts removed"

# Remove PDM-related files and directories
print_step "Removing PDM-related files and directories..."
find . -name "pdm.lock" -delete
find . -type d -name ".pdm-build" -exec rm -rf {} +
find . -name ".pdm-python" -delete  # .pdm-python is a file, not a directory
print_success "PDM-related files removed"

# Remove MCP-related files
print_step "Removing MCP-related files..."
find . -name "mcp_server.log" -delete
print_success "MCP-related files removed"

# Remove .env file (local secrets/config; -f so a missing file is not an error)
print_step "Removing .env file..."
rm -f .env
print_success ".env file removed"

# Remove typings directory
print_step "Removing typings directory..."
rm -rf .vscode/typings
print_success "Typings directory removed"

# Clean up any temporary files
print_step "Removing temporary files..."
find . -name "*.tmp" -delete
find . -name "*.bak" -delete
find . -name "*.swp" -delete
print_success "Temporary files removed"

print_success "Cleanup complete! All caches and virtual environments have been removed."
print_step "To rebuild the project, run: bash scripts/build.sh"

```

--------------------------------------------------------------------------------
/tests/shell_cmd.py:
--------------------------------------------------------------------------------

```python
"""
Shell Command Tests (CMD)
Tests for the run_command method of the Computer interface using cmd.exe commands.
Required environment variables:
- CUA_API_KEY: API key for Cua cloud provider
- CUA_CONTAINER_NAME: Name of the container to use
"""

import os
import asyncio
import pytest
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file at the repo root (one level up).
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# NOTE(review): splitting on ":" is POSIX-specific; os.pathsep would be the
# portable separator on Windows — confirm intended platforms.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer import Computer, VMProviderType

@pytest.fixture(scope="session")
async def computer():
    """Shared Computer instance for all test cases.

    Yields a connected cloud Windows computer; disconnects when the session ends.

    NOTE(review): an async generator fixture normally needs
    `@pytest_asyncio.fixture` (or `asyncio_mode = "auto"` in pytest config) to
    resolve to the yielded value — presumably the project config enables this;
    confirm.
    """
    # Create a remote Windows computer with Cua
    computer = Computer(
        os_type="windows",
        api_key=os.getenv("CUA_API_KEY"),
        name=str(os.getenv("CUA_CONTAINER_NAME")),
        provider_type=VMProviderType.CLOUD,
    )

    try:
        await computer.run()
        yield computer
    finally:
        # Always release the cloud container, even if setup or a test fails.
        await computer.disconnect()


# Sample test cases
@pytest.mark.asyncio(loop_scope="session")
async def test_cmd_echo_command(computer):
    """cmd.exe echo should round-trip the text with a clean exit."""
    outcome = await computer.interface.run_command("echo Hello World")

    assert outcome.stdout.strip() == "Hello World"
    assert outcome.stderr == ""
    assert outcome.returncode == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_cmd_dir_command(computer):
    """Listing C:\\ via dir should succeed and include the directory banner."""
    outcome = await computer.interface.run_command("dir C:\\")

    assert outcome.returncode == 0
    assert outcome.stderr == ""
    assert "Directory of C:\\" in outcome.stdout
    assert "bytes" in outcome.stdout.lower()  # dir reports sizes in bytes


@pytest.mark.asyncio(loop_scope="session")
async def test_cmd_command_with_error(computer):
    """dir on a missing path should fail with an error message on stderr."""
    outcome = await computer.interface.run_command("dir C:\\nonexistent_directory_12345")

    assert outcome.returncode != 0
    assert outcome.stdout == ""
    # cmd.exe wording varies between Windows versions; accept any known form.
    known_errors = ("File Not Found", "cannot find the path", "The system cannot find")
    assert any(fragment in outcome.stderr for fragment in known_errors)


if __name__ == "__main__":
    # Allow running this file directly without invoking pytest on the CLI.
    pytest.main([__file__, "-v"])

```

--------------------------------------------------------------------------------
/tests/test_shell_bash.py:
--------------------------------------------------------------------------------

```python
"""
Shell Command Tests (Bash)
Tests for the run_command method of the Computer interface using bash commands.
Required environment variables:
- CUA_API_KEY: API key for Cua cloud provider
- CUA_CONTAINER_NAME: Name of the container to use
"""

import os
import asyncio
import pytest
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file at the repo root (one level up).
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# NOTE(review): splitting on ":" is POSIX-specific; os.pathsep would be the
# portable separator on Windows — confirm intended platforms.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer import Computer, VMProviderType

@pytest.fixture(scope="session")
async def computer():
    """Shared Computer instance for all test cases.

    Yields a connected cloud Linux computer; disconnects when the session ends.

    NOTE(review): an async generator fixture normally needs
    `@pytest_asyncio.fixture` (or `asyncio_mode = "auto"` in pytest config) to
    resolve to the yielded value — presumably the project config enables this;
    confirm.
    """
    # Create a remote Linux computer with Cua
    computer = Computer(
        os_type="linux",
        api_key=os.getenv("CUA_API_KEY"),
        name=str(os.getenv("CUA_CONTAINER_NAME")),
        provider_type=VMProviderType.CLOUD,
    )

    try:
        await computer.run()
        yield computer
    finally:
        # Always release the cloud container, even if setup or a test fails.
        await computer.disconnect()


# Sample test cases
@pytest.mark.asyncio(loop_scope="session")
async def test_bash_echo_command(computer):
    """bash echo should round-trip the text with a clean exit."""
    outcome = await computer.interface.run_command("echo 'Hello World'")

    assert outcome.stdout.strip() == "Hello World"
    assert outcome.stderr == ""
    assert outcome.returncode == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_bash_ls_command(computer):
    """ls -la on /tmp should succeed and show the standard long-format entries."""
    outcome = await computer.interface.run_command("ls -la /tmp")

    assert outcome.returncode == 0
    assert outcome.stderr == ""
    assert "total" in outcome.stdout   # ls -la prefixes output with "total"
    assert "." in outcome.stdout       # current directory entry
    assert ".." in outcome.stdout      # parent directory entry


@pytest.mark.asyncio(loop_scope="session")
async def test_bash_command_with_error(computer):
    """ls on a missing path should fail with an error message on stderr."""
    outcome = await computer.interface.run_command("ls /nonexistent_directory_12345")

    assert outcome.returncode != 0
    assert outcome.stdout == ""
    # Wording differs across coreutils versions; accept either known form.
    assert any(fragment in outcome.stderr
               for fragment in ("No such file or directory", "cannot access"))


if __name__ == "__main__":
    # Allow running this file directly without invoking pytest on the CLI.
    pytest.main([__file__, "-v"])

```

--------------------------------------------------------------------------------
/examples/evals/hud_eval_examples.py:
--------------------------------------------------------------------------------

```python
"""
hud_eval_examples.py — minimal HUD evaluation runner

- Auto-discovers .env anywhere up the directory tree (via find_dotenv)
- Requires HUD_API_KEY in the resolved environment
- No Docker/local computer usage
"""

#imports
import asyncio
import logging
import os
import uuid
from pathlib import Path
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
from agent import ComputerAgent
from agent.integrations.hud import run_full_dataset

"""
Loading env
"""
def load_env_or_fail() -> None:
    """Load the nearest .env (walking up from this file's directory) and
    verify HUD_API_KEY is present afterwards.

    Raises:
        FileNotFoundError: when no .env file can be discovered.
        EnvironmentError: when HUD_API_KEY is absent or empty after loading.
    """
    dotenv_path = find_dotenv(usecwd=False)
    if not dotenv_path:
        raise FileNotFoundError(
            "❌ .env not found. Place a .env at your repo root (or export HUD_API_KEY)."
        )
    load_dotenv(dotenv_path, override=True)
    if not os.getenv("HUD_API_KEY"):
        raise EnvironmentError("❌ HUD_API_KEY is missing in the loaded environment")

"""
Build Agent Config
- customize agent behavior, tool integration, callbacks, resource management, and more
- https://docs.trycua.com/docs/agent-sdk/agent-loops#parameters
- https://docs.trycua.com/docs/agent-sdk/supported-model-providers
"""
def build_agent_config() -> dict:
    """Assemble the keyword arguments shared by ComputerAgent and the HUD runner.

    Returns:
        dict: model id, trajectory output dir, image-history window, log
        verbosity, and the system instruction for the agent.
    """
    return dict(
        model="openai/computer-use-preview",
        trajectory_dir=str(Path("trajectories")),
        only_n_most_recent_images=3,
        verbosity=logging.INFO,
        instruction="You are a computer-using agent graded by deterministic checkers.",
    )

"""
Hud Eval
"""
async def run_hud_eval() -> None:
    """Load the environment, validate the agent config, and run one HUD job."""
    #load env and agent config
    load_env_or_fail()
    agent_config = build_agent_config()

    # Initialize to ensure config is valid (tools, verbosity, etc.)
    # NOTE(review): the instance is deliberately discarded — construction is
    # the validation step.
    _ = ComputerAgent(**agent_config)

    job_name = f"osworld-test-{str(uuid.uuid4())[:4]}" #job name (each run of your task is a job on hud)
    print(f"🚀 Running HUD eval: {job_name}")


    """
    Customize your hud eval below, check the doc for additional params
    - https://docs.trycua.com/docs/agent-sdk/integrations/hud#parameters-1
    - recommend low max steps (5-10) for testing, then max 100 for benchmarking
    - also select specific tasks to run by using splitting the dataset
    """
    # NOTE(review): **agent_config forwards every key (including "instruction")
    # to run_full_dataset — confirm the runner accepts all of them.
    results = await run_full_dataset(
        dataset="ddupont/OSWorld-Tiny-Public",
        job_name=job_name,
        **agent_config,
        max_concurrent=20,
        max_steps=50,
        # split="train[0:1]"
    )

    # Summarize: result count plus a peek at the first few entries.
    print(f"\n📊 Job: {job_name}")
    print(f"Total results: {len(results)}")
    pprint(results[:3])


def main() -> None:
    """CLI entry point: configure root logging, then run the async eval."""
    logging.basicConfig(level=logging.INFO)
    asyncio.run(run_hud_eval())


if __name__ == "__main__":
    main()

```

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/base.ts:
--------------------------------------------------------------------------------

```typescript
import os from "node:os";
import { Telemetry } from "@trycua/core";
import pino from "pino";
import type { OSType } from "../../types";
import type { BaseComputerConfig, Display, VMProviderType } from "../types";

const logger = pino({ name: "computer.provider_base" });

/**
 * Base Computer class with shared functionality
 */
export abstract class BaseComputer {
	protected name: string;
	protected osType: OSType;
	protected vmProvider?: VMProviderType;
	protected telemetry: Telemetry;

	constructor(config: BaseComputerConfig) {
		this.name = config.name;
		this.osType = config.osType;
		this.telemetry = new Telemetry();
		// One event for module load, one for this particular instance.
		this.telemetry.recordEvent("module_init", {
			module: "computer",
			version: process.env.npm_package_version,
			node_version: process.version,
		});

		this.telemetry.recordEvent("computer_initialized", {
			os: os.platform(),
			os_version: os.version(),
			node_version: process.version,
		});
	}

	/** Name of this computer instance. */
	getName(): string {
		return this.name;
	}

	/** Operating system type of this computer. */
	getOSType(): OSType {
		return this.osType;
	}

	/** VM provider backing this computer, if one was set. */
	getVMProviderType(): VMProviderType | undefined {
		return this.vmProvider;
	}

	/** Disconnect from the computer; shared by all computer types. */
	async disconnect(): Promise<void> {
		logger.info(`Disconnecting from ${this.name}`);
		// Implementation would go here
	}

	/**
	 * Parse a "WIDTHxHEIGHT" display string into a Display object.
	 * @param display Display string, e.g. "1024x768"
	 * @returns Display with numeric width and height
	 * @throws Error when the string does not match WIDTHxHEIGHT
	 */
	public static parseDisplayString(display: string): Display {
		const parsed = /^(\d+)x(\d+)$/.exec(display);
		if (parsed === null) {
			throw new Error(
				`Invalid display format: ${display}. Expected format: WIDTHxHEIGHT`,
			);
		}

		const [, rawWidth, rawHeight] = parsed;
		return {
			width: Number.parseInt(rawWidth, 10),
			height: Number.parseInt(rawHeight, 10),
		};
	}

	/**
	 * Parse a memory string into an integer number of MB.
	 *
	 * Examples:
	 *   "8GB" -> 8192
	 *   "1024MB" -> 1024
	 *   "512" -> 512 (MB is assumed when no unit is given)
	 *
	 * @param memoryStr - Memory string to parse
	 * @returns Memory value in MB (0 for an empty string)
	 * @throws Error when the string cannot be parsed
	 */
	public static parseMemoryString(memoryStr: string): number {
		if (!memoryStr) {
			return 0;
		}

		// Case-insensitive match of "<number>[GB|MB]".
		const normalized = memoryStr.toUpperCase().trim();
		const parsed = normalized.match(/^(\d+(?:\.\d+)?)\s*(GB|MB)?$/);
		if (parsed === null) {
			throw new Error(`Invalid memory format: ${memoryStr}`);
		}

		const amount = Number.parseFloat(parsed[1]);
		const unit = parsed[2] ?? "MB";

		return unit === "GB" ? Math.round(amount * 1024) : Math.round(amount);
	}
}

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/generic.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any, Optional

# Hugging Face imports are local to avoid hard dependency at module import
try:
    import torch  # type: ignore
    from transformers import AutoModel, AutoProcessor  # type: ignore
    HF_AVAILABLE = True
except Exception:
    HF_AVAILABLE = False


class GenericHFModel:
    """Generic Hugging Face vision-language model handler.

    Loads an AutoModel and its AutoProcessor and generates text from
    HF-format chat messages.

    NOTE(review): unlike the Qwen2.5-VL handler, this uses AutoModel (not
    AutoModelForImageTextToText) and torch.float16 (not bfloat16) — confirm
    both choices are intentional; AutoModel instances do not always expose
    `.generate`.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Eagerly load model and processor.

        Args:
            model_name: Hugging Face hub id or local path.
            device: Forwarded as `device_map` (e.g. "auto", "cuda:0").
            trust_remote_code: Forwarded to `from_pretrained`.

        Raises:
            ImportError: if torch/transformers are not installed.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None
        self.processor = None
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Instantiate the model and processor from the hub."""
        # Load model
        self.model = AutoModel.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map=self.device,
            attn_implementation="sdpa",
            trust_remote_code=self.trust_remote_code,
        )
        # Load processor; pixel bounds cap the vision token budget.
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            min_pixels=3136,
            max_pixels=4096 * 2160,
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        )

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        messages: [{ role, content: [{type:'text'|'image', text|image}] }]

        Returns the decoded completion for the first (only) batch item, or
        "" when decoding yields nothing.
        """
        assert self.model is not None and self.processor is not None
        # Apply chat template and tokenize
        inputs = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        # Move inputs to the same device as model
        inputs = inputs.to(self.model.device)
        # Generate
        with torch.no_grad():
            generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # Trim prompt tokens from output so only new tokens are decoded
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        # Decode
        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return output_text[0] if output_text else ""

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/qwen2_5_vl.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any, Optional

# Hugging Face imports are local to avoid hard dependency at module import
try:
    import torch  # type: ignore
    from transformers import AutoModelForImageTextToText, AutoProcessor  # type: ignore
    HF_AVAILABLE = True
except Exception:
    HF_AVAILABLE = False


class Qwen2_5_VLModel:
    """Qwen2.5-VL vision-language model wrapper backed by Hugging Face.

    Loads an AutoModelForImageTextToText plus its AutoProcessor once at
    construction and exposes a single `generate` call.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Eagerly load model and processor.

        Raises:
            ImportError: if torch/transformers are not installed.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.trust_remote_code = trust_remote_code
        self.model = None
        self.processor = None
        self._load()

    def _load(self) -> None:
        """Instantiate the model and processor from the hub."""
        shared_kwargs = dict(
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        )
        self.model = AutoModelForImageTextToText.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            attn_implementation="sdpa",
            **shared_kwargs,
        )
        # Pixel bounds cap the vision token budget per image.
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            min_pixels=3136,
            max_pixels=4096 * 2160,
            **shared_kwargs,
        )

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        messages: [{ role, content: [{type:'text'|'image', text|image}] }]
        """
        assert self.model is not None and self.processor is not None
        # Tokenize via the chat template and move tensors to the model device.
        model_inputs = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)
        with torch.no_grad():
            output_ids = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)
        # Drop the prompt portion so only newly generated tokens are decoded.
        trimmed = [
            full[len(prompt):]
            for prompt, full in zip(model_inputs.input_ids, output_ids)
        ]
        decoded = self.processor.batch_decode(
            trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0] if decoded else ""

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/utils.py:
--------------------------------------------------------------------------------

```python
import base64
from typing import Tuple, Optional, Dict, Any
from PIL import Image, ImageDraw
import io

def decode_base64_image(base64_str: str) -> bytes:
    """Turn a base64-encoded string back into raw image bytes."""
    raw = base64.b64decode(base64_str)
    return raw

def encode_base64_image(image_bytes: bytes) -> str:
    """Encode raw image bytes as a UTF-8 base64 string."""
    encoded = base64.b64encode(image_bytes)
    return encoded.decode('utf-8')

def bytes_to_image(image_bytes: bytes) -> Image.Image:
    """Deserialize raw bytes into a PIL Image.

    Args:
        image_bytes: Raw image bytes

    Returns:
        PIL.Image: The decoded image
    """
    buffer = io.BytesIO(image_bytes)
    return Image.open(buffer)

def image_to_bytes(image: Image.Image, format: str = 'PNG') -> bytes:
    """Serialize a PIL Image to raw bytes in the given format (PNG default)."""
    buffer = io.BytesIO()
    image.save(buffer, format=format)
    return buffer.getvalue()

def resize_image(image_bytes: bytes, scale_factor: float) -> bytes:
    """Resize an image by a uniform scale factor.

    Args:
        image_bytes: The original image as bytes
        scale_factor: Factor to scale by (e.g. 0.5 halves, 2.0 doubles);
            1.0 re-encodes without resizing.

    Returns:
        bytes: The resized image as bytes
    """
    image = bytes_to_image(image_bytes)
    if scale_factor == 1.0:
        return image_to_bytes(image)
    target = (int(image.width * scale_factor), int(image.height * scale_factor))
    return image_to_bytes(image.resize(target, Image.Resampling.LANCZOS))

def draw_box(
    image_bytes: bytes,
    x: int,
    y: int,
    width: int,
    height: int,
    color: str = "#FF0000",
    thickness: int = 2
) -> bytes:
    """Draw a rectangular outline onto an image.

    Args:
        image_bytes: The original image as bytes
        x: X coordinate of top-left corner
        y: Y coordinate of top-left corner
        width: Width of the box
        height: Height of the box
        color: Outline color in hex format
        thickness: Outline thickness in pixels

    Returns:
        bytes: The modified image as bytes
    """
    image = bytes_to_image(image_bytes)
    canvas = ImageDraw.Draw(image)

    top_left = (x, y)
    bottom_right = (x + width, y + height)
    canvas.rectangle([top_left, bottom_right], outline=color, width=thickness)

    return image_to_bytes(image)

def get_image_size(image_bytes: bytes) -> Tuple[int, int]:
    """Return (width, height) of the image encoded in `image_bytes`."""
    return bytes_to_image(image_bytes).size

def parse_vm_info(vm_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Parse VM info from pylume response."""
    # NOTE(review): only the falsy guard is present; non-empty input also
    # falls through and implicitly returns None. This looks truncated —
    # confirm against the original module whether parsing logic is missing.
    if not vm_info:
        return None
```

--------------------------------------------------------------------------------
/examples/computer-example-ts/src/index.ts:
--------------------------------------------------------------------------------

```typescript
import { Computer, OSType } from "@trycua/computer";
import OpenAI from "openai";
import { executeAction } from "./helpers";

import "dotenv/config";

// OpenAI client; reads OPENAI_API_KEY loaded via dotenv/config above.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

const COMPUTER_USE_PROMPT = "Open firefox and go to trycua.com";

// Initialize the Computer Connection (cloud Linux container).
const computer = new Computer({
	apiKey: process.env.CUA_API_KEY!,
	name: process.env.CUA_CONTAINER_NAME!,
	osType: OSType.LINUX,
});

await computer.run();
// Take the initial screenshot
const screenshot = await computer.interface.screenshot();
const screenshotBase64 = screenshot.toString("base64");

// Setup openai config for computer use
const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = {
	model: "computer-use-preview",
	tools: [
		{
			type: "computer_use_preview",
			display_width: 1024,
			display_height: 768,
			environment: "linux", // we're using a linux vm
		},
	],
	truncation: "auto",
};

// Send initial screenshot to the openai computer use model
let res = await openai.responses.create({
	...computerUseConfig,
	input: [
		{
			role: "user",
			content: [
				// what we want the ai to do
				{ type: "input_text", text: COMPUTER_USE_PROMPT },
				// current screenshot of the vm
				{
					type: "input_image",
					image_url: `data:image/png;base64,${screenshotBase64}`,
					detail: "auto",
				},
			],
		},
	],
});

// Agent loop: execute returned actions until there are no more computer calls.
while (true) {
	const computerCalls = res.output.filter((o) => o.type === "computer_call");
	if (computerCalls.length < 1) {
		console.log("No more computer calls. Loop complete.");
		break;
	}
	// Get the first call
	// NOTE(review): only the first computer_call per response is executed;
	// any additional calls in the same response are dropped — confirm intended.
	const call = computerCalls[0];
	const action = call.action;
	console.log("Received action from OpenAI Responses API:", action);
	let ackChecks: OpenAI.Responses.ResponseComputerToolCall.PendingSafetyCheck[] =
		[];
	if (call.pending_safety_checks.length > 0) {
		console.log("Safety checks pending:", call.pending_safety_checks);
		// In a real implementation, you would want to get user confirmation here
		// before acknowledging; this demo auto-acknowledges every check.
		ackChecks = call.pending_safety_checks;
	}

	// Execute the action in the container
	await executeAction(computer, action);
	// Wait for changes to process within the container (1sec)
	await new Promise((resolve) => setTimeout(resolve, 1000));

	// Capture new screenshot
	const newScreenshot = await computer.interface.screenshot();
	const newScreenshotBase64 = newScreenshot.toString("base64");

	// Screenshot back as computer_call_output; previous_response_id links the turn.

	res = await openai.responses.create({
		...computerUseConfig,
		previous_response_id: res.id,
		input: [
			{
				type: "computer_call_output",
				call_id: call.call_id,
				acknowledged_safety_checks: ackChecks,
				output: {
					type: "computer_screenshot",
					image_url: `data:image/png;base64,${newScreenshotBase64}`,
				},
			},
		],
	});
}

process.exit();

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/logger.py:
--------------------------------------------------------------------------------

```python
"""Logging utilities for the Computer module."""

import logging
from enum import IntEnum


# Keep LogLevel for backward compatibility, but it will be deprecated
class LogLevel(IntEnum):
    """Legacy verbosity levels. Deprecated - use standard logging levels instead."""

    QUIET = 0  # warnings and errors only
    NORMAL = 1  # standard info output
    VERBOSE = 2  # extra detail
    DEBUG = 3  # everything


# Translation table from the legacy enum to stdlib logging levels.
LOGLEVEL_MAP = {
    LogLevel.QUIET: logging.WARNING,
    LogLevel.NORMAL: logging.INFO,
    LogLevel.VERBOSE: logging.DEBUG,
    LogLevel.DEBUG: logging.DEBUG,
}


class Logger:
    """Thin wrapper around `logging.Logger` used by Computer."""

    def __init__(self, name: str, verbosity: int):
        """Create and configure the named logger.

        Args:
            name: The name of the logger.
            verbosity: Standard logging level (e.g. logging.INFO). Legacy
                LogLevel enum values are translated for backward compatibility.
        """
        self.logger = logging.getLogger(name)

        if isinstance(verbosity, LogLevel):
            # Legacy enum input: translate, defaulting to INFO.
            self.verbosity = LOGLEVEL_MAP.get(verbosity, logging.INFO)
        else:
            self.verbosity = verbosity

        self._configure()

    def _configure(self):
        """Apply the stored verbosity to the logger and announce the level."""
        self.logger.setLevel(self.verbosity)

        # The first threshold at or above the verbosity wins — this mirrors
        # the original if/elif ladder exactly, message for message.
        announcements = (
            (logging.DEBUG, self.logger.info, "Logger set to DEBUG level"),
            (logging.INFO, self.logger.info, "Logger set to INFO level"),
            (logging.WARNING, self.logger.warning, "Logger set to WARNING level"),
            (logging.ERROR, self.logger.warning, "Logger set to ERROR level"),
            (logging.CRITICAL, self.logger.warning, "Logger set to CRITICAL level"),
        )
        for threshold, emit, text in announcements:
            if self.verbosity <= threshold:
                emit(text)
                break

    def debug(self, message: str):
        """Forward a debug-level message."""
        self.logger.debug(message)

    def info(self, message: str):
        """Forward an info-level message."""
        self.logger.info(message)

    def verbose(self, message: str):
        """Log at debug level with a [VERBOSE] prefix (no stdlib verbose level)."""
        self.logger.debug(f"[VERBOSE] {message}")

    def warning(self, message: str):
        """Forward a warning-level message."""
        self.logger.warning(message)

    def error(self, message: str):
        """Forward an error-level message."""
        self.logger.error(message)

```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/sandboxed-python.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Sandboxed Python
slug: sandboxed-python
---

<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py" target="_blank">Python example</a> is available for this documentation.</Callout>

You can run Python functions securely inside a sandboxed virtual environment on a remote Cua Computer. This is useful for executing untrusted user code, isolating dependencies, or providing a safe environment for automation tasks.

## How It Works

The `sandboxed` decorator from the Computer SDK wraps a Python function so that it is executed remotely in a specified virtual environment on the target Computer. The function and its arguments are serialized, sent to the remote, and executed in isolation. Results or errors are returned to the caller.

## Example Usage

```python
from computer import Computer
from computer.helpers import sandboxed

@sandboxed()
def read_file(location: str) -> str:
    """Read contents of a file"""
    with open(location, 'r') as f:
        return f.read()

async def main():
    async with Computer(os_type="linux", provider_type="cloud", name="my-container", api_key="...") as computer:
        # Call the sandboxed function (runs remotely)
        result = await read_file("/etc/hostname")
        print(result)
```

## Customizing the Sandbox and Installing Packages

You can specify the virtual environment name and target computer:

```python
@sandboxed(venv_name="myenv", computer=my_computer, max_retries=5)
def my_function(...):
    ...
```

You can also install packages in the virtual environment using the `venv_install` method:

```python
await my_computer.venv_install("myenv", ["requests"])
```

## Example: Interacting with macOS Applications

You can use sandboxed functions to interact with macOS applications on a local Cua Computer (requires `os_type="darwin"`). This is particularly useful for automation tasks that involve GUI applications.

```python
# Example: Use sandboxed functions to execute code in a Cua Container
from computer.helpers import sandboxed

await computer.venv_install("demo_venv", ["macos-pyxa"]) # Install packages in a virtual environment

@sandboxed("demo_venv")
def greet_and_print(name):
    """Get the HTML of the current Safari tab"""
    import PyXA
    safari = PyXA.Application("Safari")
    html = safari.current_document.source()
    print(f"Hello from inside the container, {name}!")
    return {"greeted": name, "safari_html": html}

# When a @sandboxed function is called, it will execute in the container
result = await greet_and_print("Cua")
# Result: {"greeted": "Cua", "safari_html": "<html>...</html>"}
# stdout and stderr are also captured and printed / raised
print("Result from sandboxed function:", result)
```

## Error Handling

If the remote execution fails, the decorator will retry up to `max_retries` times. If all attempts fail, the last exception is raised locally.

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/cloud/provider.py:
--------------------------------------------------------------------------------

```python
"""Cloud VM provider implementation.

This module contains a stub implementation for a future cloud VM provider.
"""

import logging
from typing import Dict, List, Optional, Any

from ..base import BaseVMProvider, VMProviderType

# Setup logging
logger = logging.getLogger(__name__)

import asyncio
import aiohttp
from urllib.parse import urlparse

class CloudProvider(BaseVMProvider):
    """Cloud VM Provider implementation.

    Resolves VM hostnames of the form '{name}.containers.cloud.trycua.com'.
    Lifecycle operations (run/stop/update/list) are currently stubs that log
    a warning and/or return a placeholder payload.
    """
    def __init__(
        self,
        api_key: str,
        verbose: bool = False,
        **kwargs,
    ):
        """
        Args:
            api_key: API key for authentication (must be non-empty)
            verbose: Enable verbose logging
            **kwargs: Extra provider options accepted for interface
                compatibility; currently ignored
        """
        assert api_key, "api_key required for CloudProvider"
        self.api_key = api_key
        self.verbose = verbose

    @property
    def provider_type(self) -> VMProviderType:
        # Identifies this provider as the CLOUD provider type.
        return VMProviderType.CLOUD

    async def __aenter__(self):
        # Stateless provider: no connection setup is required.
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Nothing to tear down here; see Computer.disconnect() for cleanup.
        pass

    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Return the VM name and its derived cloud hostname."""
        return {"name": name, "hostname": f"{name}.containers.cloud.trycua.com"}

    async def list_vms(self) -> List[Dict[str, Any]]:
        # Not implemented: always returns an empty list.
        logger.warning("CloudProvider.list_vms is not implemented")
        return []

    async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        # Not implemented: returns a placeholder status payload.
        # logger.warning("CloudProvider.run_vm is not implemented")
        return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"}

    async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        # Not implemented: logs a warning and reports "stopped" without acting.
        logger.warning("CloudProvider.stop_vm is not implemented. To clean up resources, please use Computer.disconnect()")
        return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"}

    async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        # Not implemented: logs a warning and reports "unchanged".
        logger.warning("CloudProvider.update_vm is not implemented")
        return {"name": name, "status": "unchanged", "message": "CloudProvider is not implemented"}

    async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str:
        """
        Return the VM's hostname as '{name}.containers.cloud.trycua.com'.

        Args:
            name: Name of the VM; required by this provider.
            storage: Accepted for interface compatibility; unused.
            retry_delay: Accepted for interface compatibility; unused —
                this implementation performs no retries.

        Raises:
            ValueError: If name is None.
        """
        if name is None:
            raise ValueError("VM name is required for CloudProvider.get_ip")
        return f"{name}.containers.cloud.trycua.com"

```

--------------------------------------------------------------------------------
/docs/content/docs/libraries/computer-server/Commands.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Supported Commands
description: List of all commands supported by the Computer Server API (WebSocket and REST).
---

# Commands Reference

This page lists all supported commands for the Computer Server, available via both WebSocket and REST API endpoints.

| Command             | Description                                |
|---------------------|--------------------------------------------|
| version             | Get protocol and package version info       |
| run_command         | Run a shell command                        |
| screenshot          | Capture a screenshot                       |
| get_screen_size     | Get the screen size                        |
| get_cursor_position | Get the current mouse cursor position      |
| mouse_down          | Mouse button down                          |
| mouse_up            | Mouse button up                            |
| left_click          | Left mouse click                           |
| right_click         | Right mouse click                          |
| double_click        | Double mouse click                         |
| move_cursor         | Move mouse cursor to coordinates           |
| drag_to             | Drag mouse to coordinates                  |
| drag                | Drag mouse by offset                       |
| key_down            | Keyboard key down                          |
| key_up              | Keyboard key up                            |
| type_text           | Type text                                  |
| press_key           | Press a single key                         |
| hotkey              | Press a hotkey combination                 |
| scroll              | Scroll the screen                          |
| scroll_down         | Scroll down                                |
| scroll_up           | Scroll up                                  |
| copy_to_clipboard   | Copy text to clipboard                     |
| set_clipboard       | Set clipboard content                      |
| file_exists         | Check if a file exists                     |
| directory_exists    | Check if a directory exists                |
| list_dir            | List files/directories in a directory      |
| read_text           | Read text from a file                      |
| write_text          | Write text to a file                       |
| read_bytes          | Read bytes from a file                     |
| write_bytes         | Write bytes to a file                      |
| get_file_size       | Get file size                              |
| delete_file         | Delete a file                              |
| create_dir          | Create a directory                         |
| delete_dir          | Delete a directory                         |
| get_accessibility_tree | Get accessibility tree (if supported)    |
| find_element        | Find element in accessibility tree         |
| diorama_cmd         | Run a diorama command (if supported)       |

```

--------------------------------------------------------------------------------
/libs/lume/tests/VNCServiceTests.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Testing
@testable import lume

@Test("VNCService starts correctly")
func testVNCServiceStart() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    
    // Initial state
    let isRunning = await service.isRunning
    let url = await service.url
    #expect(!isRunning)
    #expect(url == nil)
    
    // Start service
    try await service.start(port: 5900, virtualMachine: nil)
    #expect(await service.isRunning)
    #expect(await service.url?.contains("5900") ?? false)
}

@Test("VNCService stops correctly")
func testVNCServiceStop() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    try await service.start(port: 5900, virtualMachine: nil)
    
    await service.stop()
    let isRunning = await service.isRunning
    let url = await service.url
    #expect(!isRunning)
    #expect(url == nil)
}

@Test("VNCService handles client operations")
func testVNCServiceClient() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    
    // Should fail when not started
    do {
        try await service.openClient(url: "vnc://localhost:5900")
        #expect(Bool(false), "Expected openClient to throw when not started")
    } catch VMError.vncNotConfigured {
        // Expected error
    } catch {
        #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
    }
    
    // Start and try client operations
    try await service.start(port: 5900, virtualMachine: nil)
    try await service.openClient(url: "vnc://localhost:5900")
    #expect(await service.clientOpenCount == 1)
    
    // Stop and verify client operations fail
    await service.stop()
    do {
        try await service.openClient(url: "vnc://localhost:5900")
        #expect(Bool(false), "Expected openClient to throw after stopping")
    } catch VMError.vncNotConfigured {
        // Expected error
    } catch {
        #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
    }
}

@Test("VNCService handles virtual machine attachment")
func testVNCServiceVMAttachment() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    let mockVM = "mock_vm"
    
    try await service.start(port: 5900, virtualMachine: mockVM)
    let attachedVM = await service.attachedVM
    #expect(attachedVM == mockVM)
}

/// Creates (and returns the URL of) a uniquely-named directory under the
/// system temporary directory for use as a scratch VM directory in tests.
private func createTempDirectory() throws -> URL {
    let uniqueName = UUID().uuidString
    let url = FileManager.default.temporaryDirectory.appendingPathComponent(uniqueName)
    try FileManager.default.createDirectory(at: url, withIntermediateDirectories: true)
    return url
}
```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/handlers/factory.py:
--------------------------------------------------------------------------------

```python
import platform
import subprocess
from typing import Tuple, Type
from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler
from computer_server.diorama.base import BaseDioramaHandler

# Conditionally import platform-specific handlers
system = platform.system().lower()
if system == 'darwin':
    from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler
    from computer_server.diorama.macos import MacOSDioramaHandler
elif system == 'linux':
    from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler
elif system == 'windows':
    from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler

from .generic import GenericFileHandler

class HandlerFactory:
    """Factory for creating OS-specific handlers."""

    @staticmethod
    def _get_current_os() -> str:
        """Determine the current OS.

        Returns:
            str: 'darwin', 'linux', or 'windows' when detected via
            platform.system(); otherwise the lowercased output of
            ``uname -s`` (Unix-like fallback), which may be any value.

        Raises:
            RuntimeError: If unable to determine the current OS.
        """
        try:
            # Use platform.system() as primary method
            system = platform.system().lower()
            if system in ['darwin', 'linux', 'windows']:
                return system

            # Fallback to uname if platform.system() doesn't return expected
            # values (Unix-like systems only; raises on Windows and is wrapped
            # into a RuntimeError below).
            result = subprocess.run(['uname', '-s'], capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip().lower()

            raise RuntimeError(f"Unsupported OS: {system}")
        except Exception as e:
            # Chain the original exception so the root cause stays visible
            # in tracebacks instead of being swallowed by the wrapper.
            raise RuntimeError(f"Failed to determine current OS: {str(e)}") from e

    @staticmethod
    def create_handlers() -> Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]:
        """Create and return appropriate handlers for the current OS.

        Returns:
            Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]:
            The accessibility, automation, diorama, and file handlers for the
            current OS. Diorama is only specialized on macOS; Linux and
            Windows fall back to the no-op BaseDioramaHandler.

        Raises:
            NotImplementedError: If the current OS is not supported.
            RuntimeError: If unable to determine the current OS.
        """
        os_type = HandlerFactory._get_current_os()

        if os_type == 'darwin':
            return MacOSAccessibilityHandler(), MacOSAutomationHandler(), MacOSDioramaHandler(), GenericFileHandler()
        elif os_type == 'linux':
            return LinuxAccessibilityHandler(), LinuxAutomationHandler(), BaseDioramaHandler(), GenericFileHandler()
        elif os_type == 'windows':
            return WindowsAccessibilityHandler(), WindowsAutomationHandler(), BaseDioramaHandler(), GenericFileHandler()
        else:
            raise NotImplementedError(f"OS '{os_type}' is not supported")

```

--------------------------------------------------------------------------------
/libs/lume/tests/VM/VMDetailsPrinterTests.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Testing

@testable import lume

/// Tests for VMDetailsPrinter covering both output formats: JSON round-trip
/// fidelity and the fixed-width text table layout.
struct VMDetailsPrinterTests {

    /// JSON format: printed output must decode back to the same VM details.
    @Test func printStatus_whenJSON() throws {
        // Given
        let vms: [VMDetails] = [
            VMDetails(
                name: "name",
                os: "os",
                cpuCount: 2,
                memorySize: 1024,
                diskSize: .init(allocated: 24, total: 30),
                display: "1024x768",
                status: "status",
                vncUrl: "vncUrl",
                ipAddress: "0.0.0.0",
                locationName: "mockLocation")
        ]
        let jsonEncoder = JSONEncoder()
        jsonEncoder.outputFormatting = .prettyPrinted
        let expectedOutput = try String(data: jsonEncoder.encode(vms), encoding: .utf8)!

        // When
        var printedStatus: String?
        try VMDetailsPrinter.printStatus(vms, format: .json, print: { printedStatus = $0 })

        // Then
        // Decode both JSONs and compare the actual data structures
        // (string comparison would be brittle against key-ordering changes).
        let jsonDecoder = JSONDecoder()
        let printedVMs = try jsonDecoder.decode(
            [VMDetails].self, from: printedStatus!.data(using: .utf8)!)
        let expectedVMs = try jsonDecoder.decode(
            [VMDetails].self, from: expectedOutput.data(using: .utf8)!)

        #expect(printedVMs.count == expectedVMs.count)
        for (printed, expected) in zip(printedVMs, expectedVMs) {
            #expect(printed.name == expected.name)
            #expect(printed.os == expected.os)
            #expect(printed.cpuCount == expected.cpuCount)
            #expect(printed.memorySize == expected.memorySize)
            #expect(printed.diskSize.allocated == expected.diskSize.allocated)
            #expect(printed.diskSize.total == expected.diskSize.total)
            #expect(printed.status == expected.status)
            #expect(printed.vncUrl == expected.vncUrl)
            #expect(printed.ipAddress == expected.ipAddress)
        }
    }

    /// Text format: one header line plus one padded row per VM, with the
    /// expected column order and formatted values.
    @Test func printStatus_whenNotJSON() throws {
        // Given
        let vms: [VMDetails] = [
            VMDetails(
                name: "name",
                os: "os",
                cpuCount: 2,
                memorySize: 1024,
                diskSize: .init(allocated: 24, total: 30),
                display: "1024x768",
                status: "status",
                vncUrl: "vncUrl",
                ipAddress: "0.0.0.0",
                locationName: "mockLocation")
        ]

        // When
        var printedLines: [String] = []
        try VMDetailsPrinter.printStatus(vms, format: .text, print: { printedLines.append($0) })

        // Then
        #expect(printedLines.count == 2)

        let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace)
        #expect(
            headerParts == [
                "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "shared_dirs", "ip", "vnc",
            ])

        // "0.00G" because memorySize is given in bytes (1024 B rounds to 0.00 GB);
        // "-" is the shared_dirs placeholder for a non-running VM.
        #expect(
            printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == [
                "name", "os", "2", "0.00G", "24.0B/30.0B", "1024x768", "status", "mockLocation",
                "-",
                "0.0.0.0",
                "vncUrl",
            ])
    }
}

```

--------------------------------------------------------------------------------
/libs/lume/src/Server/HTTP.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Network

/// Errors surfaced by the HTTP server layer.
enum HTTPError: Error {
    case internalError
}

/// A minimal parsed HTTP/1.1 request (method, path, headers, optional body).
struct HTTPRequest {
    let method: String
    let path: String
    let headers: [String: String]
    let body: Data?
    
    /// Parses a raw request from UTF-8 bytes.
    ///
    /// Returns nil when the data is not valid UTF-8 or the request line does
    /// not contain at least a method and a path. Headers without a ":" are
    /// silently skipped. Note: the body is round-tripped through UTF-8, so
    /// binary bodies are not supported by this parser.
    init?(data: Data) {
        guard let requestString = String(data: data, encoding: .utf8) else { return nil }
        let components = requestString.components(separatedBy: "\r\n\r\n")
        guard components.count >= 1 else { return nil }
        
        let headerLines = components[0].components(separatedBy: "\r\n")
        guard !headerLines.isEmpty else { return nil }
        
        // Parse request line: "METHOD PATH [HTTP/x.y]"
        let requestLine = headerLines[0].components(separatedBy: " ")
        guard requestLine.count >= 2 else { return nil }
        
        self.method = requestLine[0]
        self.path = requestLine[1]
        
        // Parse headers ("Name: Value"), trimming whitespace on both sides.
        var headers: [String: String] = [:]
        for line in headerLines.dropFirst() {
            let headerComponents = line.split(separator: ":", maxSplits: 1).map(String.init)
            if headerComponents.count == 2 {
                headers[headerComponents[0].trimmingCharacters(in: .whitespaces)] = 
                    headerComponents[1].trimmingCharacters(in: .whitespaces)
            }
        }
        self.headers = headers
        
        // Parse body if present. Rejoin the remaining components so a body
        // that itself contains "\r\n\r\n" is not truncated at its first
        // occurrence (the previous code kept only components[1]).
        if components.count > 1 {
            let bodyString = components.dropFirst().joined(separator: "\r\n\r\n")
            self.body = bodyString.data(using: .utf8)
        } else {
            self.body = nil
        }
    }
}

/// A minimal HTTP/1.1 response that can serialize itself to wire format.
struct HTTPResponse {
    enum StatusCode: Int {
        case ok = 200
        case accepted = 202
        case badRequest = 400
        case notFound = 404
        case internalServerError = 500
        
        /// Human-readable reason phrase for the status line.
        var description: String {
            switch self {
            case .ok: return "OK"
            case .accepted: return "Accepted"
            case .badRequest: return "Bad Request"
            case .notFound: return "Not Found"
            case .internalServerError: return "Internal Server Error"
            }
        }
    }
    
    let statusCode: StatusCode
    let headers: [String: String]
    let body: Data?
    
    /// Creates a response with explicit headers and an optional binary body.
    init(statusCode: StatusCode, headers: [String: String] = [:], body: Data? = nil) {
        self.statusCode = statusCode
        self.headers = headers
        self.body = body
    }
    
    /// Creates a plain-text response from a string body.
    init(statusCode: StatusCode, body: String) {
        self.statusCode = statusCode
        self.headers = ["Content-Type": "text/plain"]
        self.body = body.data(using: .utf8)
    }
    
    /// Serializes the response into raw HTTP/1.1 bytes:
    /// status line, headers (Content-Length added when a body exists),
    /// a blank line, then the body.
    func serialize() -> Data {
        var allHeaders = self.headers
        if let body = body {
            allHeaders["Content-Length"] = "\(body.count)"
        }
        
        let statusLine = "HTTP/1.1 \(statusCode.rawValue) \(statusCode.description)"
        let headerLines = allHeaders.map { "\($0.key): \($0.value)" }
        let head = ([statusLine] + headerLines).joined(separator: "\r\n") + "\r\n\r\n"
        
        var wireData = head.data(using: .utf8) ?? Data()
        if let body = body {
            wireData.append(body)
        }
        
        return wireData
    }
}

/// Minimal HTTP server shell that records the port to listen on.
/// NOTE(review): no listener or connection handling is created here — that
/// logic presumably lives elsewhere; confirm against the rest of the file.
final class HTTPServer {
    // TCP port the server is intended to bind to.
    let port: UInt16
    
    init(port: UInt16) {
        self.port = port
    }
} 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/pii_anonymization.py:
--------------------------------------------------------------------------------

```python
"""
PII anonymization callback handler using Microsoft Presidio for text and image redaction.
"""

from typing import List, Dict, Any, Optional, Tuple
from .base import AsyncCallbackHandler
import base64
import io
import logging

try:
    # TODO: Add Presidio dependencies
    from PIL import Image
    PRESIDIO_AVAILABLE = True
except ImportError:
    PRESIDIO_AVAILABLE = False

logger = logging.getLogger(__name__)

class PIIAnonymizationCallback(AsyncCallbackHandler):
    """
    Callback handler that anonymizes PII in text and images using Microsoft Presidio.
    
    This handler:
    1. Anonymizes PII in messages before sending to the agent loop
    2. Deanonymizes PII in tool calls and message outputs after the agent loop
    3. Redacts PII from images in computer_call_output messages

    NOTE(review): the anonymization internals are still stubs (see the TODOs
    below); currently messages and outputs pass through unchanged.
    """
    
    def __init__(
        self,
        # TODO: Any extra kwargs if needed
    ):
        """
        Initialize the PII anonymization callback.

        Raises:
            ImportError: If the optional anonymization dependencies are not
                installed. Note: PRESIDIO_AVAILABLE currently only reflects
                whether PIL imports; the Presidio imports are still TODO.

        NOTE(review): configuration options (entities to anonymize, operator,
        image redaction color, ...) are planned but not implemented yet.
        """
        if not PRESIDIO_AVAILABLE:
            raise ImportError(
                "Presidio is not available. Install with: "
                "pip install cua-agent[pii-anonymization]"
            )
        
        # TODO: Implement __init__
    
    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Anonymize PII in messages before sending to agent loop.
        
        Args:
            messages: List of message dictionaries
            
        Returns:
            List of messages with PII anonymized (currently unchanged, since
            _anonymize_message is a pass-through stub)
        """
        anonymized_messages = []
        for msg in messages:
            anonymized_msg = await self._anonymize_message(msg)
            anonymized_messages.append(anonymized_msg)
        
        return anonymized_messages
    
    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Deanonymize PII in tool calls and message outputs after agent loop.
        
        Args:
            output: List of output dictionaries
            
        Returns:
            List of output with PII deanonymized for tool calls; items of
            other types are passed through untouched
        """
        deanonymized_output = []
        for item in output:
            # Only deanonymize tool calls and computer_call messages
            if item.get("type") in ["computer_call", "computer_call_output"]:
                deanonymized_item = await self._deanonymize_item(item)
                deanonymized_output.append(deanonymized_item)
            else:
                deanonymized_output.append(item)
        
        return deanonymized_output
    
    async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """Anonymize a single message. Stub: returns the message unchanged."""
        # TODO: Implement _anonymize_message
        return message
    
    async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Deanonymize a single output item. Stub: returns the item unchanged."""
        # TODO: Implement _deanonymize_item
        return item

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Grounding Models
description: Models that support click prediction with ComputerAgent.predict_click()
---

These models specialize in UI element grounding and click prediction. They can identify precise coordinates for UI elements based on natural language descriptions, but cannot perform autonomous task planning.

Use `ComputerAgent.predict_click()` to get coordinates for specific UI elements.

All models that support `ComputerAgent.run()` also support `ComputerAgent.predict_click()`. See [All‑in‑one CUAs](./computer-use-agents).

### Anthropic CUAs

- Claude 4.1: `claude-opus-4-1-20250805`
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
- Claude 3.7: `claude-3-7-sonnet-20250219`
- Claude 3.5: `claude-3-5-sonnet-20241022`

### OpenAI CUA Preview
- Computer-use-preview: `computer-use-preview`

### UI-TARS 1.5 (Unified VLM with grounding support)
- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)

## Specialized Grounding Models

These models are optimized specifically for click prediction and UI element grounding:

### OpenCUA
- `huggingface-local/xlangai/OpenCUA-{7B,32B}`

### GTA1 Family
- `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}`

### Holo 1.5 Family
- `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}`

### InternVL 3.5 Family
- `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`

### OmniParser (OCR)

OCR-focused set-of-marks model that requires an LLM for click prediction:

- `omniparser` (requires combination with any LiteLLM vision model)

### Moondream3 (Local Grounding)

Moondream3 is a powerful small model that can perform UI grounding and click prediction.

- `moondream3`

## Usage Examples

```python
# Using any grounding model for click prediction
agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])

# Predict coordinates for specific elements
login_coords = agent.predict_click("find the login button")
search_coords = agent.predict_click("locate the search text field")
menu_coords = agent.predict_click("find the hamburger menu icon")

print(f"Login button: {login_coords}")
print(f"Search field: {search_coords}")
print(f"Menu icon: {menu_coords}")
```

```python
# OmniParser is just for OCR, so it requires an LLM for predict_click
agent = ComputerAgent("omniparser+anthropic/claude-3-5-sonnet-20241022", tools=[computer])

# Predict click coordinates using composed agent
coords = agent.predict_click("find the submit button")
print(f"Click coordinates: {coords}")  # (450, 320)

# Note: Cannot use omniparser alone for click prediction
# This will raise an error:
# agent = ComputerAgent("omniparser", tools=[computer])
# coords = agent.predict_click("find button")  # Error!
```

```python
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer])

# Predict click coordinates for UI elements
coords = agent.predict_click("find the submit button")
print(f"Click coordinates: {coords}")  # (450, 320)

# Note: GTA1 cannot perform autonomous task planning
# This will raise an error:
# agent.run("Fill out the form and submit it")
```

---

For information on combining grounding models with planning capabilities, see [Composed Agents](./composed-agents) and [All‑in‑one CUAs](./computer-use-agents).

```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/server.py:
--------------------------------------------------------------------------------

```python
"""
Server interface for Computer API.
Provides a clean API for starting and stopping the server.
"""

import asyncio
import logging
import uvicorn
from typing import Optional
from fastapi import FastAPI

from .main import app as fastapi_app

logger = logging.getLogger(__name__)


class Server:
    """
    Server interface for Computer API.

    Usage:
        from computer_api import Server

        # Synchronous usage
        server = Server()
        server.start()  # Blocks until server is stopped

        # Asynchronous usage
        server = Server()
        await server.start_async()  # Starts server in background
        # Do other things
        await server.stop()  # Stop the server

    NOTE(review): the import path in the example ("computer_api") looks stale —
    this module lives under computer_server; verify against the package layout.
    """

    def __init__(self, host: str = "0.0.0.0", port: int = 8000, log_level: str = "info", 
                 ssl_keyfile: Optional[str] = None, ssl_certfile: Optional[str] = None):
        """
        Initialize the server.

        Args:
            host: Host to bind the server to
            port: Port to bind the server to
            log_level: Logging level (debug, info, warning, error, critical)
            ssl_keyfile: Path to SSL private key file (for HTTPS)
            ssl_certfile: Path to SSL certificate file (for HTTPS)
        """
        self.host = host
        self.port = port
        self.log_level = log_level
        self.ssl_keyfile = ssl_keyfile
        self.ssl_certfile = ssl_certfile
        self.app = fastapi_app
        # Background task wrapping uvicorn's serve() when started via start_async().
        self._server_task: Optional[asyncio.Task] = None
        # Set in stop() as a shutdown signal; NOTE(review): nothing visible
        # here waits on this event — task cancellation below is what actually
        # stops the server.
        self._should_exit = asyncio.Event()

    def start(self) -> None:
        """
        Start the server synchronously. This will block until the server is stopped.
        """
        uvicorn.run(
            self.app, 
            host=self.host, 
            port=self.port, 
            log_level=self.log_level,
            ssl_keyfile=self.ssl_keyfile,
            ssl_certfile=self.ssl_certfile
        )

    async def start_async(self) -> None:
        """
        Start the server asynchronously. This will return immediately and the server
        will run in the background.
        """
        server_config = uvicorn.Config(
            self.app, 
            host=self.host, 
            port=self.port, 
            log_level=self.log_level,
            ssl_keyfile=self.ssl_keyfile,
            ssl_certfile=self.ssl_certfile
        )

        self._should_exit.clear()
        server = uvicorn.Server(server_config)

        # Create a task to run the server
        self._server_task = asyncio.create_task(server.serve())

        # Wait a short time to ensure the server starts.
        # NOTE(review): this is a fixed heuristic delay, not a readiness
        # check — the server may not yet be accepting connections.
        await asyncio.sleep(0.5)

        protocol = "https" if self.ssl_certfile else "http"
        logger.info(f"Server started at {protocol}://{self.host}:{self.port}")

    async def stop(self) -> None:
        """
        Stop the server if it's running asynchronously.
        """
        if self._server_task and not self._server_task.done():
            # Signal the server to exit (advisory; see NOTE in __init__)
            self._should_exit.set()

            # Cancel the server task — this is what actually shuts it down
            self._server_task.cancel()

            try:
                await self._server_task
            except asyncio.CancelledError:
                # Expected: cancellation surfaces here once the task unwinds
                logger.info("Server stopped")

            self._server_task = None
```

--------------------------------------------------------------------------------
/libs/lume/src/VM/VMDetailsPrinter.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// Prints VM status information in a formatted table
/// Prints VM status information in a formatted table
enum VMDetailsPrinter {
    /// Represents a column in the VM status table
    private struct Column: Sendable {
        // Column title printed in the header row
        let header: String
        // Fixed character width the value is padded/truncated to
        let width: Int
        // Extracts this column's display string from a VM's details
        let getValue: @Sendable (VMDetails) -> String
    }

    /// Configuration for all columns in the status table
    private static let columns: [Column] = [
        Column(header: "name", width: 34, getValue: { $0.name }),
        Column(header: "os", width: 8, getValue: { $0.os }),
        Column(header: "cpu", width: 8, getValue: { String($0.cpuCount) }),
        Column(
            header: "memory", width: 8,
            getValue: {
                // memorySize is divided by 1024^3, i.e. treated as bytes → GB
                String(format: "%.2fG", Float($0.memorySize) / (1024 * 1024 * 1024))
            }),
        Column(
            header: "disk", width: 16,
            getValue: {
                "\($0.diskSize.formattedAllocated)/\($0.diskSize.formattedTotal)"
            }),
        Column(header: "display", width: 12, getValue: { $0.display }),
        Column(
            header: "status", width: 16,
            getValue: {
                $0.status
            }),
        Column(header: "storage", width: 16, getValue: { $0.locationName }),
        Column(
            header: "shared_dirs", width: 54,
            getValue: { vm in
                // Only show shared directories if the VM is running
                if vm.status == "running", let dirs = vm.sharedDirectories, !dirs.isEmpty {
                    return dirs.map { "\($0.hostPath) (\($0.readOnly ? "ro" : "rw"))" }.joined(separator: ", ")
                } else {
                    return "-"
                }
            }),
        Column(
            header: "ip", width: 16,
            getValue: {
                $0.ipAddress ?? "-"
            }),
        Column(
            header: "vnc", width: 50,
            getValue: {
                $0.vncUrl ?? "-"
            }),
    ]

    /// Prints the status of all VMs either as pretty-printed JSON or as a
    /// fixed-width text table (header row followed by one row per VM).
    /// - Parameters:
    ///   - vms: Array of VM status objects to display
    ///   - format: Output format (.json or text)
    ///   - print: Sink for each output line; defaults to stdout
    /// - Throws: Encoding errors from JSONEncoder in JSON mode
    static func printStatus(
        _ vms: [VMDetails], format: FormatOption, print: (String) -> Void = { print($0) }
    ) throws {
        if format == .json {
            let jsonEncoder = JSONEncoder()
            jsonEncoder.outputFormatting = .prettyPrinted
            let jsonData = try jsonEncoder.encode(vms)
            let jsonString = String(data: jsonData, encoding: .utf8)!
            print(jsonString)
        } else {
            printHeader(print: print)
            vms.forEach({ vm in 
                printVM(vm, print: print)
            })
        }
    }

    /// Emits the single header row with each title padded to its column width.
    private static func printHeader(print: (String) -> Void = { print($0) }) {
        let paddedHeaders = columns.map { $0.header.paddedToWidth($0.width) }
        print(paddedHeaders.joined())
    }

    /// Emits one table row for a VM, padding each value to its column width.
    private static func printVM(_ vm: VMDetails, print: (String) -> Void = { print($0) }) {
        let paddedColumns = columns.map { column in
            column.getValue(vm).paddedToWidth(column.width)
        }
        print(paddedColumns.joined())
    }
}

extension String {
    /// Pads the string with trailing spaces to the specified width.
    /// Values longer than `width` are truncated to exactly `width`
    /// (the documented behavior of `padding(toLength:withPad:startingAt:)`).
    /// - Parameter width: Target width for padding
    /// - Returns: String of exactly `width` characters
    fileprivate func paddedToWidth(_ width: Int) -> String {
        padding(toLength: width, withPad: " ", startingAt: 0)
    }
}

```

--------------------------------------------------------------------------------
/libs/lume/src/VM/DarwinVM.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// macOS-specific virtual machine implementation.
///
/// Extends the generic `VM` base class with the macOS install flow:
/// `setup(...)` resolves an IPSW restore image, raises CPU/memory to the
/// image's minimums, persists the VM configuration, and drives the installer.
@MainActor
final class DarwinVM: VM {
    // Resolves IPSW images: downloads "latest" and reads install requirements.
    private let imageLoader: ImageLoader

    /// Creates a macOS VM wrapper.
    /// - Parameters:
    ///   - vmDirContext: On-disk directory context for this VM.
    ///   - virtualizationServiceFactory: Builds the virtualization backend;
    ///     defaults to `DarwinVirtualizationService`.
    ///   - vncServiceFactory: Builds the VNC service for the VM directory.
    ///   - imageLoader: Source of IPSW images and their requirements.
    init(
        vmDirContext: VMDirContext,
        virtualizationServiceFactory: @escaping (VMVirtualizationServiceContext) throws -> VMVirtualizationService = { try DarwinVirtualizationService(configuration: $0) },
        vncServiceFactory: @escaping (VMDirectory) -> VNCService = { DefaultVNCService(vmDirectory: $0) },
        imageLoader: ImageLoader
    ) {
        self.imageLoader = imageLoader
        super.init(
            vmDirContext: vmDirContext,
            virtualizationServiceFactory: virtualizationServiceFactory,
            vncServiceFactory: vncServiceFactory
        )
    }

    override func getOSType() -> String {
        return "macOS"
    }

    // MARK: - Installation and Configuration
    
    /// Installs macOS into this VM from an IPSW image.
    /// - Parameters:
    ///   - ipswPath: Path to an IPSW file, or the sentinel "latest" to
    ///     download the newest supported image.
    ///   - cpuCount: Requested CPU count; raised to the image minimum if lower.
    ///   - memorySize: Requested memory in bytes; raised to the image minimum if lower.
    ///   - diskSize: Disk size in bytes (applied as requested; no minimum check here).
    ///   - display: Display resolution string for the VM configuration.
    override func setup(ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String) async throws {
        let imagePath: Path
        if ipswPath == "latest" {
            Logger.info("Downloading latest supported Image...")
            let downloadedPath = try await self.imageLoader.downloadLatestImage()
            imagePath = Path(downloadedPath.path)
        } else {
            imagePath = Path(ipswPath)
        }

        // The image dictates hardware minimums (CPU, memory, hardware model).
        let requirements = try await imageLoader.loadImageRequirements(from: imagePath.url)
        try setDiskSize(diskSize)

        // Never go below the image's minimum supported CPU count.
        let finalCpuCount = max(cpuCount, requirements.minimumSupportedCPUCount)
        try setCpuCount(finalCpuCount)
        if finalCpuCount != cpuCount {
            Logger.info("CPU count overridden due to minimum image requirements", metadata: ["original": "\(cpuCount)", "final": "\(finalCpuCount)"])
        }

        // Never go below the image's minimum supported memory size.
        let finalMemorySize = max(memorySize, requirements.minimumSupportedMemorySize)
        try setMemorySize(finalMemorySize)
        if finalMemorySize != memorySize {
            Logger.info("Memory size overridden due to minimum image requirements", metadata: ["original": "\(memorySize)", "final": "\(finalMemorySize)"])
        }

        // Persist the final configuration before creating the service, so the
        // virtualization context is built from consistent values.
        try updateVMConfig(
            vmConfig: try VMConfig(
                os: getOSType(),
                cpuCount: finalCpuCount,
                memorySize: finalMemorySize,
                diskSize: diskSize,
                macAddress: DarwinVirtualizationService.generateMacAddress(),
                display: display,
                hardwareModel: requirements.hardwareModel,
                machineIdentifier: DarwinVirtualizationService.generateMachineIdentifier()
            )
        )

        let service: any VMVirtualizationService = try virtualizationServiceFactory(
            try createVMVirtualizationServiceContext(
                cpuCount: finalCpuCount,
                memorySize: finalMemorySize,
                display: display
            )
        )
        // macOS installation needs Darwin-specific capabilities (aux storage,
        // installer), so a generic service is rejected here.
        guard let darwinService = service as? DarwinVirtualizationService else {
            throw VMError.internalError("Installation requires DarwinVirtualizationService")
        }

        // Create auxiliary storage with hardware model
        try darwinService.createAuxiliaryStorage(at: vmDirContext.nvramPath, hardwareModel: requirements.hardwareModel)

        try await darwinService.installMacOS(imagePath: imagePath) { progress in
            Logger.info("Installing macOS", metadata: ["progress": "\(Int(progress * 100))%"])
        }
    }
}

```

--------------------------------------------------------------------------------
/scripts/build.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Development bootstrap: wipes any previous build artifacts, recreates the
# project virtualenv, and (below) installs every workspace package in
# editable mode.

# Exit on error
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print step information
print_step() {
    echo -e "${BLUE}==> $1${NC}"
}

# Function to print success message
print_success() {
    echo -e "${GREEN}==> Success: $1${NC}"
}

# Function to print error message
print_error() {
    echo -e "${RED}==> Error: $1${NC}" >&2
}

# Get the script's directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "${SCRIPT_DIR}/.." && pwd )"

# Change to project root
cd "$PROJECT_ROOT"

# Load environment variables from .env.local
if [ -f .env.local ]; then
    print_step "Loading environment variables from .env.local..."
    # set -a exports every variable assigned by the sourced file.
    set -a
    source .env.local
    set +a
    print_success "Environment variables loaded"
else
    print_error ".env.local file not found"
    exit 1
fi

# Clean up existing environments and cache
# NOTE(review): deleting directories while find is still traversing them can
# print harmless "No such file or directory" warnings.
print_step "Cleaning up existing environments..."
find . -type d -name "__pycache__" -exec rm -rf {} +
find . -type d -name ".pytest_cache" -exec rm -rf {} +
find . -type d -name "dist" -exec rm -rf {} +
find . -type d -name ".venv" -exec rm -rf {} +
find . -type d -name "*.egg-info" -exec rm -rf {} +
print_success "Environment cleanup complete"

# Create and activate virtual environment
print_step "Creating virtual environment..."
python -m venv .venv
source .venv/bin/activate

# Upgrade pip and install build tools
print_step "Upgrading pip and installing build tools..."
python -m pip install --upgrade pip setuptools wheel
# Installs one workspace package in editable mode.
#
# Arguments:
#   $1 - package directory (relative to $PROJECT_ROOT)
#   $2 - human-readable package name (used for log output only)
#   $3 - optional pip extras, e.g. "all" -> pip install -e ".[all]"
#
# The pyproject.toml check happens *before* changing directory, so on the
# failure path the working directory is left untouched. (The original
# returned from inside $package_dir, leaving the cwd wrong for any caller
# that tolerates the non-zero status.)
install_package() {
    local package_dir=$1
    local package_name=$2
    local extras=$3

    print_step "Installing ${package_name}..."

    if [ ! -f "${package_dir}/pyproject.toml" ]; then
        print_error "No pyproject.toml found in ${package_dir}"
        return 1
    fi

    cd "$package_dir"
    if [ -n "$extras" ]; then
        pip install -e ".[${extras}]"
    else
        pip install -e .
    fi
    cd "$PROJECT_ROOT"
}

# Install packages in order of dependency
# (core and pylume first; everything else depends on them directly or
# transitively, and editable installs resolve siblings from the local tree.)
print_step "Installing packages in development mode..."

# Install core first (base package with telemetry support)
install_package "libs/python/core" "core"

# Install pylume (base dependency)
install_package "libs/python/pylume" "pylume"

# Install computer with all its dependencies and extras
install_package "libs/python/computer" "computer" "all"

# Install omniparser
install_package "libs/python/som" "som"

# Install agent with all its dependencies and extras
install_package "libs/python/agent" "agent" "all"

# Install computer-server
install_package "libs/python/computer-server" "computer-server"

# Install mcp-server
install_package "libs/python/mcp-server" "mcp-server"

# Install development tools from root project
print_step "Installing development dependencies..."
pip install -e ".[dev,test,docs]"

# Create a .env file for VS Code to use the virtual environment
# (PYTHONPATH lists every workspace package so the editor resolves imports.)
print_step "Creating .env file for VS Code..."
echo "PYTHONPATH=${PROJECT_ROOT}/libs/python/core:${PROJECT_ROOT}/libs/python/computer:${PROJECT_ROOT}/libs/python/agent:${PROJECT_ROOT}/libs/python/som:${PROJECT_ROOT}/libs/python/pylume:${PROJECT_ROOT}/libs/python/computer-server:${PROJECT_ROOT}/libs/python/mcp-server" > .env

print_success "All packages installed successfully!"
print_step "Your virtual environment is ready. To activate it:"
echo "  source .venv/bin/activate"

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/image_retention.py:
--------------------------------------------------------------------------------

```python
"""
Image retention callback handler that limits the number of recent images in message history.
"""

from typing import List, Dict, Any, Optional
from .base import AsyncCallbackHandler


class ImageRetentionCallback(AsyncCallbackHandler):
    """
    Callback handler that applies image retention policy to limit the number
    of recent images in message history to prevent context window overflow.
    """

    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.

        Args:
            only_n_most_recent_images: If set, only keep the N most recent images in message history
        """
        self.only_n_most_recent_images = only_n_most_recent_images

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Apply image retention policy to messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            List of messages with image retention policy applied
        """
        if self.only_n_most_recent_images is None:
            return messages
        return self._apply_image_retention(messages)

    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Keep only the N most recent screenshots in the message history.

        Drops older computer_call_output items that carry an image_url,
        together with the computer_call that produced each one (matched by
        call_id on the immediately preceding item) and a single reasoning
        item directly before that call, if present.

        Args:
            messages: List of message dictionaries

        Returns:
            Filtered list of messages with image retention applied
        """
        limit = self.only_n_most_recent_images
        if limit is None:
            return messages

        # Positions of every screenshot-bearing computer_call_output.
        screenshot_positions = [
            pos
            for pos, item in enumerate(messages)
            if item.get("type") == "computer_call_output"
            and isinstance(item.get("output"), dict)
            and "image_url" in item["output"]
        ]

        # Already within budget: nothing to trim.
        if len(screenshot_positions) <= limit:
            return messages

        # The most recent `limit` screenshots survive untouched.
        survivors = set(screenshot_positions[-limit:])

        doomed: set = set()
        for pos in screenshot_positions:
            if pos in survivors:
                continue

            # Drop the screenshot output itself.
            doomed.add(pos)

            # Drop the adjacent computer_call with the same call_id, if any.
            call_pos = pos - 1
            if (
                call_pos >= 0
                and messages[call_pos].get("type") == "computer_call"
                and messages[call_pos].get("call_id") == messages[pos].get("call_id")
            ):
                doomed.add(call_pos)
                # And one reasoning item immediately before that call.
                reason_pos = call_pos - 1
                if reason_pos >= 0 and messages[reason_pos].get("type") == "reasoning":
                    doomed.add(reason_pos)

        return [item for pos, item in enumerate(messages) if pos not in doomed]
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/interface/models.py:
--------------------------------------------------------------------------------

```python
from enum import Enum
from typing import Dict, List, Any, TypedDict, Union, Literal
from dataclasses import dataclass

@dataclass
class CommandResult:
    """Result of a shell command execution, mirroring subprocess conventions.

    The hand-written __init__ previously defined here was redundant:
    @dataclass generates an identical __init__(stdout, stderr, returncode)
    from the field declarations (plus __repr__ and __eq__).
    """
    # Captured standard output of the command.
    stdout: str
    # Captured standard error of the command.
    stderr: str
    # Process exit status; 0 conventionally means success.
    returncode: int

# Navigation key literals (cursor/page movement names accepted by press_key)
NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'up', 'down']

# Special key literals (editing/control keys)
SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']

# Modifier key literals (held in combination with other keys)
ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']

# Function key literals (f1 through f12)
FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']

class Key(Enum):
    """Keyboard keys that can be used with press_key.

    These key names map to PyAutoGUI's expected key names.

    Note: several members share a value and are therefore Enum *aliases* of
    the first member with that value (ENTER aliases RETURN, ESC aliases
    ESCAPE) — member order must not change, or the canonical names flip.
    """
    # Navigation
    PAGE_DOWN = 'pagedown'
    PAGE_UP = 'pageup'
    HOME = 'home'
    END = 'end'
    LEFT = 'left'
    RIGHT = 'right'
    UP = 'up'
    DOWN = 'down'
    
    # Special keys
    RETURN = 'enter'
    ENTER = 'enter'
    ESCAPE = 'esc'
    ESC = 'esc'
    TAB = 'tab'
    SPACE = 'space'
    BACKSPACE = 'backspace'
    DELETE = 'del'
    
    # Modifier keys
    ALT = 'alt'
    CTRL = 'ctrl'
    SHIFT = 'shift'
    WIN = 'win'
    COMMAND = 'command'
    OPTION = 'option'
    
    # Function keys
    F1 = 'f1'
    F2 = 'f2'
    F3 = 'f3'
    F4 = 'f4'
    F5 = 'f5'
    F6 = 'f6'
    F7 = 'f7'
    F8 = 'f8'
    F9 = 'f9'
    F10 = 'f10'
    F11 = 'f11'
    F12 = 'f12'

    @classmethod
    def from_string(cls, key: str) -> 'Key | str':
        """Convert a string key name to a Key enum value.

        The lookup is case-insensitive and ignores surrounding whitespace,
        but the *original* (un-normalized) string is returned when no
        mapping matches — e.g. single characters like 'a' pass through.

        Args:
            key: String key name to convert
            
        Returns:
            Key enum value if the string matches a known alternative name,
            otherwise the original string unchanged
        """
        # Map common alternative names to enum values
        key_mapping = {
            'page_down': cls.PAGE_DOWN,
            'page down': cls.PAGE_DOWN,
            'pagedown': cls.PAGE_DOWN,
            'page_up': cls.PAGE_UP,
            'page up': cls.PAGE_UP,
            'pageup': cls.PAGE_UP,
            'return': cls.RETURN,
            'enter': cls.ENTER,
            'escape': cls.ESCAPE,
            'esc': cls.ESC,
            'delete': cls.DELETE,
            'del': cls.DELETE,
            # Modifier key mappings
            'alt': cls.ALT,
            'ctrl': cls.CTRL,
            'control': cls.CTRL,
            'shift': cls.SHIFT,
            'win': cls.WIN,
            'windows': cls.WIN,
            'super': cls.WIN,
            'command': cls.COMMAND,
            'cmd': cls.COMMAND,
            '⌘': cls.COMMAND,
            'option': cls.OPTION,
            '⌥': cls.OPTION,
        }
        
        # Normalize only for the lookup; fall back to the original string.
        normalized = key.lower().strip()
        return key_mapping.get(normalized, key)

# Combined key type: any Key enum member, any of the literal name groups
# above, or a free-form string (e.g. a single character) passed through.
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]

# Mouse button names accepted by mouse actions
MouseButton = Literal['left', 'right', 'middle']

class AccessibilityWindow(TypedDict):
    """Information about a window in the accessibility tree."""
    app_name: str              # Owning application's name
    pid: int                   # Owning application's process id
    frontmost: bool            # True if this app is frontmost
    has_windows: bool          # True if the app reports any windows
    windows: List[Dict[str, Any]]  # Per-window detail dicts (schema provider-defined)

class AccessibilityTree(TypedDict):
    """Complete accessibility tree information."""
    success: bool                  # Whether the tree was retrieved successfully
    frontmost_application: str     # Name of the frontmost application
    windows: List[AccessibilityWindow] 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/base.py:
--------------------------------------------------------------------------------

```python
"""Base provider interface for VM backends."""

import abc
from enum import StrEnum
from typing import Dict, List, Optional, Any, AsyncContextManager


class VMProviderType(StrEnum):
    """Enum of supported VM provider types.

    NOTE(review): StrEnum requires Python 3.11+; members compare equal to
    their string values (e.g. ``VMProviderType.LUME == "lume"``).
    """
    LUME = "lume"
    LUMIER = "lumier"
    CLOUD = "cloud"
    WINSANDBOX = "winsandbox"
    DOCKER = "docker"
    UNKNOWN = "unknown"  # fallback for unrecognized provider strings


class BaseVMProvider(AsyncContextManager):
    """Base interface for VM providers.
    
    All VM provider implementations must implement this interface.

    Inherits AsyncContextManager so providers are used as
    ``async with provider: ...``; the base is an ABC, so the
    @abc.abstractmethod decorators below are enforced on instantiation.
    """
    
    @property
    @abc.abstractmethod
    def provider_type(self) -> VMProviderType:
        """Get the provider type."""
        pass
        
    @abc.abstractmethod
    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Get VM information by name.
        
        Args:
            name: Name of the VM to get information for
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM information including status, IP address, etc.
        """
        pass
        
    @abc.abstractmethod
    async def list_vms(self) -> List[Dict[str, Any]]:
        """List all available VMs.

        Returns:
            List of dictionaries, one per VM, in provider-defined shape.
        """
        pass
        
    @abc.abstractmethod
    async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        """Run a VM by name with the given options.
        
        Args:
            image: Name/tag of the image to use
            name: Name of the VM to run
            run_opts: Dictionary of run options (memory, cpu, etc.)
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM run status and information
        """
        pass
        
    @abc.abstractmethod
    async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Stop a VM by name.
        
        Args:
            name: Name of the VM to stop
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM stop status and information
        """
        pass
        
    @abc.abstractmethod
    async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        """Update VM configuration.
        
        Args:
            name: Name of the VM to update
            update_opts: Dictionary of update options (memory, cpu, etc.)
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM update status and information
        """
        pass
        
    @abc.abstractmethod
    async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str:
        """Get the IP address of a VM, waiting indefinitely until it's available.
        
        Args:
            name: Name of the VM to get the IP for
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
            retry_delay: Delay between retries in seconds (default: 2)
            
        Returns:
            IP address of the VM when it becomes available
        """
        pass

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/migration-guide.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Migration Guide
---

This guide lists **breaking changes** when migrating from the original `ComputerAgent` (v0.3.x) to the rewritten `ComputerAgent` (v0.4.x) and shows old vs new usage for all four agent loops.

## Breaking Changes

- **Initialization:**
  - `ComputerAgent` (v0.4.x) uses `model` as a string (e.g. "anthropic/claude-3-5-sonnet-20241022") instead of `LLM` and `AgentLoop` objects.
  - `tools` is a list (can include multiple computers and decorated functions).
  - `callbacks` are now first-class for extensibility (image retention, budget, trajectory, logging, etc).
- **No explicit `loop` parameter:**
  - Loop is inferred from the `model` string (e.g. `anthropic/`, `openai/`, `omniparser+`, `ui-tars`).
- **No explicit `computer` parameter:**
  - Computers are added to `tools` list.

---

## Usage Examples: Old vs New

### 1. Anthropic Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.ANTHROPIC,
        model=LLM(provider=LLMProvider.ANTHROPIC)
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="anthropic/claude-3-5-sonnet-20241022",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

### 2. OpenAI Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.OPENAI,
        model=LLM(provider=LLMProvider.OPENAI)
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="openai/computer-use-preview",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

### 3. UI-TARS Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.UITARS,
        model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://.../v1")
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

### 4. Omni Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.OMNI,
        model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="omniparser+ollama_chat/gemma3",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

```

--------------------------------------------------------------------------------
/docs/content/docs/libraries/lume/faq.md:
--------------------------------------------------------------------------------

```markdown
---
title: FAQ
---

### Where are the VMs stored?

VMs are stored in `~/.lume` by default. You can configure additional storage locations using the `lume config` command.

### How are images cached?

Images are cached in `~/.lume/cache`. When doing `lume pull <image>`, it will check if the image is already cached. If not, it will download the image and cache it, removing any older versions.

### Where is the configuration file stored?

Lume follows the XDG Base Directory specification for the configuration file:

- Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`)

By default, other data is stored in:
- VM data: `~/.lume`
- Cache files: `~/.lume/cache`

The config file contains settings for:
- VM storage locations and the default location
- Cache directory location
- Whether caching is enabled

You can view and modify these settings using the `lume config` commands:

```bash
# View current configuration
lume config get

# Manage VM storage locations
lume config storage list                 # List all VM storage locations
lume config storage add <name> <path>    # Add a new VM storage location
lume config storage remove <name>        # Remove a VM storage location
lume config storage default <name>       # Set the default VM storage location

# Manage cache settings
lume config cache get                    # Get current cache directory
lume config cache set <path>             # Set cache directory

# Manage image caching settings
lume config caching get                  # Show current caching status
lume config caching set <boolean>        # Enable or disable image caching
```

### How do I use multiple VM storage locations?

Lume supports storing VMs in different locations (e.g., internal drive, external SSD). After configuring storage locations, you can specify which location to use with the `--storage` parameter in various commands:

```bash
# Create a VM in a specific storage location
lume create my-vm --os macos --ipsw latest --storage ssd

# Run a VM from a specific storage location
lume run my-vm --storage ssd

# Delete a VM from a specific storage location
lume delete my-vm --storage ssd

# Pull an image to a specific storage location
lume pull macos-sequoia-vanilla:latest --name my-vm --storage ssd

# Clone a VM between storage locations
lume clone source-vm cloned-vm --source-storage default --dest-storage ssd
```

If you don't specify a storage location, Lume will use the default one or search across all configured locations.

### Are VM disks taking up all the disk space?

No, macOS uses sparse files, which only allocate space as needed. For example, VM disks totaling 50 GB may only use 20 GB on disk.

### How do I get the latest macOS restore image URL?

```bash
lume ipsw
```

### How do I delete a VM?

```bash
lume delete <name>
```

### How to Install macOS from an IPSW Image

#### Create a new macOS VM using the latest supported IPSW image:
Run the following command to create a new macOS virtual machine using the latest available IPSW image:

```bash
lume create <name> --os macos --ipsw latest
```

#### Create a new macOS VM using a specific IPSW image:
To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source.

Then, use the downloaded IPSW path:

```bash
lume create <name> --os macos --ipsw <downloaded_ipsw_path>
```

### How do I install a custom Linux image?

The process for creating a custom Linux image differs from that of macOS, as IPSW restore files are not used. You need to create a Linux VM first, then mount a setup image file to the VM for the first boot.

```bash
lume create <name> --os linux

lume run <name> --mount <path-to-setup-image>

lume run <name>
```

```

--------------------------------------------------------------------------------
/scripts/run-docker-dev.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Colored log helpers: each prints "==> <msg>" wrapped in an ANSI color.
# printf '%b\n' expands the backslash escapes stored in the color
# variables, matching what `echo -e` did.
print_info() {
    printf '%b\n' "${BLUE}==> $1${NC}"
}

print_success() {
    printf '%b\n' "${GREEN}==> $1${NC}"
}

print_error() {
    printf '%b\n' "${RED}==> $1${NC}"
}

# Docker image name
IMAGE_NAME="cua-dev-image"
CONTAINER_NAME="cua-dev-container"
PLATFORM="linux/arm64"

# Detect platform based on architecture
arch=$(uname -m)

if [[ $arch == x86_64* ]]; then
    PLATFORM="linux/amd64"
    print_info "X64 Architecture detected, using platform: ${PLATFORM}"
elif [[ $arch == i*86 ]]; then
    PLATFORM="linux/386"
    print_info "X32 Architecture detected, using platform: ${PLATFORM}"
elif [[ $arch == arm* ]] || [[ $arch == aarch64 ]]; then
    # "aarch64" does not match the arm* glob, hence the explicit second test;
    # macOS reports "arm64", which arm* does match.
    PLATFORM="linux/arm64"
    print_info "ARM Architecture detected, using platform: ${PLATFORM}"
else
    # Fallback to amd64 for unknown architectures
    PLATFORM="linux/amd64"
    print_info "Unknown architecture ($arch), defaulting to platform: ${PLATFORM}"
fi

# Environment variables
# PYTHONPATH lists every workspace package as mounted inside the container.
PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/pylume:/app/libs/python/computer-server:/app/libs/python/mcp-server"

# Check if Docker is installed
if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please install Docker first."
    exit 1
fi

# Command options
# Dispatch on the first CLI argument: build | run [--interactive|file] | stop.
case "$1" in
    build)
        print_info "Building the development Docker image..."
        print_info "This will install all dependencies but won't include source code"
        docker build -f Dockerfile --platform=${PLATFORM} -t ${IMAGE_NAME} .
        print_success "Development Docker image built successfully!"
        ;;
    
    run)
        # Check for interactive flag
        if [ "$2" == "--interactive" ]; then
            print_info "Running the development Docker container with interactive shell..."
            print_info "Mounting source code from host"
            print_info "Connecting to host.docker.internal:7777"
            
            # Interactive shell with the repo mounted at /app.
            docker run -it --rm \
                --platform=${PLATFORM} \
                --name ${CONTAINER_NAME} \
                -v "$(pwd):/app" \
                -e PYTHONPATH=${PYTHONPATH} \
                -e DISPLAY=${DISPLAY:-:0} \
                -e PYLUME_HOST="host.docker.internal" \
                -p 7860:7860 \
                ${IMAGE_NAME} bash
        else
            # Run the specified example
            if [ -z "$2" ]; then
                print_error "Please specify an example file, e.g., ./run-docker-dev.sh run computer_examples.py"
                exit 1
            fi
            print_info "Running example: $2"
            print_info "Connecting to host.docker.internal:7777"
            
            # Same container setup, but executes one example script and exits.
            docker run -it --rm \
                --platform=${PLATFORM} \
                --name ${CONTAINER_NAME} \
                -v "$(pwd):/app" \
                -e PYTHONPATH=${PYTHONPATH} \
                -e DISPLAY=${DISPLAY:-:0} \
                -e PYLUME_HOST="host.docker.internal" \
                -p 7860:7860 \
                ${IMAGE_NAME} python "/app/examples/$2"
        fi
        ;;
    
    stop)
        print_info "Stopping any running containers..."
        # Ignore errors if the container is not running.
        docker stop ${CONTAINER_NAME} 2>/dev/null || true
        print_success "Done!"
        ;;
        
    *)
        echo "Usage: $0 {build|run [--interactive] [filename]|stop}"
        echo ""
        echo "Commands:"
        echo "  build                      Build the development Docker image with dependencies"
        echo "  run [example_filename]     Run the specified example file in the container"
        echo "  run --interactive          Run the container with mounted code and get an interactive shell"
        echo "  stop                       Stop the container"
        exit 1
esac

exit 0 
```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Run.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation
import Virtualization

struct Run: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Run a virtual machine"
    )

    /// VM name or image reference to pull and run (format: name or name:tag).
    @Argument(
        help: "Name of the virtual machine or image to pull and run (format: name or name:tag)",
        completion: .custom(completeVMName))
    var name: String

    /// Suppresses launching the VNC client after the VM starts.
    @Flag(name: [.short, .long], help: "Do not start the VNC client")
    var noDisplay: Bool = false

    /// Raw "path" or "path:tag" strings; parsed by `parsedSharedDirectories`.
    @Option(
        name: [.customLong("shared-dir")],
        help:
            "Directory to share with the VM. Can be just a path for read-write access (e.g. ~/src) or path:tag where tag is 'ro' for read-only or 'rw' for read-write (e.g. ~/src:ro)"
    )
    var sharedDirectories: [String] = []

    @Option(
        help:
            "For Linux VMs only, a read-only disk image to attach to the VM (e.g. --mount=\"ubuntu.iso\")",
        completion: .file())
    var mount: String?

    /// Raw paths of disk images to attach as USB mass storage devices.
    @Option(
        name: [.customLong("usb-storage")],
        help: "Disk image to attach as a USB mass storage device (e.g. --usb-storage=\"disk.img\")",
        completion: .file())
    var usbStorageDevices: [String] = []

    @Option(help: "Github Container Registry to pull the images from. Defaults to ghcr.io")
    var registry: String = "ghcr.io"

    @Option(help: "Organization to pull the images from. Defaults to trycua")
    var organization: String = "trycua"

    @Option(
        name: [.customLong("vnc-port")],
        help: "Port to use for the VNC server. Defaults to 0 (auto-assign)")
    var vncPort: Int = 0

    // NOTE(review): declared as @Option (not @Flag), so the CLI requires an
    // explicit value, e.g. `--recovery-mode true` — confirm this is intended.
    @Option(help: "For MacOS VMs only, boot into the VM in recovery mode")
    var recoveryMode: Bool = false

    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    /// Parses each "path" or "path:tag" entry of `sharedDirectories` into a
    /// `SharedDirectory`, defaulting to read-write when no tag is given.
    /// - Throws: `ValidationError` if a tag is neither "ro" nor "rw".
    private var parsedSharedDirectories: [SharedDirectory] {
        get throws {
            try sharedDirectories.map { dirString -> SharedDirectory in
                // Split on the first ':' only; everything before it is the host path.
                let components = dirString.split(separator: ":", maxSplits: 1)
                let hostPath = String(components[0])

                // If no tag is provided, default to read-write
                if components.count == 1 {
                    return SharedDirectory(
                        hostPath: hostPath,
                        tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                        readOnly: false
                    )
                }

                // Parse the tag if provided
                let tag = String(components[1])
                let readOnly: Bool
                switch tag.lowercased() {
                case "ro":
                    readOnly = true
                case "rw":
                    readOnly = false
                default:
                    throw ValidationError(
                        "Invalid tag value. Must be either 'ro' for read-only or 'rw' for read-write"
                    )
                }

                return SharedDirectory(
                    hostPath: hostPath,
                    tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                    readOnly: readOnly
                )
            }
        }
    }

    /// Wraps each raw USB storage path string in a `Path`.
    private var parsedUSBStorageDevices: [Path] {
        usbStorageDevices.map { Path($0) }
    }

    init() {
    }

    /// Resolves the parsed options and delegates VM startup to `LumeController`.
    @MainActor
    func run() async throws {
        try await LumeController().runVM(
            name: name,
            noDisplay: noDisplay,
            sharedDirectories: parsedSharedDirectories,
            mount: mount.map { Path($0) },
            registry: registry,
            organization: organization,
            vncPort: vncPort,
            recoveryMode: recoveryMode,
            storage: storage,
            usbMassStoragePaths: parsedUSBStorageDevices.isEmpty ? nil : parsedUSBStorageDevices
        )
    }
}

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/opencua.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any
import re
import base64
from io import BytesIO

try:
    import torch  # type: ignore
    from transformers import AutoTokenizer, AutoModel, AutoImageProcessor  # type: ignore
    from PIL import Image  # type: ignore
    import blobfile as _ # assert blobfile is installed
    OPENCUA_AVAILABLE = True
except Exception:
    OPENCUA_AVAILABLE = False


class OpenCUAModel:
    """OpenCUA model handler using AutoTokenizer, AutoModel and AutoImageProcessor.

    Wraps a Hugging Face OpenCUA checkpoint and exposes a single
    :meth:`generate` entry point that consumes HF-style chat messages
    (optionally carrying one data-URL screenshot) and returns decoded text.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Load tokenizer, model and image processor for ``model_name``.

        Args:
            model_name: Hugging Face model id or local checkpoint path.
            device: Passed to ``device_map`` (e.g. "auto", "cuda:0").
            trust_remote_code: Forwarded to every ``from_pretrained`` call.

        Raises:
            ImportError: If the optional OpenCUA dependencies are missing.
        """
        if not OPENCUA_AVAILABLE:
            raise ImportError(
                "OpenCUA requirements not found. Install with: pip install \"cua-agent[opencua-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None
        self.tokenizer = None
        self.image_processor = None
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Instantiate tokenizer, model and image processor from the checkpoint."""
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, trust_remote_code=self.trust_remote_code
        )
        self.model = AutoModel.from_pretrained(
            self.model_name,
            torch_dtype="auto",
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
            attn_implementation="sdpa",
        )
        self.image_processor = AutoImageProcessor.from_pretrained(
            self.model_name, trust_remote_code=self.trust_remote_code
        )

    @staticmethod
    def _extract_last_image_b64(messages: List[Dict[str, Any]]) -> str:
        """Return the base64 payload of the most recent data-URL image.

        Scans messages newest-to-oldest and, within each message, content
        items last-to-first. Only items shaped like
        ``{"type": "image", "image": "data:image/...;base64,<payload>"}``
        are considered. Returns ``""`` when no such image exists.
        """
        for msg in reversed(messages):
            content = msg.get("content", [])
            # Content may be a plain string (text-only message); skip those.
            if not isinstance(content, list):
                continue
            for item in reversed(content):
                if isinstance(item, dict) and item.get("type") == "image":
                    url = item.get("image", "")
                    if isinstance(url, str) and url.startswith("data:image/"):
                        return url.split(",", 1)[1]
        return ""

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 512) -> str:
        """Run one generation step and return the decoded completion text.

        Args:
            messages: HF-format chat messages; the last data-URL image (if
                any) is forwarded to the vision tower.
            max_new_tokens: Generation budget for new tokens.

        Returns:
            The generated text with the prompt tokens stripped.
        """
        assert self.model is not None and self.tokenizer is not None and self.image_processor is not None

        # Tokenize text side using chat template
        input_ids = self.tokenizer.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True
        )
        input_ids = torch.tensor([input_ids]).to(self.model.device)

        # Prepare image inputs from last data URL image
        image_b64 = self._extract_last_image_b64(messages)
        pixel_values = None
        grid_thws = None
        if image_b64:
            image = Image.open(BytesIO(base64.b64decode(image_b64))).convert("RGB")
            image_info = self.image_processor.preprocess(images=[image])
            # NOTE(review): assumes the vision tower expects bfloat16 inputs —
            # confirm this matches the loaded model's dtype.
            pixel_values = torch.tensor(image_info["pixel_values"]).to(
                dtype=torch.bfloat16, device=self.model.device
            )
            grid_thws = torch.tensor(image_info["image_grid_thw"]) if "image_grid_thw" in image_info else None

        gen_kwargs: Dict[str, Any] = {
            "max_new_tokens": max_new_tokens,
            # Greedy decoding. The previous `temperature=0` is invalid for
            # sampling and merely triggers a warning when do_sample is False,
            # so state the deterministic intent explicitly instead.
            "do_sample": False,
        }
        if pixel_values is not None:
            gen_kwargs["pixel_values"] = pixel_values
        if grid_thws is not None:
            gen_kwargs["grid_thws"] = grid_thws

        with torch.no_grad():
            generated_ids = self.model.generate(
                input_ids,
                **gen_kwargs,
            )

        # Remove prompt tokens so only the completion is decoded.
        prompt_len = input_ids.shape[1]
        generated_ids = generated_ids[:, prompt_len:]
        output_text = self.tokenizer.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]
        return output_text

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/custom-computer-handlers.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Custom Computers
slug: custom-computer-handlers
---

The Agent SDK supports defining custom computer handlers using a simple dictionary interface. This enables integration with custom automation backends, testing frameworks, or specialized computer control systems.

## Example: Defining a Custom Computer Handler

```python
import asyncio
from PIL import Image

# Define your custom computer functions
async def take_screenshot():
    """Your custom screenshot implementation"""
    # Return PIL Image, bytes, or base64 string
    return Image.new('RGB', (1920, 1080), color='white')

# Create dict-based computer handler - only 'screenshot' is required
custom_computer = {
    'screenshot': take_screenshot, # required

    # everything below is optional
    'environment': 'linux', # linux, mac, windows, browser
    'dimensions': (1920, 1080), # (width, height)
    'click': lambda x, y, button: print(f"Clicking at ({x}, {y}) with {button} button"),
}
```

You can then use this as a tool for your agent:

```python
from agent import ComputerAgent

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[custom_computer],
)

# Agent will automatically convert dict to agent.computers.CustomComputerHandler
await agent.run("Take a screenshot and click at coordinates 100, 200")
```

## Class-Based Implementation

For more complex implementations, you can create a custom class by inheriting from `AsyncComputerHandler`:

```python
from agent.computers import AsyncComputerHandler
from PIL import Image
from typing import Literal, List, Dict, Union, Optional

class MyCustomComputer(AsyncComputerHandler):
    """Custom computer handler implementation."""
    
    def __init__(self):
        # Initialize your custom computer interface here
        pass
    
    # ==== Computer-Use-Preview Action Space ==== 

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type."""
        ...
    
    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        ...
    
    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string."""
        ...
    
    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with specified button."""
        ...
    
    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        ...
    
    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at coordinates with specified scroll amounts."""
        ...
    
    async def type(self, text: str) -> None:
        """Type text."""
        ...
    
    async def wait(self, ms: int = 1000) -> None:
        """Wait for specified milliseconds."""
        ...
    
    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        ...
    
    async def keypress(self, keys: Union[List[str], str]) -> None:
        """Press key combination."""
        ...
    
    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along specified path."""
        ...
    
    async def get_current_url(self) -> str:
        """Get current URL (for browser environments)."""
        ...
    
    # ==== Anthropic Action Space ==== 

    async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse down at coordinates."""
        ...
    
    async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse up at coordinates."""
        ...

# Use with agent
custom_computer = MyCustomComputer()

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[custom_computer],
)

await agent.run("Take a screenshot and click at coordinates 100, 200")
```
```

--------------------------------------------------------------------------------
/libs/python/som/som/models.py:
--------------------------------------------------------------------------------

```python
from typing import List, Tuple, Optional, Literal, Dict, Any, Union
from pydantic import BaseModel, Field, validator


class BoundingBox(BaseModel):
    """Axis-aligned bounding box with normalized corner coordinates.

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner.
    """

    x1: float = Field(..., description="Normalized left coordinate")
    y1: float = Field(..., description="Normalized top coordinate")
    x2: float = Field(..., description="Normalized right coordinate")
    y2: float = Field(..., description="Normalized bottom coordinate")

    @property
    def coordinates(self) -> List[float]:
        """Get coordinates as a list [x1, y1, x2, y2]."""
        return [self.x1, self.y1, self.x2, self.y2]


class UIElement(BaseModel):
    """Base class for UI elements detected in a screenshot."""

    # 1-indexed identifier; None until an id is assigned.
    id: Optional[int] = Field(None, description="Unique identifier for the element (1-indexed)")
    # Discriminates the concrete subclass (IconElement / TextElement).
    type: Literal["icon", "text"]
    bbox: BoundingBox
    interactivity: bool = Field(default=False, description="Whether the element is interactive")
    confidence: float = Field(default=1.0, description="Detection confidence score")


class IconElement(UIElement):
    """An interactive icon element (interactivity defaults to True)."""

    type: Literal["icon"] = "icon"
    interactivity: bool = True
    # Detection scale the icon was found at, when the detector reports one.
    scale: Optional[int] = Field(None, description="Detection scale used")


class TextElement(UIElement):
    """A non-interactive text element carrying its recognized content."""

    type: Literal["text"] = "text"
    content: str = Field(..., description="The text content")
    interactivity: bool = False


class ImageData(BaseModel):
    """Image data with dimensions."""

    base64: str = Field(..., description="Base64 encoded image data")
    width: int = Field(..., description="Image width in pixels")
    height: int = Field(..., description="Image height in pixels")

    # NOTE(review): v1-style @validator — the file also uses the v2
    # model_dump API; consider migrating to field_validator.
    @validator("width", "height")
    def dimensions_must_be_positive(cls, v):
        """Reject non-positive width/height values."""
        if v <= 0:
            raise ValueError("Dimensions must be positive")
        return v


class ParserMetadata(BaseModel):
    """Metadata about the parsing process (counts, device, timing)."""

    image_size: Tuple[int, int] = Field(
        ..., description="Original image dimensions (width, height)"
    )
    num_icons: int = Field(..., description="Number of icons detected")
    num_text: int = Field(..., description="Number of text elements detected")
    device: str = Field(..., description="Device used for detection (cpu/cuda/mps)")
    ocr_enabled: bool = Field(..., description="Whether OCR was enabled")
    latency: float = Field(..., description="Total processing time in seconds")

    @property
    def width(self) -> int:
        """Get image width from image_size."""
        return self.image_size[0]

    @property
    def height(self) -> int:
        """Get image height from image_size."""
        return self.image_size[1]


class ParseResult(BaseModel):
    """Result of parsing a UI screenshot."""

    elements: List[UIElement] = Field(..., description="Detected UI elements")
    annotated_image_base64: str = Field(..., description="Base64 encoded annotated image")
    metadata: ParserMetadata = Field(..., description="Processing metadata")
    screen_info: Optional[List[str]] = Field(
        None, description="Human-readable descriptions of elements"
    )
    parsed_content_list: Optional[List[Dict[str, Any]]] = Field(
        None, description="Parsed elements as dictionaries"
    )

    @property
    def image(self) -> ImageData:
        """Get image data as a convenience property."""
        return ImageData(
            base64=self.annotated_image_base64,
            width=self.metadata.width,
            height=self.metadata.height,
        )

    @property
    def width(self) -> int:
        """Get image width from metadata."""
        return self.metadata.width

    @property
    def height(self) -> int:
        """Get image height from metadata."""
        return self.metadata.height

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Convert model to dict for compatibility with older code.

        Forwards all standard pydantic ``model_dump`` keyword arguments
        (``mode``, ``include``, ``exclude``, ...); the previous override
        silently dropped them. Adds an ``image`` key mirroring the
        :attr:`image` convenience property for backward compatibility.
        """
        result = super().model_dump(**kwargs)
        # Add image data dict for backward compatibility
        result["image"] = self.image.model_dump()
        return result

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/customizing-computeragent.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Customizing Your ComputerAgent
---

<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.</Callout>

The `ComputerAgent` interface provides an easy proxy to any computer-using model configuration, and it is a powerful framework for extending and building your own agentic systems.

This guide shows four proven ways to increase capabilities and success rate:

- 1 — Simple: Prompt engineering
- 2 — Easy: Tools
- 3 — Intermediate: Callbacks
- 4 — Expert: Custom `@register_agent`

## 1) Simple: Prompt engineering

Provide guiding instructions to shape behavior. `ComputerAgent` accepts an optional `instructions: str | None` which acts like a system-style preface. Internally, this uses a callback that pre-pends a user message before each LLM call.

```python
from agent.agent import ComputerAgent

agent = ComputerAgent(
    model="openai/computer-use-preview",
    tools=[computer],
    instructions=(
        "You are a meticulous software operator. Prefer safe, deterministic actions. "
        "Always confirm via on-screen text before proceeding."
    ),
)
```

## 2) Easy: Tools

Expose deterministic capabilities as tools (Python functions or custom computer handlers). The agent will call them when appropriate.

```python
def calculate_percentage(numerator: float, denominator: float) -> str:
    """Calculate percentage as a string.

    Args:
        numerator: Numerator value
        denominator: Denominator value
    Returns:
        A formatted percentage string (e.g., '75.00%').
    """
    if denominator == 0:
        return "0.00%"
    return f"{(numerator/denominator)*100:.2f}%"

agent = ComputerAgent(
    model="openai/computer-use-preview",
    tools=[computer, calculate_percentage],
)
```

- See `docs/agent-sdk/custom-tools` for authoring function tools.
- See `docs/agent-sdk/custom-computer-handlers` for building full computer interfaces.

## 3) Intermediate: Callbacks

Callbacks provide lifecycle hooks to preprocess messages, postprocess outputs, record trajectories, manage costs, and more.

```python
from agent.callbacks import ImageRetentionCallback, TrajectorySaverCallback, BudgetManagerCallback

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[
        ImageRetentionCallback(only_n_most_recent_images=3),
        TrajectorySaverCallback("./trajectories"),
        BudgetManagerCallback(max_budget=10.0, raise_error=True),
    ],
)
```

- Browse callback implementations in `libs/python/agent/agent/callbacks/`.

## 4) Expert: Custom `@register_agent`

Build your own agent configuration class to control prompting, message shaping, and tool handling. This is the most flexible option for specialized domains.

- Register your own `model=...` loop using `@register_agent`
- Browse implementations in `libs/python/agent/agent/loops/`.
- Implement `predict_step()` (and optionally `predict_click()`) and return the standardized output schema.

```python
from agent.decorators import register_agent

@register_agent(models=r".*my-special-model.*", priority=10)
class MyCustomAgentConfig:
    async def predict_step(self, messages, model, tools, **kwargs):
        # 1) Format messages for your provider
        # 2) Call provider
        # 3) Convert responses to the agent output schema
        return {"output": [], "usage": {}}

    async def predict_click(self, model, image_b64, instruction):
        # Optional: click-only capability
        return None

    def get_capabilities(self):
        return ["step"]
```

## HUD integration (optional)

When using the HUD evaluation integration (`agent/integrations/hud/`), you can pass `instructions`, `tools`, and `callbacks` directly

```python
from agent.integrations.hud import run_single_task

await run_single_task(
    dataset="username/dataset-name",
    model="openai/computer-use-preview",
    instructions="Operate carefully. Always verify on-screen text before actions.",
    # tools=[your_custom_function],
    # callbacks=[YourCustomCallback()],
)
```
```

--------------------------------------------------------------------------------
/libs/python/pylume/pylume/client.py:
--------------------------------------------------------------------------------

```python
import json
import asyncio
import subprocess
from typing import Optional, Any, Dict
import shlex

from .exceptions import (
    LumeError,
    LumeServerError,
    LumeConnectionError,
    LumeTimeoutError,
    LumeNotFoundError,
    LumeConfigError,
)

class LumeClient:
    """Thin async HTTP client for the lume daemon, implemented via curl subprocesses."""

    def __init__(self, base_url: str, timeout: float = 60.0, debug: bool = False):
        """Create a client.

        Args:
            base_url: Base URL of the lume API (no trailing slash expected).
            timeout: Per-request timeout in seconds (passed to curl's -m flag).
            debug: When True, log the curl commands being executed.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.debug = debug

    def _log_debug(self, message: str, **kwargs) -> None:
        """Log debug information if debug mode is enabled."""
        if self.debug:
            print(f"DEBUG: {message}")
            if kwargs:
                print(json.dumps(kwargs, indent=2))

    async def _run_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None) -> Any:
        """Execute a curl command and return the parsed JSON response.

        Raises:
            LumeTimeoutError: When curl exits with code 28 (its timeout code,
                triggered by the -m flag).
            LumeConnectionError: When curl fails for any other reason.
            LumeNotFoundError / LumeConfigError / LumeServerError / LumeError:
                For HTTP 404 / 400 / 5xx / other >=400 statuses respectively.
        """
        from urllib.parse import urlencode

        url = f"{self.base_url}{path}"
        if params:
            # URL-encode the query string; a raw f-string join breaks on
            # spaces and reserved characters.
            url = f"{url}?{urlencode(params)}"

        cmd = ["curl", "-X", method, "-s", "-w", "%{http_code}", "-m", str(self.timeout)]

        if data is not None:
            cmd.extend(["-H", "Content-Type: application/json", "-d", json.dumps(data)])

        cmd.append(url)

        self._log_debug(f"Running curl command: {' '.join(map(shlex.quote, cmd))}")

        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        stdout, stderr = await process.communicate()

        # curl exit code 28 means the -m timeout elapsed. (The previous
        # `except asyncio.TimeoutError` could never fire, since
        # communicate() was not wrapped in wait_for().)
        if process.returncode == 28:
            raise LumeTimeoutError(f"Request timed out after {self.timeout} seconds")
        if process.returncode != 0:
            raise LumeConnectionError(f"Curl command failed: {stderr.decode()}")

        # The last 3 characters are the status code (from -w "%{http_code}")
        response = stdout.decode()
        status_code = int(response[-3:])
        response_body = response[:-3]  # Remove status code from response

        if status_code >= 400:
            if status_code == 404:
                raise LumeNotFoundError(f"Resource not found: {path}")
            elif status_code == 400:
                raise LumeConfigError(f"Invalid request: {response_body}")
            elif status_code >= 500:
                raise LumeServerError(f"Server error: {response_body}")
            else:
                raise LumeError(f"Request failed with status {status_code}: {response_body}")

        return json.loads(response_body) if response_body.strip() else None

    async def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """Make a GET request."""
        return await self._run_curl("GET", path, params=params)

    async def post(self, path: str, data: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None) -> Any:
        """Make a POST request, optionally overriding the timeout for this call only."""
        old_timeout = self.timeout
        if timeout is not None:
            self.timeout = timeout
        try:
            return await self._run_curl("POST", path, data=data)
        finally:
            # Always restore the client-wide timeout.
            self.timeout = old_timeout

    async def patch(self, path: str, data: Dict[str, Any]) -> None:
        """Make a PATCH request."""
        await self._run_curl("PATCH", path, data=data)

    async def delete(self, path: str) -> None:
        """Make a DELETE request."""
        await self._run_curl("DELETE", path)

    def print_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None) -> None:
        """Print equivalent curl command for debugging."""
        curl_cmd = f"""curl -X {method} \\
  '{self.base_url}{path}'"""

        if data:
            curl_cmd += f" \\\n  -H 'Content-Type: application/json' \\\n  -d '{json.dumps(data)}'"

        print("\nEquivalent curl command:")
        print(curl_cmd)
        print()

    async def close(self) -> None:
        """Close the client resources."""
        pass  # No shared resources to clean up
```

--------------------------------------------------------------------------------
/docs/src/components/iou.tsx:
--------------------------------------------------------------------------------

```typescript
'use client';
import React, { useRef, useEffect, useState, useCallback } from 'react';

/**
 * Represents a rectangle with position, dimensions, styling, and identification
 */
/**
 * Represents a rectangle with position, dimensions, styling, and identification.
 * Coordinates and sizes are in canvas pixels (drawn directly via fillRect).
 */
interface Rectangle {
  /** The x-coordinate of the rectangle's left edge */
  left: number;
  /** The y-coordinate of the rectangle's top edge */
  top: number;
  /** The width of the rectangle */
  width: number;
  /** The height of the rectangle */
  height: number;
  /** The fill color of the rectangle (any CSS color string) */
  fill: string;
  /** The display name of the rectangle, drawn as a label inside it */
  name: string;
}

/**
 * Props for the IOU component.
 */
interface IOUProps {
  /** The title to display above the visualization */
  title: string;
  /** The description text to display below the IOU value */
  description: string;
  /** The first rectangle for IOU calculation */
  rect1: Rectangle;
  /** The second rectangle for IOU calculation */
  rect2: Rectangle;
}

/**
 * A React component that visualizes and calculates the Intersection over Union (IOU) 
 * of two rectangles on a canvas
 * @param props - The component props
 * @returns The rendered IOU visualization component
 */
export default function IOU({ title, description, rect1, rect2 }: IOUProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const [actualIOU, setActualIOU] = useState<number>(0);

  /** Edge-coordinate form of a rectangle, used for overlap math. */
  type Bbox = { left: number; right: number; top: number; bottom: number };

  /**
   * Converts a rectangle to a bounding box with left, right, top, and bottom coordinates
   * @param rect - The rectangle to convert
   * @returns An object containing the bounding box coordinates
   */
  const getBbox = (rect: Rectangle): Bbox => ({
    left: rect.left,
    right: rect.left + rect.width,
    top: rect.top,
    bottom: rect.top + rect.height,
  });

  /**
   * Calculates the intersection area between two bounding boxes
   * @param bbox1 - The first bounding box
   * @param bbox2 - The second bounding box
   * @returns The area of intersection between the two bounding boxes (0 when disjoint)
   */
  const calcIntersection = (bbox1: Bbox, bbox2: Bbox): number => {
    const x1 = Math.max(bbox1.left, bbox2.left);
    const x2 = Math.min(bbox1.right, bbox2.right);
    const y1 = Math.max(bbox1.top, bbox2.top);
    const y2 = Math.min(bbox1.bottom, bbox2.bottom);

    // Check if there's actually an overlap
    if (x2 <= x1 || y2 <= y1) {
      return 0;
    }

    return (x2 - x1) * (y2 - y1);
  };

  /**
   * Calculates the area of a rectangle
   * @param rect - The rectangle to calculate area for
   * @returns The area of the rectangle
   */
  const calcArea = (rect: Rectangle): number => {
    return rect.width * rect.height;
  };

  /**
   * Draws the rectangles on the canvas and calculates the IOU value
   */
  const drawCanvas = useCallback(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    if (!ctx) return;

    // Clear canvas
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Calculate IOU; guard against zero-area rectangles, which would
    // otherwise produce NaN from 0 / 0.
    const bbox1 = getBbox(rect1);
    const bbox2 = getBbox(rect2);
    const intersection = calcIntersection(bbox1, bbox2);
    const union = calcArea(rect1) + calcArea(rect2) - intersection;
    setActualIOU(union > 0 ? intersection / union : 0);

    // Draw rectangles with their outlines and labels
    [rect1, rect2].forEach((rect) => {
      ctx.fillStyle = rect.fill;
      ctx.fillRect(rect.left, rect.top, rect.width, rect.height);

      ctx.strokeStyle = '#000';
      ctx.lineWidth = 2;
      ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);

      ctx.fillStyle = '#000';
      // A bare '12px' is not a valid CSS font shorthand and is ignored by
      // the canvas; a font family must be included for the size to apply.
      ctx.font = '12px sans-serif';
      ctx.fillText(rect.name, rect.left + 5, rect.top + 15);
    });
  }, [rect1, rect2]);

  useEffect(() => {
    drawCanvas();
  }, [drawCanvas]);

  return (
    <div className="">
      <h3 className="text-sm font-semibold ">{title}</h3>
      <div className="flex items-start gap-6">
        <div>
          <canvas
            ref={canvasRef}
            width={200}
            height={150}
            className="border bg-white rounded-md"
          />
          <div className="mt-2 text-sm">
            <div className="font-mono mb-2">IOU = {actualIOU.toFixed(3)}</div>
            <span className="">{description}</span>
          </div>
        </div>
      </div>
    </div>
  );
}

```

--------------------------------------------------------------------------------
/blog/cua-hackathon.md:
--------------------------------------------------------------------------------

```markdown
# Computer-Use Agents SOTA Challenge: Hack the North + Global Online

*Published on August 25, 2025 by Francesco Bonacci*

We’re bringing something new to [Hack the North](https://hackthenorth.com), Canada’s largest hackathon, this year: a head-to-head competition for **Computer-Use Agents** - on-site at Waterloo and a **Global online challenge**. From September 12–14, 2025, teams build on the **Cua Agent Framework** and are scored in **HUD’s OSWorld-Verified** environment to push past today’s SOTA on [OS-World](https://os-world.github.io).

<img src="./assets/hack-the-north.png">

## Track A: On-site @ Hack the North

There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Agent**. Use any model setup you like (cloud or local). After projects are submitted, [HUD](https://www.hud.so) runs the official benchmark; the top team earns a **guaranteed YC partner interview (W26 batch)**. We’ll also feature winners on our blog and socials and kit the team out with swag.

## Track B: Cua Global Online Hackathon

**Cua** and [**Ollama**](https://ollama.com) organize a global hackathon to find the **most creative uses of local and hybrid computer-use agents**. There are no geographic restrictions on who can join — this is a worldwide competition focused on **originality, impact, and inventive applications** that showcase what's possible with local and hybrid inference.

**Prizes:** 
- 1st **MacBook Air M4 (or equivalent value)** + features in Cua & Ollama channels
- 2nd **$500 CAD + swag**
- 3rd **swag + public feature**

---

## How it works

Two different tracks, two different processes:

### On-site (Track A)
Build during the weekend and submit a repo with a one-line start command. **HUD** executes your command in a clean environment and runs **OSWorld-Verified**. Scores come from official benchmark results; ties break by median, then wall-clock time, then earliest submission. Any model setup is allowed (cloud or local).

**HUD** runs official evaluations immediately after submission. Winners are announced at the **closing ceremony**.

### Rules
- Fork and star the [Cua repo](https://github.com/trycua/cua).
- Add your agent and instructions in `samples/community/hack-the-north/<YOUR_TEAM_NAME>`.
- Include a README with details on the approach and any required notes.  
- Submit a PR.  

**Deadline: Sept 15, 8:00 AM EDT**

### Global Online (Track B)
Open to anyone, anywhere. Build on your own timeline and submit through the **Cua Discord form** by the deadline.

**Project Requirements:**
- Your agent must integrate **Cua and Ollama** in some way
- Your agent must be **easily runnable by judges**

Judged by **Cua** and **Ollama** teams on:  
- **Creativity (30%)** – originality, usefulness, surprise factor  
- **Technical Depth (30%)** – quality of engineering and agent design  
- **Use of Ollama (30%)** – effective integration of local/hybrid inference  
- **Polish (10%)** – presentation, clarity, demo readiness  

### Submission Process
Submissions will be collected via a **form link provided in the Cua Discord**. Your submission must contain:

- **GitHub repo** containing the agent source code and a clear README with instructions on how to use the agent
- **Explanation** of the models and tools used, and what's local or hybrid about your design  
- **Short demo video** (up to two minutes)

A **commit freeze** will be used to ensure that no changes are made after the deadline. Winners will be announced after judging is complete.

**Deadline: Sept 28, 11:59 PM UTC (extended due to popular demand!)**

---

## Join us

Bring a team, pick a model stack, and push what agents can do on real computers. We can’t wait to see what you build at **Hack the North 2025**.

**Discord channels**  
- Join the Discord first: https://discord.gg/cua-ai
- **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342  
- **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226  

**Contact**  
Questions on Hack the North? Email **[email protected]**.

*P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels.*
```

--------------------------------------------------------------------------------
/libs/lume/src/Virtualization/DHCPLeaseParser.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// Represents a DHCP lease entry from the system's DHCP lease file
/// Represents a DHCP lease entry from the system's DHCP lease file
private struct DHCPLease {
    /// MAC address normalized to two-digit, colon-separated hex components.
    let macAddress: String
    /// IP address exactly as stored in the lease file.
    let ipAddress: String
    /// Absolute time at which the lease expires.
    let expirationDate: Date
    
    /// Creates a lease entry from raw DHCP lease file key-value pairs
    /// - Parameter dict: Dictionary containing the raw lease data; expects
    ///   "hw_address", "ip_address", and "lease" keys.
    /// - Returns: A DHCPLease instance if the data is valid, nil otherwise
    static func from(_ dict: [String: String]) -> DHCPLease? {
        guard let hwAddress = dict["hw_address"],
              let ipAddress = dict["ip_address"],
              let lease = dict["lease"] else {
            return nil
        }
        
        // Parse MAC address from hw_address field (format can be "1,xx:xx:xx:xx:xx:xx" or "ff,...")
        let hwParts = hwAddress.split(separator: ",")
        guard hwParts.count >= 2 else { return nil }
        
        // Get the MAC part after the hardware-type prefix and normalize it
        let rawMacAddress = String(hwParts[1]).trimmingCharacters(in: .whitespaces)
        
        // Normalize the MAC address by zero-padding each component to two digits
        // so it compares equal to normalized caller-supplied addresses.
        let normalizedMacAddress = rawMacAddress.split(separator: ":")
            .map { component in
                let hex = String(component)
                return hex.count == 1 ? "0\(hex)" : hex
            }
            .joined(separator: ":")
        
        // Convert hex timestamp (e.g. "0x61ae8c00") to a Date.
        // Strip only a leading "0x" prefix: trimmingCharacters(in:) would also
        // remove trailing '0'/'x' characters and corrupt timestamps that end in 0.
        let timestampHex = lease.hasPrefix("0x") ? String(lease.dropFirst(2)) : lease
        guard let timestamp = UInt64(timestampHex, radix: 16) else { return nil }
        let expirationDate = Date(timeIntervalSince1970: TimeInterval(timestamp))
        
        return DHCPLease(
            macAddress: normalizedMacAddress,
            ipAddress: ipAddress,
            expirationDate: expirationDate
        )
    }
    
    /// Checks if the lease is currently valid (i.e. not yet expired)
    var isValid: Bool {
        expirationDate > Date()
    }
}

/// Parses DHCP lease files to retrieve IP addresses for VMs based on their MAC addresses
/// Parses DHCP lease files to retrieve IP addresses for VMs based on their MAC addresses
enum DHCPLeaseParser {
    private static let leasePath = "/var/db/dhcpd_leases"
    
    /// Zero-pads each colon-separated component of a MAC address to two hex digits.
    private static func normalize(_ mac: String) -> String {
        mac.split(separator: ":")
            .map { piece -> String in
                let hex = String(piece)
                return hex.count == 1 ? "0\(hex)" : hex
            }
            .joined(separator: ":")
    }
    
    /// Retrieves the IP address for a given MAC address from the DHCP lease file
    /// - Parameter macAddress: The MAC address to look up
    /// - Returns: The IP address if found, nil otherwise
    static func getIPAddress(forMAC macAddress: String) -> String? {
        guard let contents = try? String(contentsOfFile: leasePath, encoding: .utf8) else {
            return nil
        }
        
        // Normalize the input so it matches the normalized form stored in leases.
        let target = normalize(macAddress)
        
        let entries = try? parseDHCPLeases(contents)
        return entries?.first(where: { $0.macAddress == target })?.ipAddress
    }
    
    /// Parses the contents of a DHCP lease file into lease entries
    /// - Parameter contents: The raw contents of the lease file
    /// - Returns: Array of parsed lease entries
    private static func parseDHCPLeases(_ contents: String) throws -> [DHCPLease] {
        var entries: [DHCPLease] = []
        var fields: [String: String] = [:]
        var insideBlock = false
        
        for rawLine in contents.components(separatedBy: .newlines) {
            let line = rawLine.trimmingCharacters(in: .whitespaces)
            switch line {
            case "{":
                // A new lease block begins; discard any partial state.
                insideBlock = true
                fields = [:]
            case "}":
                // Block ended: keep the entry only if it parsed cleanly.
                if let entry = DHCPLease.from(fields) {
                    entries.append(entry)
                }
                insideBlock = false
            default:
                guard insideBlock else { continue }
                let pair = line.split(separator: "=", maxSplits: 1)
                if pair.count == 2 {
                    let key = String(pair[0]).trimmingCharacters(in: .whitespaces)
                    let value = String(pair[1]).trimmingCharacters(in: .whitespaces)
                    fields[key] = value
                }
            }
        }
        
        return entries
    }
}
```

--------------------------------------------------------------------------------
/examples/computer_examples.py:
--------------------------------------------------------------------------------

```python
import os
import asyncio
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file
# The .env file is expected at the repository root (one level above examples/).
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
# Imported here (after computing env_file) so the banner prints even if
# python-dotenv is the first thing to fail.
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# Honors a colon-separated PYTHONPATH so locally checked-out packages win
# over any installed versions.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer.computer import Computer
from computer.providers.base import VMProviderType
from computer.logger import LogLevel

async def main():
    """Walk through the Computer API: boot a local macOS VM, then exercise
    screen, mouse, keyboard, and clipboard actions, cleaning up on exit."""
    try:
        print("\n=== Using direct initialization ===")

        # Boot a local macOS VM through the Lume provider.
        vm = Computer(
            display="1024x768",
            memory="8GB",
            cpu="4",
            os_type="macos",
            name="macos",
            verbosity=LogLevel.VERBOSE,
            provider_type=VMProviderType.LUME,
            storage="/Users/<USER>/repos/trycua/computer/examples/storage",
            shared_directories=[
                "/Users/<USER>/repos/trycua/computer/examples/shared"
            ],
            ephemeral=False,
        )

        # Alternative: a remote Linux machine on Cua Cloud.
        # vm = Computer(
        #     os_type="linux",
        #     api_key=os.getenv("CUA_API_KEY"),
        #     name=os.getenv("CONTAINER_NAME"),
        #     provider_type=VMProviderType.CLOUD,
        # )

        try:
            # Start the VM with default parameters.
            await vm.run()

            shot = await vm.interface.screenshot()

            # Make sure ./output exists before writing the capture there.
            out_dir = Path("./output")
            out_dir.mkdir(exist_ok=True)

            shot_path = out_dir / "screenshot.png"
            with open(shot_path, "wb") as fh:
                fh.write(shot)
            print(f"Screenshot saved to: {shot_path.absolute()}")

            # await vm.interface.hotkey("command", "space")

            # res = await vm.interface.run_command("touch ./Downloads/empty_file")
            # print(f"Run command result: {res}")

            accessibility_tree = await vm.interface.get_accessibility_tree()
            print(f"Accessibility tree: {accessibility_tree}")

            # Screen Actions Examples
            # print("\n===  Screen Actions ===")
            # shot = await vm.interface.screenshot()
            # with open("screenshot_direct.png", "wb") as fh:
            #     fh.write(shot)

            screen_size = await vm.interface.get_screen_size()
            print(f"Screen size: {screen_size}")

            # Round-trip a point through both coordinate conversions.
            cx, cy = 733, 736
            print(f"Center in screen coordinates: ({cx}, {cy})")

            shot_coords = await vm.to_screenshot_coordinates(cx, cy)
            print(f"Center in screenshot coordinates: {shot_coords}")

            screen_coords = await vm.to_screen_coordinates(*shot_coords)
            print(f"Back to screen coordinates: {screen_coords}")

            # Mouse Actions Examples
            print("\n=== Mouse Actions ===")
            await vm.interface.move_cursor(100, 100)
            await vm.interface.left_click()
            await vm.interface.right_click(300, 300)
            await vm.interface.double_click(400, 400)

            # Keyboard Actions Examples
            print("\n=== Keyboard Actions ===")
            await vm.interface.type_text("Hello, World!")
            await vm.interface.press_key("enter")

            # Clipboard Actions Examples
            print("\n=== Clipboard Actions ===")
            await vm.interface.set_clipboard("Test clipboard")
            content = await vm.interface.copy_to_clipboard()
            print(f"Clipboard content: {content}")

        finally:
            # Always shut the VM down, even if a step above failed.
            await vm.stop()
    except Exception as e:
        print(f"Error in main: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    # Entry point: drive the async demo to completion on a fresh event loop.
    asyncio.run(main())

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/opencua.py:
--------------------------------------------------------------------------------

```python
"""
OpenCUA agent loop implementation for click prediction using litellm.acompletion
Based on OpenCUA model for GUI grounding tasks.
"""

import asyncio
import json
import re
import base64
from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple
from io import BytesIO
import uuid
from PIL import Image
import litellm
import math

from .composed_grounded import ComposedGroundedConfig
from ..decorators import register_agent
from ..types import Messages, AgentResponse, Tools, AgentCapability
from ..loops.base import AsyncAgentConfig

def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
    """Extract (x, y) coordinates from a ``pyautogui.click(...)`` call in model output.

    Accepts both the keyword form ``pyautogui.click(x=1443, y=343)`` and the
    positional form ``pyautogui.click(1443, 343)``, with arbitrary whitespace
    around the arguments. This is a strict superset of the keyword-only match.

    Args:
        text: Raw model response text to scan.

    Returns:
        The first match as an ``(x, y)`` tuple of ints, or None when no
        recognizable click call is present.
    """
    try:
        pattern = r"pyautogui\.click\(\s*(?:x\s*=\s*)?(\d+)\s*,\s*(?:y\s*=\s*)?(\d+)\s*\)"
        match = re.search(pattern, text)
        if match:
            return (int(match.group(1)), int(match.group(2)))
        return None
    except Exception:
        # Defensive: never let a malformed model response crash the caller.
        return None

@register_agent(models=r"(?i).*OpenCUA.*")
class OpenCUAConfig(ComposedGroundedConfig):
    """OpenCUA agent configuration implementing AsyncAgentConfig protocol for click prediction."""

    def __init__(self):
        super().__init__()
        # Kept for parity with sibling agent configs; not read in this class.
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs
    ) -> Dict[str, Any]:
        """Delegate to the composed-grounded flow, using this same model for
        both the grounding and planning roles (i.e. "model+model")."""
        composed_model = f"{model}+{model}"
        return await super().predict_step(
            messages=messages,
            model=composed_model,
            tools=tools,
            max_retries=max_retries,
            stream=stream,
            computer_handler=computer_handler,
            _on_api_start=_on_api_start,
            _on_api_end=_on_api_end,
            _on_usage=_on_usage,
            _on_screenshot=_on_screenshot,
            **kwargs
        )

    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str,
        **kwargs
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates using OpenCUA model via litellm.acompletion.

        Sends the screenshot plus a "Click on ..." prompt and parses the
        pyautogui.click(...) call out of the reply.

        Args:
            model: The OpenCUA model name
            image_b64: Base64 encoded image
            instruction: Instruction for where to click

        Returns:
            Tuple of (x, y) coordinates or None if prediction fails
        """
        conversation = [
            {
                "role": "system",
                "content": (
                    "You are a GUI agent. You are given a task and a screenshot of the screen. "
                    "You need to perform a series of pyautogui actions to complete the task."
                ),
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                    },
                    {"type": "text", "text": f"Click on {instruction}"},
                ],
            },
        ]

        # Build the request as a dict so caller-supplied kwargs can override
        # any of the defaults below (including model/messages).
        request = {
            "model": model,
            "messages": conversation,
            "max_new_tokens": 2056,
            "temperature": 0,
            **kwargs,
        }
        response = await litellm.acompletion(**request)

        # Parse the model's textual reply into pixel coordinates.
        reply_text = response.choices[0].message.content
        return extract_coordinates_from_pyautogui(reply_text)

    def get_capabilities(self) -> List[AgentCapability]:
        """This agent only supports click prediction."""
        return ["click"]

```
Page 3/16FirstPrevNextLast