#
tokens: 49015/50000 23/616 files (page 7/20)
lines: off (toggle) GitHub
raw markdown copy
This is page 7 of 20. Use http://codebase.md/trycua/cua?page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .dockerignore
├── .editorconfig
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── bump-version.yml
│       ├── ci-lume.yml
│       ├── docker-publish-cua-linux.yml
│       ├── docker-publish-cua-windows.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── link-check.yml
│       ├── lint.yml
│       ├── npm-publish-cli.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       ├── python-tests.yml
│       ├── test-cua-models.yml
│       └── test-validation-script.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.yaml
├── .vscode
│   ├── docs.code-workspace
│   ├── extensions.json
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── cloud-windows-ga-macos-preview.md
│   ├── composite-agents.md
│   ├── computer-use-agents-for-growth-hacking.md
│   ├── cua-hackathon.md
│   ├── cua-playground-preview.md
│   ├── cua-vlm-router.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cli.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── neurips-2025-cua-papers.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .env.example
│   ├── .gitignore
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── observability.mdx
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── cua-vlm-router.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   ├── telemetry.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── cli-playbook
│   │       │   ├── commands.mdx
│   │       │   ├── index.mdx
│   │       │   └── meta.json
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── meta.json
│   │       │   ├── sandboxed-python.mdx
│   │       │   └── tracing-api.mdx
│   │       ├── example-usecases
│   │       │   ├── form-filling.mdx
│   │       │   ├── gemini-complex-ui-navigation.mdx
│   │       │   ├── meta.json
│   │       │   ├── post-event-contact-export.mdx
│   │       │   └── windows-app-behind-vpn.mdx
│   │       ├── get-started
│   │       │   ├── meta.json
│   │       │   └── quickstart.mdx
│   │       ├── index.mdx
│   │       ├── macos-vm-cli-playbook
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   └── meta.json
│   │       └── meta.json
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── bg-dark.jpg
│   │       ├── bg-light.jpg
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── grounding-with-gemini3.gif
│   │       ├── hero.png
│   │       ├── laminar_trace_example.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   ├── posthog
│   │   │   │   │   └── [...path]
│   │   │   │   │       └── route.ts
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   ├── llms.txt
│   │   │   │   └── route.ts
│   │   │   ├── robots.ts
│   │   │   └── sitemap.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── analytics-tracker.tsx
│   │   │   ├── cookie-consent.tsx
│   │   │   ├── doc-actions-menu.tsx
│   │   │   ├── editable-code-block.tsx
│   │   │   ├── footer.tsx
│   │   │   ├── hero.tsx
│   │   │   ├── iou.tsx
│   │   │   ├── mermaid.tsx
│   │   │   └── page-feedback.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   ├── mdx-components.tsx
│   │   └── providers
│   │       └── posthog-provider.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── browser_tool_example.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── tracing_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cua_adapter.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gelato.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── generic_vlm.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   ├── uiins.py
│   │   │   │   │   ├── uitars.py
│   │   │   │   │   └── uitars2.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── browser_tool.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer_agent.py
│   │   ├── bench-ui
│   │   │   ├── bench_ui
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── child.py
│   │   │   ├── examples
│   │   │   │   ├── folder_example.py
│   │   │   │   ├── gui
│   │   │   │   │   ├── index.html
│   │   │   │   │   ├── logo.svg
│   │   │   │   │   └── styles.css
│   │   │   │   ├── output_overlay.png
│   │   │   │   └── simple_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       └── test_port_detection.py
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── tracing_wrapper.py
│   │   │   │   ├── tracing.py
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer.py
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── browser.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wallpaper.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   ├── test_connection.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_server.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_telemetry.py
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── build-extension.py
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── desktop-extension
│   │   │   │   ├── cua-extension.mcpb
│   │   │   │   ├── desktop_extension.png
│   │   │   │   ├── manifest.json
│   │   │   │   ├── README.md
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── run_server.sh
│   │   │   │   └── setup.py
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── QUICK_TEST_COMMANDS.sh
│   │   │   ├── quick_test_local_option.py
│   │   │   ├── README.md
│   │   │   ├── scripts
│   │   │   │   ├── install_mcp_server.sh
│   │   │   │   └── start_mcp_server.sh
│   │   │   ├── test_mcp_server_local_option.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_mcp_server.py
│   │   ├── pylume
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_pylume.py
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           ├── conftest.py
│   │           └── test_omniparser.py
│   ├── qemu-docker
│   │   ├── linux
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   └── src
│   │   │       ├── entry.sh
│   │   │       └── vm
│   │   │           ├── image
│   │   │           │   └── README.md
│   │   │           └── setup
│   │   │               ├── install.sh
│   │   │               ├── setup-cua-server.sh
│   │   │               └── setup.sh
│   │   ├── README.md
│   │   └── windows
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       └── src
│   │           ├── entry.sh
│   │           └── vm
│   │               ├── image
│   │               │   └── README.md
│   │               └── setup
│   │                   ├── install.bat
│   │                   ├── on-logon.ps1
│   │                   ├── setup-cua-server.ps1
│   │                   ├── setup-utils.psm1
│   │                   └── setup.ps1
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── cua-cli
│   │   │   ├── .gitignore
│   │   │   ├── .prettierrc
│   │   │   ├── bun.lock
│   │   │   ├── CLAUDE.md
│   │   │   ├── index.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── auth.ts
│   │   │   │   ├── cli.ts
│   │   │   │   ├── commands
│   │   │   │   │   ├── auth.ts
│   │   │   │   │   └── sandbox.ts
│   │   │   │   ├── config.ts
│   │   │   │   ├── http.ts
│   │   │   │   ├── storage.ts
│   │   │   │   └── util.ts
│   │   │   └── tsconfig.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Development.md
│       ├── Dockerfile
│       ├── Dockerfile.dev
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── install-cli.ps1
│   ├── install-cli.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   ├── run-docker-dev.sh
│   └── typescript-typecheck.js
├── TESTING.md
├── tests
│   ├── agent_loop_testing
│   │   ├── agent_test.py
│   │   └── README.md
│   ├── pytest.ini
│   ├── shell_cmd.py
│   ├── test_files.py
│   ├── test_mcp_server_session_management.py
│   ├── test_mcp_server_streaming.py
│   ├── test_shell_bash.py
│   ├── test_telemetry.py
│   ├── test_tracing.py
│   ├── test_venv.py
│   └── test_watchdog.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/blog/cua-vlm-router.md:
--------------------------------------------------------------------------------

```markdown
# Cua VLM Router: One Provider for All Your Computer-Use Models

If you've been building computer-use agents, you know the reality: every model provider has its own specification and deployment process. Anthropic has one API format, OpenAI another, Google something else entirely. Want to try a Hugging Face model? That's a completely different setup. Self-hosting? Even more complexity. Each provider requires learning their specific API, managing their credentials, and adapting your code to their particular requirements.

Today we're launching the **Cua VLM Router**: a managed inference API that gives you unified access to multiple vision-language model providers through a single API key. We're starting with Anthropic's Claude models (Sonnet 4.5 and Haiku 4.5)—some of the most loved and widely-used computer-use models in the Cua ecosystem—with more providers coming soon.

![Cua VLM Router Banner](https://github.com/user-attachments/assets/1b978f62-2cae-4cf7-932a-55ac8c8f2e06)

## What You Get

The Cua VLM Router handles the infrastructure so you can focus on building:

**Single API Key**

- One key for all model providers (no juggling multiple credentials)
- Works for both model inference and sandbox access
- Manage everything from one dashboard at cua.ai

**Smart Routing**

- Automatic provider selection for optimal availability and performance
- For Anthropic models, we route to the best provider (Anthropic, AWS Bedrock, or Microsoft Foundry)
- No configuration needed—just specify the model and we handle the rest

**Cost Tracking & Optimization**

- Unified usage dashboard across all models
- Real-time credit balance tracking
- Detailed cost breakdown per request (gateway cost + upstream cost)

**Production-Ready**

- OpenAI-compatible API (drop-in replacement for existing code)
- Full streaming support with Server-Sent Events
- Metadata about routing decisions in every response

## Available Models (Launch)

We're starting with Anthropic's latest Claude models:

| Model                             | Best For                           |
| --------------------------------- | ---------------------------------- |
| `cua/anthropic/claude-sonnet-4.5` | General-purpose tasks, recommended |
| `cua/anthropic/claude-haiku-4.5`  | Fast responses, cost-effective     |

## How It Works

When you request an Anthropic model through Cua, we automatically route to the best available provider—whether that's Anthropic directly, AWS Bedrock, or Microsoft Foundry. You just specify `cua/anthropic/claude-sonnet-4.5`, and we handle the provider selection, failover, and optimization behind the scenes. No need to manage multiple accounts or implement fallback logic yourself.

## Getting Started

Sign up at [cua.ai/signin](https://cua.ai/signin) and create your API key from **Dashboard > API Keys > New API Key** (save it immediately—you won't see it again).

Use it with the Agent SDK (make sure to set your environment variable):

```python
import asyncio
from agent import ComputerAgent
from computer import Computer

async def main():
  # Initialize cloud computer
  computer = Computer(
    os_type="linux",
    provider_type="cloud",
    name="your-container-name",
    api_key="your-cua-api-key"
  )

  # Initialize agent with Claude Sonnet 4.5
  agent = ComputerAgent(
    tools=[computer],
    model="cua/anthropic/claude-sonnet-4.5",
    api_key="your-cua-api-key",
    instructions="You are a helpful assistant that can control computers",
    only_n_most_recent_images=3
  )

  # Run a task
  async for result in agent.run("Open a browser and search for Python tutorials"):
    print(result)

if __name__ == "__main__":
  asyncio.run(main())
```

## Migration is Simple

Already using Anthropic directly? Just add the `cua/` prefix:

**Before:**

```python
# In your shell: export ANTHROPIC_API_KEY="sk-ant-..."
agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")
```

**After:**

```python
# In your shell: export CUA_API_KEY="sk_cua-api01_..."
agent = ComputerAgent(model="cua/anthropic/claude-sonnet-4.5")
```

Same code structure. No other changes needed.

## Direct API Access

The router exposes an OpenAI-compatible API at `https://inference.cua.ai/v1`:

```bash
curl -X POST https://inference.cua.ai/v1/chat/completions \
  -H "Authorization: Bearer ${CUA_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "anthropic/claude-sonnet-4.5",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": true
  }'
```

Works with any OpenAI-compatible client library.

## FAQs

<details>
<summary><strong>Do I still need provider API keys?</strong></summary>

No. Cua manages all provider API keys and infrastructure. You only need one Cua API key for everything—model inference and sandbox access.

</details>

<details>
<summary><strong>How does pricing work?</strong></summary>

Requests are billed in credits, deducted from your Cua account balance. Every response includes both the Cua gateway cost and the actual upstream API cost for transparency.

</details>

<details>
<summary><strong>Can I still use my own Anthropic key (BYOK)?</strong></summary>

Yes. The agent SDK still supports direct provider access. Just use `anthropic/claude-sonnet-4-5-20250929` instead of the `cua/` prefix and set your `ANTHROPIC_API_KEY`. See [Supported Model Providers](https://cua.ai/docs/agent-sdk/supported-model-providers/) for details.

</details>

<details>
<summary><strong>What about other providers?</strong></summary>

We're starting with Anthropic and adding more providers based on what people actually use. Request access to specific models in [Discord](https://discord.gg/cua-ai).

</details>

<details>
<summary><strong>Does streaming work?</strong></summary>

Yes. Set `"stream": true` in your request to receive Server-Sent Events. Works identically to OpenAI's streaming API.

</details>

## What's Next

This is just the beginning. We're actively iterating based on feedback:

- Additional model providers
- Custom model routing rules
- Usage alerts and budget controls
- Team collaboration features

If there's a model or feature you need, let us know in [Discord](https://discord.gg/cua-ai).

## Need Help?

- **Documentation**: [cua.ai/docs/agent-sdk/supported-model-providers/cua-vlm-router](https://cua.ai/docs/agent-sdk/supported-model-providers/cua-vlm-router)
- **Quickstart Guide**: [cua.ai/docs/get-started/quickstart](https://cua.ai/docs/get-started/quickstart)
- **Discord Community**: [discord.gg/cua-ai](https://discord.gg/cua-ai)

---

Get started at [cua.ai](https://cua.ai) or check out the [VLM Router docs](https://cua.ai/docs/agent-sdk/supported-model-providers/cua-vlm-router).

```

--------------------------------------------------------------------------------
/tests/test_venv.py:
--------------------------------------------------------------------------------

```python
"""
Virtual Environment Testing Module
This module tests the ability to execute python code in a virtual environment within Cua Containers.

Required environment variables:
- CUA_API_KEY: API key for Cua cloud provider
- CUA_CONTAINER_NAME: Name of the container to use
"""

import asyncio
import os
import sys
import traceback
from pathlib import Path

import pytest

# Load environment variables from the .env file at the repository root.
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Mirror PYTHONPATH into sys.path so project packages resolve even when
# pytest is launched without the environment applied. os.pathsep keeps the
# split correct on Windows (";") as well as POSIX (":").
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(os.pathsep):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Prepend so these entries win over site-packages
        print(f"Added to sys.path: {path}")


@pytest.fixture(scope="session")
async def computer():
    """Session-scoped Cua cloud computer shared by every test in this module."""
    instance = Computer(
        os_type="linux",
        api_key=os.getenv("CUA_API_KEY"),
        name=str(os.getenv("CUA_CONTAINER_NAME")),
        provider_type=VMProviderType.CLOUD,
    )

    try:
        await instance.run()
        yield instance
    finally:
        # Always release the remote session, even if a test errored.
        await instance.disconnect()


# Sample test cases
@pytest.mark.asyncio(loop_scope="session")
async def test_venv_install(computer):
    """Creating a venv and installing `requests` should report success."""
    output, _ = await computer.venv_install("test_env", ["requests"])

    # Either pip message indicates the package ended up installed.
    success_markers = ("Successfully installed", "Requirement already satisfied")
    assert any(marker in output for marker in success_markers)


@pytest.mark.asyncio(loop_scope="session")
async def test_venv_cmd(computer):
    """Shell commands run inside the venv; `python --version` prints 'Python'."""
    output, _ = await computer.venv_cmd("test_env", "python --version")
    assert "Python" in output


@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec(computer):
    """A plain Python function can be shipped to the venv and executed there."""

    def remote_probe(message="Hello World"):
        import sys

        return f"Python {sys.version_info.major}.{sys.version_info.minor}: {message}"

    result = await computer.venv_exec("test_env", remote_probe, message="Test successful!")

    # The remote interpreter reports its own version plus our kwarg.
    assert "Python" in result and "Test successful!" in result


@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_with_package(computer):
    """Functions executed remotely can import packages installed in the venv."""

    def report_requests_version():
        import requests

        return f"requests version: {requests.__version__}"

    assert "requests version:" in await computer.venv_exec("test_env", report_requests_version)


@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_error_handling(computer):
    """Exceptions raised remotely are re-raised locally with their message."""

    def explode():
        raise ValueError("This is a test error")

    with pytest.raises(ValueError, match="This is a test error"):
        await computer.venv_exec("test_env", explode)


@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_with_args_kwargs(computer):
    """Positional, variadic, and keyword arguments all round-trip to the venv."""

    def create_data_object(name, age, *hobbies, **metadata):
        return {
            "name": name,
            "age": age,
            "hobbies": list(hobbies),
            "metadata": metadata,
            "status": "active",
        }

    result = await computer.venv_exec(
        "test_env",
        create_data_object,
        "Alice",
        25,
        "reading",
        "coding",
        location="New York",
        department="Engineering",
    )

    expected = {
        "name": "Alice",
        "age": 25,
        "hobbies": ["reading", "coding"],
        "status": "active",
    }
    for key, value in expected.items():
        assert result[key] == value
    assert result["metadata"]["location"] == "New York"


@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_stdout_capture(computer, capfd):
    """Remote prints are relayed to local stdout and visible to capfd."""

    def greeter():
        print("Hello World!")
        return "Function completed"

    outcome = await computer.venv_exec("test_env", greeter)
    captured, _ = capfd.readouterr()

    # NOTE(review): the relayed output carries an extra trailing newline —
    # presumably added by the venv_exec bridge; confirm if this assertion breaks.
    assert captured == "Hello World!\n\n"
    assert outcome == "Function completed"


@pytest.mark.asyncio(loop_scope="session")
async def test_remote_decorator(computer):
    """Test the @sandboxed decorator using the module-level default computer."""
    # Register the shared fixture as the implicit target for @sandboxed.
    set_default_computer(computer)

    # Define a function with the remote decorator; it runs inside the venv.
    @sandboxed("test_env")
    def get_package_version():
        import platform
        import sys

        return {"python_version": sys.version, "platform": platform.platform(), "success": True}

    # Call the decorated function
    result = await get_package_version()

    # Verify the function executed in the virtual environment
    assert "python_version" in result
    assert "platform" in result
    assert result["success"] is True  # identity check is the idiomatic form for booleans


@pytest.mark.asyncio(loop_scope="session")
async def test_remote_decorator_with_custom_computer(computer):
    """@sandboxed accepts an explicit computer instead of the module default."""

    @sandboxed("test_env", computer=computer)
    def get_system_info():
        import os
        import sys

        return {
            "python_version": sys.version,
            "environment_vars": dict(os.environ),
            "working_directory": os.getcwd(),
        }

    info = await get_system_info()

    # All three fields come back from the sandboxed execution.
    for key in ("python_version", "environment_vars", "working_directory"):
        assert key in info
    # The remote venv process should not share this test process's cwd.
    assert info["working_directory"] != os.getcwd()


if __name__ == "__main__":
    # Allow running this module directly (`python test_venv.py`) in verbose mode.
    pytest.main([__file__, "-v"])

```

--------------------------------------------------------------------------------
/libs/python/mcp-server/quick_test_local_option.py:
--------------------------------------------------------------------------------

```python
#!/usr/bin/env python3
"""
Quick test to verify the local desktop option logic without full setup.

This script tests the environment variable parsing and logic flow
without requiring VMs, computer-server, or MCP clients to be running.
"""

import os
import sys


def test_env_var_parsing():
    """Test that CUA_USE_HOST_COMPUTER_SERVER is parsed correctly.

    Mirrors the parsing logic in session_manager.py: the flag is truthy only
    when the variable is (case-insensitively) "true", "1", or "yes".

    Returns:
        bool: True if every case produced the expected result.
    """
    print("Testing CUA_USE_HOST_COMPUTER_SERVER environment variable parsing...")
    print("-" * 60)

    test_cases = [
        # (env_value, expected_result, description); None means "variable unset"
        ("true", True, "lowercase 'true'"),
        ("True", True, "capitalized 'True'"),
        ("TRUE", True, "uppercase 'TRUE'"),
        ("1", True, "numeric '1'"),
        ("yes", True, "lowercase 'yes'"),
        ("Yes", True, "capitalized 'Yes'"),
        ("false", False, "lowercase 'false'"),
        ("False", False, "capitalized 'False'"),
        ("FALSE", False, "uppercase 'FALSE'"),
        ("0", False, "numeric '0'"),
        ("no", False, "lowercase 'no'"),
        ("", False, "empty string"),
        ("random", False, "random value"),
        (None, False, "not set (None)"),
    ]

    passed = 0
    failed = 0

    for env_value, expected, description in test_cases:
        # Clear any value left over from the previous iteration; without this
        # the "not set" case would silently read the prior case's value.
        os.environ.pop("CUA_USE_HOST_COMPUTER_SERVER", None)
        if env_value is not None:
            os.environ["CUA_USE_HOST_COMPUTER_SERVER"] = env_value

        # Simulate the logic from session_manager.py line 59
        actual = os.getenv("CUA_USE_HOST_COMPUTER_SERVER", "false").lower() in (
            "true",
            "1",
            "yes",
        )

        status = "✓ PASS" if actual == expected else "✗ FAIL"
        if actual == expected:
            passed += 1
        else:
            failed += 1

        print(
            f"{status} | Value: {env_value!r:15} | Expected: {expected!s:5} | Got: {actual!s:5} | {description}"
        )

    # Clean up
    os.environ.pop("CUA_USE_HOST_COMPUTER_SERVER", None)

    print("-" * 60)
    print(f"Results: {passed} passed, {failed} failed")
    return failed == 0


def test_session_manager_logic():
    """Verify that session_manager.py contains the expected logic markers.

    Returns:
        bool: True when the file is found and every marker is present.
    """
    print("\nTesting session_manager.py logic flow...")
    print("-" * 60)

    # Read the actual session_manager.py to verify the logic
    import pathlib

    # This script lives in libs/python/mcp-server/, right next to the
    # mcp_server package, so the module is one directory down. The second
    # candidate preserves the legacy lookup used when this script was run
    # from a top-level tests/ directory.
    here = pathlib.Path(__file__).resolve().parent
    candidates = [
        here / "mcp_server" / "session_manager.py",
        here.parent / "libs" / "python" / "mcp-server" / "mcp_server" / "session_manager.py",
    ]
    session_manager_path = next((p for p in candidates if p.exists()), candidates[0])

    if not session_manager_path.exists():
        print(f"✗ FAIL | session_manager.py not found at {session_manager_path}")
        return False

    content = session_manager_path.read_text()

    # Check for the key logic
    checks = [
        ('os.getenv("CUA_USE_HOST_COMPUTER_SERVER"', "Environment variable check present"),
        ("use_host_computer_server=use_host", "use_host_computer_server parameter passed"),
        ("Computer(", "Computer instantiation present"),
    ]

    all_checks_passed = True
    for check_str, description in checks:
        if check_str in content:
            print(f"✓ PASS | {description}")
        else:
            print(f"✗ FAIL | {description} - not found")
            all_checks_passed = False

    print("-" * 60)
    return all_checks_passed


def test_documentation_consistency():
    """Verify documentation mentions the new feature."""
    print("\nTesting documentation consistency...")
    print("-" * 60)

    import pathlib

    docs_to_check = [
        ("configuration.mdx", "CUA_USE_HOST_COMPUTER_SERVER"),
        ("usage.mdx", "Targeting Your Local Desktop"),
    ]

    docs_path = (
        pathlib.Path(__file__).parent.parent
        / "docs"
        / "content"
        / "docs"
        / "libraries"
        / "mcp-server"
    )

    all_docs_ok = True
    for doc_file, expected_content in docs_to_check:
        doc_path = docs_path / doc_file
        if not doc_path.exists():
            print(f"✗ FAIL | {doc_file} not found")
            all_docs_ok = False
            continue

        content = doc_path.read_text()
        if expected_content in content:
            print(f"✓ PASS | {doc_file} contains '{expected_content}'")
        else:
            print(f"✗ FAIL | {doc_file} missing '{expected_content}'")
            all_docs_ok = False

    print("-" * 60)
    return all_docs_ok


def print_usage_examples():
    """Print copy-pasteable MCP client configurations for both modes."""
    banner = "=" * 60
    divider = "-" * 60

    print("\n" + banner)
    print("USAGE EXAMPLES")
    print(banner)

    print("\n1. DEFAULT MODE (VM):")
    print(divider)
    vm_mode_example = """
{
  "mcpServers": {
    "cua-agent": {
      "command": "/bin/bash",
      "args": ["~/.cua/start_mcp_server.sh"],
      "env": {
        "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929"
      }
    }
  }
}

Note: CUA_USE_HOST_COMPUTER_SERVER is not set, so VM mode is used (safe).
"""
    print(vm_mode_example)

    print("\n2. LOCAL DESKTOP MODE:")
    print(divider)
    local_mode_example = """
Step 1: Start computer-server locally:
    python -m computer_server

Step 2: Configure MCP client:
{
  "mcpServers": {
    "cua-agent": {
      "command": "/bin/bash",
      "args": ["~/.cua/start_mcp_server.sh"],
      "env": {
        "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929",
        "CUA_USE_HOST_COMPUTER_SERVER": "true"
      }
    }
  }
}

⚠️  WARNING: AI will have direct access to your desktop!
"""
    print(local_mode_example)


def main():
    """Run all quick tests and return a process exit code (0 = success)."""
    header = "=" * 60
    print(header)
    print("QUICK TEST: MCP Server Local Desktop Option")
    print(header)
    print()

    # Each entry pairs a display name with the test's boolean outcome.
    results = [
        ("Environment Variable Parsing", test_env_var_parsing()),
        ("Session Manager Logic", test_session_manager_logic()),
        ("Documentation Consistency", test_documentation_consistency()),
    ]

    print("\n" + header)
    print("SUMMARY")
    print(header)
    for test_name, passed in results:
        print(f"{'✓ PASSED' if passed else '✗ FAILED'} | {test_name}")

    if not all(passed for _, passed in results):
        print("\n❌ Some tests failed. Please review the output above.")
        return 1

    print("\n🎉 All quick tests passed!")
    print_usage_examples()
    print("\nNext steps:")
    print("1. Run full automated tests: pytest tests/test_mcp_server_local_option.py")
    print("2. Follow manual testing guide: tests/MANUAL_TEST_LOCAL_OPTION.md")
    return 0


if __name__ == "__main__":
    # Propagate main()'s 0/1 result as the process exit code (CI-friendly).
    sys.exit(main())

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/agent-loops.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Agent Loops
description: Supported computer-using agent loops and models
---

<Callout>
  A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
</Callout>

An agent can be thought of as a loop - it generates actions, executes them, and repeats until done:

1. **Generate**: Your `model` generates `output_text`, `computer_call`, `function_call`
2. **Execute**: The `computer` safely executes those items
3. **Complete**: If the model has no more calls, it's done!

To run an agent loop simply do:

```python
from agent import ComputerAgent
import asyncio
from computer import Computer


async def take_screenshot():
    async with Computer(
        os_type="linux",
        provider_type="cloud",
        name="your-sandbox-name",
        api_key="your-api-key"
    ) as computer:

        agent = ComputerAgent(
            model="anthropic/claude-sonnet-4-5-20250929",
            tools=[computer],
            max_trajectory_budget=5.0
        )

        messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]

        async for result in agent.run(messages):
            for item in result["output"]:
                if item["type"] == "message":
                    print(item["content"][0]["text"])


if __name__ == "__main__":
    asyncio.run(take_screenshot())
```

For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page.

### Response Format

```python
{
    "output": [
        {
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "I can see..."}]
        },
        {
            "type": "computer_call",
            "action": {"type": "screenshot"},
            "call_id": "call_123"
        },
        {
            "type": "computer_call_output",
            "call_id": "call_123",
            "output": {"image_url": "data:image/png;base64,..."}
        }
    ],
    "usage": {
        "prompt_tokens": 150,
        "completion_tokens": 75,
        "total_tokens": 225,
        "response_cost": 0.01,
    }
}
```

### Environment Variables

Use the following environment variables to configure the agent and its access to cloud computers and LLM providers:

```bash
# Computer instance (cloud)
export CUA_SANDBOX_NAME="your-sandbox-name"
export CUA_API_KEY="your-cua-api-key"

# LLM API keys
export ANTHROPIC_API_KEY="your-anthropic-key"
export OPENAI_API_KEY="your-openai-key"
```

### Input and output

The input prompt passed to `Agent.run` can either be a string or a list of message dictionaries:

```python
messages = [
    {
        "role": "user",
        "content": "Take a screenshot and describe what you see"
    },
    {
        "role": "assistant",
        "content": "I'll take a screenshot for you."
    }
]
```

The output is an AsyncGenerator that yields response chunks.

### Parameters

The `ComputerAgent` constructor provides a wide range of options for customizing agent behavior, tool integration, callbacks, resource management, and more.

- `model` (`str`): Default: **required**
  The LLM or agent model to use. Determines which agent loop is selected unless `custom_loop` is provided. (e.g., "claude-sonnet-4-5-20250929", "computer-use-preview", "omni+vertex_ai/gemini-pro")
- `tools` (`List[Any]`):
  List of tools the agent can use (e.g., `Computer`, sandboxed Python functions, etc.).
- `custom_loop` (`Callable`):
  Optional custom agent loop function. If provided, overrides automatic loop selection.
- `only_n_most_recent_images` (`int`):
  If set, only the N most recent images are kept in the message history. Useful for limiting memory usage. Automatically adds `ImageRetentionCallback`.
- `callbacks` (`List[Any]`):
  List of callback instances for advanced preprocessing, postprocessing, logging, or custom hooks. See [Callbacks & Extensibility](#callbacks--extensibility).
- `verbosity` (`int`):
  Logging level (e.g., `logging.INFO`). If set, adds a logging callback.
- `trajectory_dir` (`str`):
  Directory path to save full trajectory data, including screenshots and responses. Adds `TrajectorySaverCallback`.
- `max_retries` (`int`): Default: `3`
  Maximum number of retries for failed API calls (default: 3).
- `screenshot_delay` (`float` | `int`): Default: `0.5`
  Delay (in seconds) before taking screenshots (default: 0.5).
- `use_prompt_caching` (`bool`): Default: `False`
  Enables prompt caching for repeated prompts (mainly for Anthropic models).
- `max_trajectory_budget` (`float` | `dict`):
  If set (float or dict), adds a budget manager callback that tracks usage costs and stops execution if the budget is exceeded. Dict allows advanced options (e.g., `{ "max_budget": 5.0, "raise_error": True }`).
- `instructions` (`str` | `list[str]`):
  System instructions for the agent. Can be a single string or multiple strings in a tuple/list for readability; they are concatenated into one system prompt.
- `api_key` (`str`):
  Optional API key override for the model provider.
- `api_base` (`str`):
  Optional API base URL override for the model provider.
- `**additional_generation_kwargs` (`any`):
  Any additional keyword arguments are passed through to the agent loop or model provider.

**Example with advanced options:**

```python
from agent import ComputerAgent
from computer import Computer
from agent.callbacks import ImageRetentionCallback

agent = ComputerAgent(
    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[Computer(...)],
    only_n_most_recent_images=3,
    callbacks=[ImageRetentionCallback(only_n_most_recent_images=3)],
    verbosity=logging.INFO,
    trajectory_dir="trajectories",
    max_retries=5,
    screenshot_delay=1.0,
    use_prompt_caching=True,
    max_trajectory_budget={"max_budget": 5.0, "raise_error": True},
    instructions=(
        "You are a helpful computer-using agent. "
        "Output computer calls until you complete the given task"
    ),
    api_key="your-api-key",
    api_base="https://your-api-base.com/v1",
)
```

### Streaming Responses

```python
async for result in agent.run(messages, stream=True):
    # Process streaming chunks
    for item in result["output"]:
        if item["type"] == "message":
            print(item["content"][0]["text"], end="", flush=True)
        elif item["type"] == "computer_call":
            action = item["action"]
            print(f"\n[Action: {action['type']}]")
```

### Error Handling

```python
try:
    async for result in agent.run(messages):
        # Process results
        pass
except BudgetExceededException:
    print("Budget limit exceeded")
except Exception as e:
    print(f"Agent error: {e}")
```

```

--------------------------------------------------------------------------------
/scripts/install-cli.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
# Abort immediately if any command fails, so a partial install never
# masquerades as success.
set -e

# CUA CLI Installation Script for macOS/Linux
echo "🚀 Installing CUA CLI..."

# Print the final success banner: version, install location, and the shell
# config file that received the PATH update.
print_success() {
    local installed_path="$1"
    local cli_version="$2"
    local shell_config="$3"

    printf "\033[32m✅  CUA CLI %s was installed successfully to %s\033[0m\n" "$cli_version" "$installed_path"
    printf "\033[90mAdded \"%s\" to \$PATH in \"%s\"\033[0m\n" "$installed_path" "$shell_config"
    printf "\n\033[90mTo get started, run:\033[0m\n"
    printf "  source %s\n" "$shell_config"
    printf "  cua --help\n"
    printf "\033[90m📚 For more help, visit: https://docs.cua.ai/libraries/cua-cli\033[0m\n"
}

# Fallback installer used whenever a pre-built binary cannot be obtained:
# bootstraps Bun if needed, installs @trycua/cli globally (Bun first, npm as
# a second fallback), records the version, and exits the script itself.
install_with_bun() {
    echo "📦 Installing CUA CLI using Bun..."
    
    # Check if bun is already installed
    if ! command -v bun &> /dev/null; then
        echo "📦 Installing Bun..."
        curl -fsSL https://bun.sh/install | bash
        
        # Source the shell profile to make bun available
        if [ -f "$HOME/.bashrc" ]; then
            source "$HOME/.bashrc"
        elif [ -f "$HOME/.zshrc" ]; then
            source "$HOME/.zshrc"
        fi
        
        # Add bun to PATH for this session
        export PATH="$HOME/.bun/bin:$PATH"
    fi

    # Verify bun installation
    if ! command -v bun &> /dev/null; then
        echo "❌ Failed to install Bun. Please install manually from https://bun.sh"
        exit 1
    fi

    echo "📦 Installing CUA CLI..."
    if ! bun add -g @trycua/cli; then
        # Bun failed; npm is the last automated option before giving up.
        echo "❌ Failed to install with Bun, trying npm..."
        if ! npm install -g @trycua/cli; then
            echo "❌ Installation failed. Please try installing manually:"
            echo "   npm install -g @trycua/cli"
            exit 1
        fi
    fi

    # Verify installation
    if command -v cua &> /dev/null; then
        # Determine which config file was updated
        local config_file="$HOME/.bashrc"
        if [ -f "$HOME/.zshrc" ]; then
            config_file="$HOME/.zshrc"
        elif [ -f "$HOME/.profile" ]; then
            config_file="$HOME/.profile"
        fi
        # Determine installed version via npm registry (fallback to unknown).
        # NOTE(review): this queries the registry's latest version, which may
        # differ from the version actually installed — confirm acceptable.
        local VERSION_BUN
        VERSION_BUN=$(npm view @trycua/cli version 2>/dev/null || echo "unknown")
        # Write version file to ~/.cua/bin/.version
        local INSTALL_DIR="$HOME/.cua/bin"
        mkdir -p "$INSTALL_DIR"
        echo "$VERSION_BUN" > "$INSTALL_DIR/.version"
        # Print success and exit
        print_success "$(command -v cua)" "$VERSION_BUN" "$config_file"
        exit 0
    else
        echo "❌ Installation failed. Please try installing manually:"
        echo "   npm install -g @trycua/cli"
        exit 1
    fi
}

# Determine OS and architecture
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)

# Map architecture to the format used in release assets
case "$ARCH" in
    x86_64) ARCH="x64" ;;
    aarch64|arm64) ARCH="arm64" ;;
    *) ARCH="$ARCH" ;;
esac

# Determine the binary name; only these three targets ship pre-built binaries,
# any other combination falls back to the Bun installer (which exits).
BINARY_NAME="cua-${OS}-${ARCH}"
case "${OS}-${ARCH}" in
    darwin-arm64|darwin-x64|linux-x64) ;;
    *)
        echo "⚠️  Pre-built binary not available for ${OS}-${ARCH}, falling back to Bun installation"
        install_with_bun
        exit 0
        ;;
esac

# Get the latest release metadata from GitHub
LATEST_RELEASE=$(curl -s https://api.github.com/repos/trycua/cua/releases/latest)
if [ -z "$LATEST_RELEASE" ]; then
    echo "⚠️  Could not fetch latest release, falling back to Bun installation"
    install_with_bun
    exit 0
fi

# Extract version number (remove 'cua-v' prefix). An empty tag means the API
# returned an error payload (e.g. rate limiting) rather than a release, which
# the non-empty-body check above cannot detect.
TAG_NAME=$(echo "$LATEST_RELEASE" | grep 'tag_name' | cut -d '"' -f 4)
if [ -z "$TAG_NAME" ]; then
    echo "⚠️  Could not parse release tag, falling back to Bun installation"
    install_with_bun
    exit 0
fi
VERSION=${TAG_NAME#cua-v}

# Find the binary URL in the release assets (quote the expansion so grep
# receives the name as a literal even if it ever contains glob characters)
BINARY_URL=$(echo "$LATEST_RELEASE" | grep -o 'https://.*/download/[^"]*/'"${BINARY_NAME}"'"' | head -1)
BINARY_URL="${BINARY_URL%\"}"
printf "\033[90mBINARY_URL: %s\033[0m\n" "$BINARY_URL"

if [ -z "$BINARY_URL" ]; then
    echo "⚠️  Could not find ${BINARY_NAME} in release assets, falling back to Bun installation"
    install_with_bun
    exit 0
fi

# Create ~/.cua/bin directory if it doesn't exist
INSTALL_DIR="$HOME/.cua/bin"
mkdir -p "$INSTALL_DIR"

# Download the binary
echo "📥 Downloading CUA CLI $VERSION for ${OS}-${ARCH}..."
echo "📍 Downloading from: $BINARY_URL"

# Download with progress bar and proper error handling. --fail makes curl
# exit non-zero on HTTP errors instead of saving the server's error page.
if ! curl -L --progress-bar --fail "$BINARY_URL" -o "$INSTALL_DIR/cua"; then
    echo "❌ Failed to download pre-built binary from $BINARY_URL"
    echo "⚠️  Falling back to Bun installation"
    install_with_bun
    exit 0
fi

# Verify the downloaded file exists and has content
if [ ! -f "$INSTALL_DIR/cua" ] || [ ! -s "$INSTALL_DIR/cua" ]; then
    echo "❌ Downloaded file is missing or empty"
    echo "⚠️  Falling back to Bun installation"
    rm -f "$INSTALL_DIR/cua"
    install_with_bun
    exit 0
fi

# Check if the downloaded file looks like a binary (not HTML error page).
# NOTE(review): assumes the `file` utility is available on the host — confirm
# for minimal Linux images.
if file "$INSTALL_DIR/cua" | grep -q "HTML\|text"; then
    echo "❌ Downloaded file appears to be corrupted (HTML/text instead of binary)"
    echo "⚠️  Falling back to Bun installation"
    rm -f "$INSTALL_DIR/cua"
    install_with_bun
    exit 0
fi

# Make the binary executable
chmod +x "$INSTALL_DIR/cua"

# Write version file
echo "$VERSION" > "$INSTALL_DIR/.version"

# Add ~/.cua/bin to PATH if not already in PATH. If both .bashrc and .zshrc
# exist, both are updated; .profile is only used when neither exists.
if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then
    # Add to .bashrc, .zshrc, or .profile
    if [ -f "$HOME/.bashrc" ]; then
        echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$HOME/.bashrc"
        echo "Added $INSTALL_DIR to PATH in ~/.bashrc"
    fi
    
    if [ -f "$HOME/.zshrc" ]; then
        echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$HOME/.zshrc"
        echo "Added $INSTALL_DIR to PATH in ~/.zshrc"
    fi
    
    if [ -f "$HOME/.profile" ] && [ ! -f "$HOME/.bashrc" ] && [ ! -f "$HOME/.zshrc" ]; then
        echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$HOME/.profile"
        echo "Added $INSTALL_DIR to PATH in ~/.profile"
    fi
    
    # Add to current session
    export PATH="$INSTALL_DIR:$PATH"
fi

# Verify installation
if command -v cua &> /dev/null; then
    # Determine which config file was updated (mirrors install_with_bun's logic)
    config_file="$HOME/.bashrc"
    if [ -f "$HOME/.zshrc" ]; then
        config_file="$HOME/.zshrc"
    elif [ -f "$HOME/.profile" ]; then
        config_file="$HOME/.profile"
    fi
    
    print_success "$(which cua)" "$VERSION" "$config_file"
    exit 0
else
    echo "❌ Installation failed. Please try installing manually:"
    echo "   curl -fsSL https://cua.ai/install.sh | sh"
    exit 1
fi

```

--------------------------------------------------------------------------------
/libs/qemu-docker/windows/src/vm/setup/setup-cua-server.ps1:
--------------------------------------------------------------------------------

```
# Setup CUA Computer Server on Windows 11
# Creates a scheduled task to run computer server in background

Set-StrictMode -Version Latest
# Continue on non-terminating errors: setup steps are best-effort and logged.
$ErrorActionPreference = 'Continue'

# Import shared utilities (provides Write-Log and Resolve-ChocoPath)
$scriptFolder = "C:\OEM"
Import-Module (Join-Path $scriptFolder -ChildPath "setup-utils.psm1")

# --- Logging ---
# One log file per run, named with a timestamp + PID so reruns never collide.
$LogDir = "C:\Windows\Temp"
if (!(Test-Path $LogDir)) { New-Item -ItemType Directory -Force -Path $LogDir | Out-Null }
$RunId = (Get-Date -Format 'yyyyMMdd_HHmmss') + "_" + $PID
$script:LogFile = Join-Path $LogDir ("setup_cua_server_" + $RunId + ".log")

Write-Log -LogFile $script:LogFile -Message "=== Installing CUA Computer Server ==="

# Ensure Chocolatey and Python 3.12 are present; a missing choco only logs a
# warning because `py -m venv` below may still find a system Python.
try {
  $ChocoExe = Resolve-ChocoPath
  if ($ChocoExe) {
    Write-Log -LogFile $script:LogFile -Message "Installing Python 3.12 via Chocolatey"
    try {
      & $ChocoExe install -y python312 | Out-Null
    } catch {
      Write-Log -LogFile $script:LogFile -Message "Python 3.12 install warning: $($_.Exception.Message)"
    }
  } else {
    Write-Log -LogFile $script:LogFile -Message "Chocolatey not available; skipping python312 install"
  }
} catch {
  Write-Log -LogFile $script:LogFile -Message "Chocolatey bootstrap warning: $($_.Exception.Message)"
}

# Create venv under the current user's profile; reuse it if one already exists.
$HomeDir = $env:USERPROFILE
$CuaDir  = Join-Path $HomeDir '.cua-server'
$VenvDir = Join-Path $CuaDir 'venv'
New-Item -ItemType Directory -Force -Path $CuaDir | Out-Null

Write-Log -LogFile $script:LogFile -Message "Creating Python virtual environment at $VenvDir"
$ExistingVenvPython = Join-Path $VenvDir 'Scripts\python.exe'
if (Test-Path -LiteralPath $ExistingVenvPython) {
  Write-Log -LogFile $script:LogFile -Message "Existing venv detected; skipping creation"
} else {
  try {
    & py -m venv $VenvDir
    Write-Log -LogFile $script:LogFile -Message "Virtual environment created successfully"
  } catch {
    # venv is mandatory for everything that follows, so this failure is fatal.
    Write-Log -LogFile $script:LogFile -Message "venv creation error: $($_.Exception.Message)"
    throw
  }
}

$PyExe  = Join-Path $VenvDir 'Scripts\python.exe'
$PipExe = Join-Path $VenvDir 'Scripts\pip.exe'
$ActivateScript = Join-Path $VenvDir 'Scripts\Activate.ps1'

# NOTE(review): activation is not strictly required since the venv's pip/python
# are invoked by full path below — confirm before removing.
Write-Log -LogFile $script:LogFile -Message "Activating virtual environment"
& $ActivateScript

Write-Log -LogFile $script:LogFile -Message "Upgrading pip, setuptools, and wheel"
try {
  & $PipExe install --upgrade pip setuptools wheel 2>&1 | Tee-Object -FilePath $script:LogFile -Append | Out-Null
} catch {
  Write-Log -LogFile $script:LogFile -Message "pip bootstrap warning: $($_.Exception.Message)"
}

Write-Log -LogFile $script:LogFile -Message "Installing cua-computer-server"
try {
  & $PipExe install --upgrade cua-computer-server 2>&1 | Tee-Object -FilePath $script:LogFile -Append | Out-Null
  Write-Log -LogFile $script:LogFile -Message "cua-computer-server installed successfully"
} catch {
  # Without the server package there is nothing to run; abort setup.
  Write-Log -LogFile $script:LogFile -Message "Server install error: $($_.Exception.Message)"
  throw
}

# Open firewall for port 5000 so clients outside the VM can reach the server.
Write-Log -LogFile $script:LogFile -Message "Opening firewall for port 5000"
try {
  netsh advfirewall firewall add rule name="CUA Computer Server 5000" dir=in action=allow protocol=TCP localport=5000 | Out-Null
  Write-Log -LogFile $script:LogFile -Message "Firewall rule added successfully"
} catch {
  Write-Log -LogFile $script:LogFile -Message "Firewall rule warning: $($_.Exception.Message)"
}

# Create start script with auto-restart.
# The generated script loops forever: each iteration updates
# cua-computer-server, launches it on port 5000, and restarts 5s after exit.
$StartScript = Join-Path $CuaDir 'start-server.ps1'
$StartScriptContent = @"
param()

`$env:PYTHONUNBUFFERED = '1'

`$LogFile = Join-Path '$CuaDir' 'server.log'
`$ActivateScript = '$ActivateScript'
`$PipExe = '$PipExe'
`$Python = '$PyExe'

function Start-Server {
    Write-Output "Activating virtual environment and updating cua-computer-server..." | Out-File -FilePath `$LogFile -Append
    & `$ActivateScript
    & `$PipExe install --upgrade cua-computer-server 2>&1 | Out-File -FilePath `$LogFile -Append

    Write-Output "Starting CUA Computer Server on port 5000..." | Out-File -FilePath `$LogFile -Append
    & `$Python -m computer_server --port 5000 2>&1 | Out-File -FilePath `$LogFile -Append
    return `$LASTEXITCODE
}

while (`$true) {
    # Capture the function's return value directly. Previously the returned
    # value was discarded into the output stream and `$LASTEXITCODE was
    # re-read after the call, relying on it surviving the Out-File pipeline.
    `$code = Start-Server
    Write-Output "Server exited with code: `$code. Restarting in 5s..." | Out-File -FilePath `$LogFile -Append
    Start-Sleep -Seconds 5
}
"@

Set-Content -Path $StartScript -Value $StartScriptContent -Encoding UTF8
Write-Log -LogFile $script:LogFile -Message "Start script created at $StartScript"

# Create VBScript wrapper to launch PowerShell hidden.
# WScript.Shell.Run with window style 0 starts PowerShell without a visible
# console; the trailing 'False' means "do not wait for the process to exit".
$VbsWrapper = Join-Path $CuaDir 'start-server-hidden.vbs'
$VbsContent = @"
Set objShell = CreateObject("WScript.Shell")
objShell.Run "powershell.exe -NoProfile -ExecutionPolicy Bypass -File ""$StartScript""", 0, False
"@
Set-Content -Path $VbsWrapper -Value $VbsContent -Encoding ASCII
Write-Log -LogFile $script:LogFile -Message "VBScript wrapper created at $VbsWrapper"

# Create scheduled task to run at logon.
# Registers a task that launches the VBScript wrapper (and thus the hidden
# server loop) whenever the target user logs on.
try {
  $TaskName = 'CUA-Computer-Server'
  $Username = 'Docker'  # Default user for Dockur Windows

  # Remove existing task if present so re-running this script is idempotent.
  $existingTask = Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue
  if ($existingTask) {
    Write-Log -LogFile $script:LogFile -Message "Removing existing scheduled task: $TaskName"
    Unregister-ScheduledTask -TaskName $TaskName -Confirm:$false
  }

  # Create action to run VBScript wrapper (hidden)
  $Action = New-ScheduledTaskAction -Execute 'wscript.exe' -Argument "`"$VbsWrapper`""

  # Trigger: At logon of user
  $UserId = "$env:COMPUTERNAME\$Username"
  $Trigger = New-ScheduledTaskTrigger -AtLogOn -User $UserId

  # Principal: Run in background without window (S4U = Service For User).
  # S4U avoids storing the user's password with the task.
  $Principal = New-ScheduledTaskPrincipal -UserId $UserId -LogonType S4U -RunLevel Highest

  # Task settings - hide window, retry aggressively, and use a 365-day
  # execution limit so the long-running server loop is effectively never
  # stopped by the scheduler.
  $Settings = New-ScheduledTaskSettingsSet `
    -AllowStartIfOnBatteries `
    -DontStopIfGoingOnBatteries `
    -StartWhenAvailable `
    -RestartCount 999 `
    -RestartInterval (New-TimeSpan -Minutes 1) `
    -ExecutionTimeLimit (New-TimeSpan -Days 365) `
    -Hidden

  # Register the task (-Force overwrites any remaining definition).
  Write-Log -LogFile $script:LogFile -Message "Registering scheduled task '$TaskName' to run as $Username at logon (hidden)"
  Register-ScheduledTask `
    -TaskName $TaskName `
    -Action $Action `
    -Trigger $Trigger `
    -Principal $Principal `
    -Settings $Settings `
    -Force | Out-Null

  Write-Log -LogFile $script:LogFile -Message "Scheduled task '$TaskName' registered successfully (runs hidden in background)"

} catch {
  Write-Log -LogFile $script:LogFile -Message "Scheduled task setup error: $($_.Exception.Message)"
  throw
}

Write-Log -LogFile $script:LogFile -Message "=== CUA Computer Server setup completed ==="
exit 0

```

--------------------------------------------------------------------------------
/blog/introducing-cua-cli.md:
--------------------------------------------------------------------------------

```markdown
# Introducing the Cua CLI: Manage Cloud Sandboxes from Your Terminal

If you've been using our Cloud Sandboxes, you've probably been managing them through the web dashboard—clicking through forms to create instances, copying credentials, manually starting and stopping sandboxes. It works, but it's not exactly built for power users like you.

Today we're launching the **Cua CLI**: a command-line interface that brings the full power of our Cloud Sandbox platform to your terminal. Create, manage, and connect to Linux, Windows, or macOS sandboxes in seconds—all from a single command.

![Cua CLI Banner](https://github.com/user-attachments/assets/f8358acf-9194-46ee-b9e3-50cfcff5e489)

## What You Can Do

The Cua CLI handles everything you need to work with Cloud Sandboxes:

**Authentication**

- Browser-based OAuth login with automatic credential storage
- Direct API key support for CI/CD pipelines
- Export credentials to `.env` files for SDK integration

**Sandbox Management**

- Create sandboxes with your choice of OS, size, and region
- List all your sandboxes with status and connection details
- Start, stop, restart, and delete sandboxes
- Open remote desktop (VNC) connections directly in your browser

**Two Command Styles**

The CLI supports both flat and grouped command structures—use whichever fits your workflow:

```bash
# Grouped style (explicit & clear)
cua sb ls
cua sb create --os linux --size small --region north-america
cua sb vnc my-sandbox

# Flat style (quick & concise)
cua ls
cua create --os linux --size small --region north-america
cua vnc my-sandbox
```

Both styles work identically. The CLI shows grouped commands in help by default, but all flat commands remain available for backwards compatibility.

## Installation

One command installs everything (includes Bun runtime + Cua CLI):

```bash
# macOS/Linux
curl -LsSf https://cua.ai/cli/install.sh | sh

# Windows
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```

Or install via npm if you prefer:

```bash
npm install -g @trycua/cli
```

## Getting Started

Authenticate with your Cua account:

```bash
# Interactive browser login (recommended)
cua auth login

# Or provide your API key directly
cua auth login --api-key sk-your-api-key-here
```

Create a sandbox:

```bash
cua sb create --os linux --size small --region north-america
# Sandbox created and ready: my-sandbox-abc123
# Password: secure-password-here
# Host: my-sandbox-abc123.sandbox.cua.ai
```

List your sandboxes:

```bash
cua sb list
# NAME                 STATUS    HOST
# my-sandbox-abc123    running   my-sandbox-abc123.sandbox.cua.ai
# test-windows-456     stopped   test-windows-456.sandbox.cua.ai
```

Open a remote desktop:

```bash
cua sb vnc my-sandbox-abc123
# Opens your browser to the VNC interface with password pre-filled
```

## SDK Integration

Export your API key to a `.env` file for seamless SDK integration:

```bash
cd my-project
cua auth env
# Wrote /path/to/my-project/.env
```

Then use it with our Python or TypeScript SDKs:

```python
from computer import Computer

computer = Computer(
    os_type="linux",
    provider_type="cloud",
    name="my-sandbox-abc123",
    api_key="your-api-key"  # Or load from .env
)

await computer.run()
```

## Sandbox Sizes & Regions

Create sandboxes in the size and region that fits your needs:

**Sizes:**

- `small` - 2 cores, 8 GB RAM, 128 GB SSD
- `medium` - 4 cores, 16 GB RAM, 128 GB SSD
- `large` - 8 cores, 32 GB RAM, 256 GB SSD

**Regions:**

- `north-america`
- `europe`
- `asia-pacific`
- `south-america`

**OS Options:**

- `linux` - Ubuntu with XFCE desktop
- `windows` - Windows 11 with Edge and Python
- `macos` - macOS (preview access)

## Example Workflows

**Quick Testing Environment**

```bash
# Spin up a sandbox, test something, tear it down
cua sb create --os linux --size small --region north-america
# ... do your testing ...
cua sb delete my-sandbox-abc123
```

**Persistent Development Sandbox**

```bash
# Create a sandbox for long-term use
cua sb create --os linux --size medium --region north-america

# Stop it when not in use (data persists)
cua sb stop my-sandbox-abc123

# Start it again when needed
cua sb start my-sandbox-abc123
```

**CI/CD Integration**

```bash
# Provision sandboxes in your pipeline
export CUA_API_KEY="sk-your-api-key"
cua auth login --api-key "$CUA_API_KEY"
cua sb create --os linux --size large --region north-america

# Run your tests with the Cua Computer SDK
python run_tests.py

# Clean up
cua sb delete my-test-sandbox
```

## Command Aliases

We've added aliases for common commands to speed up your workflow:

```bash
# List aliases
cua list    # or: cua ls, cua ps, cua sb list

# VNC aliases
cua vnc     # or: cua open, cua sb vnc
```

## FAQs

<details>
<summary><strong>Can I use this in scripts and CI/CD?</strong></summary>

Yes. All commands support non-interactive mode with `--api-key` flags, and the CLI exits with proper status codes for scripting. The flat command style (`cua list`, `cua create`) is particularly useful for quick scripts.

</details>

<details>
<summary><strong>Where are my credentials stored?</strong></summary>

API keys are stored in `~/.cua/cli.sqlite` using a local SQLite database. They never leave your machine. Use `cua auth logout` to clear stored credentials.

</details>

<details>
<summary><strong>What happens to passwords in the output?</strong></summary>

Passwords are hidden by default in `cua list` for security. Use `cua list --show-passwords` to display them when needed.

</details>

<details>
<summary><strong>Can I manage sandboxes created through the web dashboard?</strong></summary>

Yes. The CLI and dashboard share the same API. Any sandbox you create in the dashboard will show up in `cua list`, and vice versa.

</details>

<details>
<summary><strong>How do I update the CLI?</strong></summary>

If you installed via script:

```bash
curl -LsSf https://cua.ai/cli/install.sh | sh
```

If you installed via npm:

```bash
npm install -g @trycua/cli@latest
```

</details>

## What's Next

We're actively iterating based on feedback. Planned features include:

- SSH key management for secure sandbox access
- Template-based sandbox creation
- Batch operations (start/stop multiple sandboxes)
- Custom sandbox configurations
- Snapshot management

If there's a feature you need, let us know in [Discord](https://discord.gg/cua-ai).

## Need Help?

- **Documentation**: [https://cua.ai/docs/cli-playbook/commands](https://cua.ai/docs/cli-playbook/commands)
- **Installation Guide**: [https://cua.ai/docs/cli-playbook](https://cua.ai/docs/cli-playbook)
- **Discord Community**: [https://discord.gg/cua-ai](https://discord.gg/cua-ai)

---

Get started at [cua.ai](https://cua.ai) or check out the [quickstart guide](https://cua.ai/docs/get-started/quickstart).

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/factory.py:
--------------------------------------------------------------------------------

```python
"""Factory for creating VM providers."""

import logging
from typing import Any, Dict, Optional, Type, Union

from .base import BaseVMProvider, VMProviderType

logger = logging.getLogger(__name__)


class VMProviderFactory:
    """Factory for creating VM providers based on provider type."""

    @staticmethod
    def create_provider(
        provider_type: Union[str, VMProviderType],
        provider_port: int = 7777,
        host: str = "localhost",
        bin_path: Optional[str] = None,
        storage: Optional[str] = None,
        shared_path: Optional[str] = None,
        image: Optional[str] = None,
        verbose: bool = False,
        ephemeral: bool = False,
        noVNC_port: Optional[int] = None,
        api_port: Optional[int] = None,
        **kwargs,
    ) -> BaseVMProvider:
        """Create a VM provider of the specified type.

        Args:
            provider_type: Type of VM provider to create (string or VMProviderType)
            provider_port: Port for the provider's API server
            host: Hostname for the API server
            bin_path: Path to provider binary if needed
            storage: Path for persistent VM storage
            shared_path: Path for shared folder between host and VM
            image: VM image to use (for Lumier and Docker providers)
            verbose: Enable verbose logging
            ephemeral: Use ephemeral (temporary) storage
            noVNC_port: Specific port for noVNC interface (for Lumier and Docker provider)
            api_port: Specific port for Computer API server (for Docker provider)
            **kwargs: Extra provider-specific options (forwarded to Cloud and
                WinSandbox providers)

        Returns:
            An instance of the requested VM provider

        Raises:
            ImportError: If the required dependencies for the provider are not installed
            ValueError: If the provider type is not supported
        """
        # Remember the caller's original value so an unsupported type is
        # reported as what was actually passed, not VMProviderType.UNKNOWN.
        requested = provider_type
        # Convert string to enum if needed
        if isinstance(provider_type, str):
            try:
                provider_type = VMProviderType(provider_type.lower())
            except ValueError:
                provider_type = VMProviderType.UNKNOWN

        if provider_type == VMProviderType.LUME:
            try:
                from .lume import HAS_LUME, LumeProvider

                if not HAS_LUME:
                    raise ImportError(
                        "The pylume package is required for LumeProvider. "
                        "Please install it with 'pip install cua-computer[lume]'"
                    )
                return LumeProvider(
                    provider_port=provider_port,
                    host=host,
                    storage=storage,
                    verbose=verbose,
                    ephemeral=ephemeral,
                )
            except ImportError as e:
                logger.error(f"Failed to import LumeProvider: {e}")
                raise ImportError(
                    "The pylume package is required for LumeProvider. "
                    "Please install it with 'pip install cua-computer[lume]'"
                ) from e
        elif provider_type == VMProviderType.LUMIER:
            try:
                from .lumier import HAS_LUMIER, LumierProvider

                if not HAS_LUMIER:
                    raise ImportError(
                        "Docker is required for LumierProvider. "
                        "Please install Docker for Apple Silicon and Lume CLI before using this provider."
                    )
                return LumierProvider(
                    provider_port=provider_port,
                    host=host,
                    storage=storage,
                    shared_path=shared_path,
                    image=image or "macos-sequoia-cua:latest",
                    verbose=verbose,
                    ephemeral=ephemeral,
                    noVNC_port=noVNC_port,
                )
            except ImportError as e:
                logger.error(f"Failed to import LumierProvider: {e}")
                raise ImportError(
                    "Docker and Lume CLI are required for LumierProvider. "
                    "Please install Docker for Apple Silicon and run the Lume installer script."
                ) from e

        elif provider_type == VMProviderType.CLOUD:
            try:
                from .cloud import CloudProvider

                return CloudProvider(
                    verbose=verbose,
                    **kwargs,
                )
            except ImportError as e:
                logger.error(f"Failed to import CloudProvider: {e}")
                raise ImportError(
                    "The CloudProvider is not fully implemented yet. "
                    "Please use LUME or LUMIER provider instead."
                ) from e
        elif provider_type == VMProviderType.WINSANDBOX:
            try:
                from .winsandbox import HAS_WINSANDBOX, WinSandboxProvider

                if not HAS_WINSANDBOX:
                    raise ImportError(
                        "pywinsandbox is required for WinSandboxProvider. "
                        "Please install it with 'pip install -U git+https://github.com/karkason/pywinsandbox.git'"
                    )
                return WinSandboxProvider(
                    host=host,
                    storage=storage,
                    verbose=verbose,
                    ephemeral=ephemeral,
                    **kwargs,
                )
            except ImportError as e:
                logger.error(f"Failed to import WinSandboxProvider: {e}")
                raise ImportError(
                    "pywinsandbox is required for WinSandboxProvider. "
                    "Please install it with 'pip install -U git+https://github.com/karkason/pywinsandbox.git'"
                ) from e
        elif provider_type == VMProviderType.DOCKER:
            try:
                from .docker import HAS_DOCKER, DockerProvider

                if not HAS_DOCKER:
                    raise ImportError(
                        "Docker is required for DockerProvider. "
                        "Please install Docker and ensure it is running."
                    )
                return DockerProvider(
                    host=host,
                    storage=storage,
                    shared_path=shared_path,
                    image=image or "trycua/cua-ubuntu:latest",
                    verbose=verbose,
                    ephemeral=ephemeral,
                    vnc_port=noVNC_port,
                    api_port=api_port,
                )
            except ImportError as e:
                logger.error(f"Failed to import DockerProvider: {e}")
                raise ImportError(
                    "Docker is required for DockerProvider. "
                    "Please install Docker and ensure it is running."
                ) from e
        else:
            # Report the caller's original input, which is more actionable
            # than the normalized VMProviderType.UNKNOWN sentinel.
            raise ValueError(f"Unsupported provider type: {requested}")

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/telemetry.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Telemetry
description: How telemetry works in Cua and how to control it
---

# Telemetry

Cua collects anonymized usage and error statistics. We follow [Posthog's ethical telemetry approach](https://posthog.com/blog/open-source-telemetry-ethical). To opt out, set `telemetry_enabled` to false.

## What we collect

### Enabled by default (opt-out)

- System info: OS, OS version, Python version
- Module initialization: When modules are imported and their versions
- Performance: Agent run durations, step counts, token usage, API costs
- Session tracking: Anonymous session IDs and run IDs

### Disabled by default (opt-in)

**Trajectory logging** captures full conversation history:

- User messages and agent responses
- Computer actions and outputs
- Agent reasoning traces

Must be explicitly enabled.

### We don't collect

- Personal information or user identifiers
- API keys or credentials
- File contents or application data
- Files being accessed
- Screenshots or screen contents (unless trajectory logging is enabled)
- Text being typed, user inputs, model outputs, computer outputs, or tool call outputs (unless trajectory logging is enabled)

## How to disable

### Environment variable (global)

Set `CUA_TELEMETRY_ENABLED` to a falsy value (`0`, `false`, `no`, or `off`):

```bash
export CUA_TELEMETRY_ENABLED=false
```

Or in Python:

```python
import os
os.environ["CUA_TELEMETRY_ENABLED"] = "false"
```

<Callout type="info">
  **Deprecated environment variables:** The environment variables `CUA_TELEMETRY` and
  `CUA_TELEMETRY_DISABLED` are deprecated and no longer have any effect. Use `CUA_TELEMETRY_ENABLED`
  instead.
</Callout>

### Per instance

**Computer SDK:**

```python
from computer import Computer

computer = Computer(telemetry_enabled=False)
```

**Agent SDK:**

```python
from agent import ComputerAgent
import os

# Basic telemetry - performance metrics only (opt-out, enabled by default)
agent = ComputerAgent(
    model="claude-sonnet-4-5-20250929",
    telemetry_enabled=True  # Default is True
)

# Enable telemetry with full conversation trajectory logging (opt-in)
agent = ComputerAgent(
    model="claude-sonnet-4-5-20250929",
    telemetry_enabled={
        "log_trajectory": True  # Logs full conversation items
    }
)

# Disable completely
agent = ComputerAgent(
    model="claude-sonnet-4-5-20250929",
    telemetry_enabled=False
)

# Enable trajectory logging (opt-in)
agent = ComputerAgent(
    model="claude-sonnet-4-5-20250929",
    telemetry_enabled={"log_trajectory": True}
)
```

Check status:

```python
print(computer.telemetry_enabled)  # True or False
print(agent.telemetry_enabled)     # True, False, or dict
```

Telemetry settings are configured at initialization and can't be changed afterward.

## Events collected

### Computer SDK

| Event Name               | Data Collected                                                                                                                        | Trigger Notes                                                           |
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
| **computer_initialized** | • `os`: Operating system (e.g., 'windows', 'darwin', 'linux')<br />• `os_version`: OS version<br />• `python_version`: Python version | Triggered when a Computer instance is created                           |
| **module_init**          | • `module`: "computer"<br />• `version`: Package version<br />• `python_version`: Full Python version string                          | Triggered once when the computer package is imported for the first time |

### Agent SDK

| Event Name              | Data Collected                                                                                                                                                                                                                                                                                                        | Trigger Notes                                                         |
| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
| **module_init**         | • `module`: "agent"<br />• `version`: Package version<br />• `python_version`: Full Python version string                                                                                                                                                                                                             | Triggered once when the agent package is imported for the first time  |
| **agent_session_start** | • `session_id`: Unique UUID for this agent instance<br />• `agent_type`: Class name (e.g., "ComputerAgent")<br />• `model`: Model name (e.g., "claude-sonnet-4-5")<br />• `os`: Operating system<br />• `os_version`: OS version<br />• `python_version`: Python version                                              | Triggered when TelemetryCallback is initialized (agent instantiation) |
| **agent_run_start**     | • `session_id`: Agent session UUID<br />• `run_id`: Unique UUID for this run<br />• `start_time`: Unix timestamp<br />• `input_context_size`: Character count of input messages<br />• `num_existing_messages`: Count of existing messages<br />• `uploaded_trajectory`: Full conversation items (opt-in)             | Triggered at the start of each agent.run() call                       |
| **agent_run_end**       | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `end_time`: Unix timestamp<br />• `duration_seconds`: Total run duration<br />• `num_steps`: Total steps taken in this run<br />• `total_usage`: Accumulated token usage and costs<br />• `uploaded_trajectory`: Full conversation items (opt-in) | Triggered at the end of each agent.run() call                         |
| **agent_step**          | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `step`: Step number (incremental)<br />• `timestamp`: Unix timestamp<br />• `duration_seconds`: Duration of previous step                                                                                                                         | Triggered on each agent response/step during a run                    |
| **agent_usage**         | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `step`: Current step number<br />• `prompt_tokens`: Tokens in prompt<br />• `completion_tokens`: Tokens in response<br />• `total_tokens`: Total tokens used<br />• `response_cost`: Cost of this API call                                        | Triggered whenever usage information is received from LLM API         |

## Questions

Questions about telemetry? Open an issue on our [GitHub repository](https://github.com/trycua/cua).

```

--------------------------------------------------------------------------------
/libs/python/som/som/util/utils.py:
--------------------------------------------------------------------------------

```python
import logging
import signal
import time
from contextlib import contextmanager
from typing import Any, List, Optional, Sequence, Tuple, Union, cast

import cv2
import easyocr
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

logger = logging.getLogger(__name__)


class TimeoutException(Exception):
    """Raised by the timeout() context manager when the wrapped block exceeds its time budget."""

    pass


@contextmanager
def timeout(seconds):
    """Limit the wall-clock time of the enclosed block.

    Raises TimeoutException (via SIGALRM) if the block runs longer than
    ``seconds``. Unix-only: relies on signal.alarm, and must be used from
    the main thread.
    """

    def _on_alarm(signum, frame):
        logger.warning(f"OCR process timed out after {seconds} seconds")
        raise TimeoutException("OCR processing timed out")

    # Install our handler, remembering whatever was there before.
    previous_handler = signal.signal(signal.SIGALRM, _on_alarm)
    signal.alarm(seconds)
    try:
        yield
    finally:
        # Cancel any pending alarm and restore the prior handler.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous_handler)


# Initialize EasyOCR with optimized settings.
# NOTE: this runs at import time; with download_enabled=True the first run may
# download model weights, so importing this module can be slow.
logger.info("Initializing EasyOCR with optimized settings...")
reader = easyocr.Reader(
    ["en"],  # English-only recognition
    gpu=True,  # Use GPU if available
    model_storage_directory=None,  # Use default directory
    download_enabled=True,  # Allow fetching model weights if absent
    detector=True,  # Enable text detection
    recognizer=True,  # Enable text recognition
    verbose=False,  # Disable verbose output
    quantize=True,  # Enable quantization for faster inference
    cudnn_benchmark=True,  # Enable cuDNN benchmarking
)
logger.info("EasyOCR initialization complete")


def check_ocr_box(
    image_source: Union[str, Image.Image],
    display_img=True,
    output_bb_format="xywh",
    goal_filtering=None,
    easyocr_args=None,
    use_paddleocr=False,
) -> Tuple[Tuple[List[str], List[Tuple[float, float, float, float]]], Optional[Any]]:
    """Check OCR box using EasyOCR with optimized settings.

    Args:
        image_source: Either a file path or PIL Image
        display_img: Whether to render the annotated image via matplotlib
        output_bb_format: Format for bounding boxes ('xywh' or 'xyxy')
        goal_filtering: Opaque value passed through unchanged in the return
        easyocr_args: Extra EasyOCR keyword args, merged over the speed-tuned defaults
        use_paddleocr: Ignored (kept for backward compatibility)

    Returns:
        Tuple containing:
        - Tuple of (text_list, bounding_boxes)
        - goal_filtering value (passed through)

    Raises:
        ValueError: If display_img is False and output_bb_format is not
            'xywh' or 'xyxy'.
    """
    logger.info("Starting OCR processing...")
    start_time = time.time()

    # Normalize the input to an RGB numpy array for EasyOCR.
    if isinstance(image_source, str):
        logger.info(f"Loading image from path: {image_source}")
        image_source = Image.open(image_source)
    if image_source.mode == "RGBA":
        logger.info("Converting RGBA image to RGB")
        image_source = image_source.convert("RGB")
    image_np = np.array(image_source)
    w, h = image_source.size
    logger.info(f"Image size: {w}x{h}")

    # Default EasyOCR arguments optimized for speed
    default_args = {
        "paragraph": False,  # Disable paragraph detection
        "text_threshold": 0.5,  # Confidence threshold
        "link_threshold": 0.4,  # Text link threshold
        "canvas_size": 2560,  # Max image size
        "mag_ratio": 1.0,  # Magnification ratio
        "slope_ths": 0.1,  # Slope threshold
        "ycenter_ths": 0.5,  # Y-center threshold
        "height_ths": 0.5,  # Height threshold
        "width_ths": 0.5,  # Width threshold
        "add_margin": 0.1,  # Margin around text
        "min_size": 20,  # Minimum text size
    }

    # Caller-provided arguments override the defaults.
    if easyocr_args:
        logger.info(f"Using custom EasyOCR arguments: {easyocr_args}")
        default_args.update(easyocr_args)

    try:
        # Bound OCR latency: the SIGALRM-based timeout aborts slow detections.
        logger.info("Starting EasyOCR detection with 5 second timeout...")
        with timeout(5):  # 5 second timeout
            # EasyOCR's readtext returns a list of tuples, where each tuple is (bbox, text, confidence)
            raw_result = reader.readtext(image_np, **default_args)
            result = cast(Sequence[Tuple[List[Tuple[float, float]], str, float]], raw_result)
            coord = [item[0] for item in result]  # item[0] is the bbox coordinates
            text = [item[1] for item in result]  # item[1] is the text content
            logger.info(f"OCR completed successfully. Found {len(text)} text regions")
            logger.info(f"Detected text: {text}")

    except TimeoutException:
        # Degrade to an empty result so callers always get the same shape.
        logger.error("OCR processing timed out after 5 seconds")
        coord = []
        text = []
    except Exception as e:
        logger.error(f"OCR processing failed with error: {str(e)}")
        coord = []
        text = []

    processing_time = time.time() - start_time
    logger.info(f"Total OCR processing time: {processing_time:.2f} seconds")

    if display_img:
        # Display path always draws and returns xywh boxes, regardless of
        # output_bb_format.
        logger.info("Creating visualization of OCR results...")
        opencv_img = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        bb = []
        for item in coord:
            x, y, a, b = get_xywh(item)
            bb.append((x, y, a, b))
            # Convert float coordinates to integers for cv2.rectangle
            x_val = cast(float, x)
            y_val = cast(float, y)
            a_val = cast(float, a)
            b_val = cast(float, b)
            x_int, y_int = int(x_val), int(y_val)
            a_int, b_int = int(a_val), int(b_val)
            cv2.rectangle(
                opencv_img, (x_int, y_int), (x_int + a_int, y_int + b_int), (0, 255, 0), 2
            )
        plt.imshow(cv2.cvtColor(opencv_img, cv2.COLOR_BGR2RGB))
    else:
        if output_bb_format == "xywh":
            bb = [get_xywh(item) for item in coord]
        elif output_bb_format == "xyxy":
            bb = [get_xyxy(item) for item in coord]
        else:
            # Previously an unknown format left `bb` unbound and raised a
            # confusing NameError below; fail fast with a clear message.
            raise ValueError(f"Unsupported output_bb_format: {output_bb_format!r}")

    # Cast the bounding boxes to the expected type
    bb = cast(List[Tuple[float, float, float, float]], bb)

    logger.info("OCR processing complete")
    return (text, bb), goal_filtering


def get_xywh(box):
    """
    Convert a bounding box to xywh format (x, y, width, height).

    Accepts several input shapes: a flat numeric 4-tuple (xywh or xyxy,
    distinguished heuristically), a corner pair [[x1, y1], [x2, y2]], or a
    polygon given as a sequence of (x, y) points.

    Args:
        box: Bounding box coordinates (various formats supported)

    Returns:
        Tuple of (x, y, width, height) — or the input unchanged when it is
        already a flat numeric box judged to be xywh.
    """
    if len(box) == 4:
        first, third = box[0], box[2]
        if isinstance(first, (int, float)) and isinstance(third, (int, float)):
            # Heuristic: values decreasing left-to-right are taken as xyxy.
            # NOTE(review): this can produce negative width/height when
            # triggered; confirm the intended heuristic with callers.
            if third < first or box[3] < box[1]:
                x1, y1, x2, y2 = box
                return x1, y1, x2 - x1, y2 - y1
            return box
    elif len(box) == 2:
        # Corner-pair format [[x1, y1], [x2, y2]] from some OCR engines.
        (x1, y1), (x2, y2) = box
        return x1, y1, x2 - x1, y2 - y1

    # Fallback: treat the input as a polygon and take its axis-aligned
    # bounding rectangle.
    xs = [pt[0] for pt in box]
    ys = [pt[1] for pt in box]
    left, top = min(xs), min(ys)
    return left, top, max(xs) - left, max(ys) - top


def get_xyxy(box):
    """
    Convert a bounding box to xyxy format (x1, y1, x2, y2).

    Args:
        box: Bounding box coordinates (various formats supported)

    Returns:
        Tuple of (x1, y1, x2, y2)
    """
    # Normalize to (x, y, w, h) via get_xywh, then derive the far corner.
    left, top, width, height = get_xywh(box)
    return left, top, left + width, top + height

```

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/ss-v2.py:
--------------------------------------------------------------------------------

```python
#!/usr/bin/env python3
"""
ScreenSpot-v2 Benchmark Script

Evaluates models on the ScreenSpot-v2 dataset for click prediction accuracy.
Supports both ComputerAgent model strings and custom model classes.
"""

import argparse
import asyncio
import random
import statistics
import time
from typing import Optional

from datasets import load_dataset
from tqdm import tqdm
from utils import (
    ModelWrapper,
    get_available_models,
    get_gpu_memory,
    is_click_in_bbox,
    save_results_to_markdown,
    save_visualizations,
)


async def evaluate_model(
    model_wrapper: ModelWrapper, samples, max_samples: Optional[int] = None
) -> dict:
    """
    Evaluate a model on any iterable of samples.

    Args:
        model_wrapper: ModelWrapper instance
        samples: Iterable of dicts with keys: image, bbox, instruction
        max_samples: Maximum number of samples to evaluate (None for all)

    Returns:
        Dictionary with evaluation results, including accuracy, failure rate,
        per-sample records, timing statistics, and VRAM statistics.
    """
    print(f"\nEvaluating model: {model_wrapper.model_name}")

    # Load model
    await model_wrapper.load_model()

    # Convert to list if needed and limit samples
    if hasattr(samples, "__len__"):
        total_samples = len(samples)
        if max_samples is not None:
            total_samples = min(max_samples, total_samples)
        sample_list = list(samples)[:total_samples]
    else:
        # For iterators, take max_samples or all
        sample_list = list(samples)
        if max_samples is not None:
            sample_list = sample_list[:max_samples]
        total_samples = len(sample_list)

    correct_predictions = 0
    error_predictions = 0
    results = []

    for i, sample in enumerate(tqdm(sample_list, desc=f"Evaluating {model_wrapper.model_name}")):
        # Extract required data (only these 3 keys matter)
        image = sample["image"]
        instruction = sample["instruction"]
        bbox = sample["bbox"]  # [x1, y1, x2, y2]

        # Predict click coordinates with timing. A single failing sample must
        # not abort the whole run, so prediction errors are caught and counted
        # (previously `error_predictions` was never incremented and one raised
        # exception lost all results).
        start_time = time.time()
        try:
            click_coords = await model_wrapper.predict_click(image, instruction)
            failed = False
        except Exception as e:
            click_coords = None
            failed = True
            error_predictions += 1
            print(f"\nPrediction failed for sample {i}: {e}")
        prediction_time = time.time() - start_time

        # Check if prediction is correct (a failed prediction is never correct)
        is_correct = (not failed) and is_click_in_bbox(click_coords, bbox)

        if is_correct:
            correct_predictions += 1

        results.append(
            {
                "sample_idx": i,
                "instruction": instruction,
                "bbox": bbox,
                "predicted_coords": click_coords,
                "is_correct": is_correct,
                "failed": failed,
                "prediction_time": prediction_time,
            }
        )

    # Unload model
    await model_wrapper.unload_model()

    # Calculate metrics
    accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0
    error_rate = error_predictions / total_samples if total_samples > 0 else 0.0

    # Calculate timing statistics over successful predictions only
    successful_times = [r["prediction_time"] for r in results if not r["failed"]]
    avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0
    median_prediction_time = statistics.median(successful_times) if successful_times else 0.0
    min_prediction_time = min(successful_times) if successful_times else 0.0
    max_prediction_time = max(successful_times) if successful_times else 0.0

    # Get VRAM statistics
    vram_stats = model_wrapper.get_vram_stats()

    return {
        "model_name": model_wrapper.model_name,
        "total_samples": total_samples,
        "correct_predictions": correct_predictions,
        "failed_predictions": error_predictions,
        "accuracy": accuracy,
        "failure_rate": error_rate,
        "avg_prediction_time": avg_prediction_time,
        "median_prediction_time": median_prediction_time,
        "min_prediction_time": min_prediction_time,
        "max_prediction_time": max_prediction_time,
        "vram_max_mb": vram_stats["max_mb"],
        "vram_avg_mb": vram_stats["avg_mb"],
        "results": results,
    }


async def main():
    """
    Main function to run the benchmark.

    Parses CLI arguments, loads and shuffles ScreenSpot-v2, evaluates every
    available model, prints per-model summaries, and saves results/visuals.
    """
    # Command-line interface
    parser = argparse.ArgumentParser(description="ScreenSpot-v2 Benchmark Script")
    parser.add_argument(
        "--samples", type=int, default=500, help="Number of samples to evaluate (default: 500)"
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Random seed for shuffling (default: 42)"
    )
    cli_args = parser.parse_args()

    # Deterministic shuffling
    random.seed(cli_args.seed)

    # Load dataset and normalize each record to {image, instruction, bbox}
    print("Loading ScreenSpot-v2 dataset...")
    ds = load_dataset("lmms-lab/ScreenSpot-v2")
    dataset = ds["train"]  # type: ignore
    samples = []
    for record in dataset:
        record_dict = dict(record) if hasattr(record, "keys") else record

        # ScreenSpot-v2 stores bboxes as [x, y, w, h]; convert to [x1, y1, x2, y2]
        left, top, width, height = record_dict["bbox"]  # type: ignore

        samples.append(
            {
                "image": record_dict["image"],  # type: ignore
                "instruction": record_dict["instruction"],  # type: ignore
                "bbox": [left, top, left + width, top + height],
            }
        )
    print(f"Dataset loaded: {len(samples)} samples")

    random.shuffle(samples)
    print(f"Samples shuffled with seed {cli_args.seed}")

    # Evaluate each registered model in turn
    models = get_available_models()
    max_samples = cli_args.samples

    all_results = []
    for model in models:
        wrapper = ModelWrapper(model)
        result = await evaluate_model(wrapper, samples, max_samples)
        all_results.append(result)

        # Per-model summary
        print(f"\n{result['model_name']} Results:")
        print(f"  Accuracy: {result['accuracy']*100:.2f}%")
        print(f"  Correct: {result['correct_predictions']}/{result['total_samples']}")
        print(f"  Errors: {result['failed_predictions']}")
        print(f"  Error Rate: {result['failure_rate']*100:.2f}%")
        print(f"  Avg Time: {result['avg_prediction_time']:.2f}s")
        print(f"  Median Time: {result['median_prediction_time']:.2f}s")
        print(
            f"  Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s"
        )
        print(f"  VRAM Max: {result['vram_max_mb']:.1f}MB")
        print(f"  VRAM Avg: {result['vram_avg_mb']:.1f}MB")

        # GPU memory snapshot, if available
        gpu_mem = get_gpu_memory()
        if gpu_mem and gpu_mem[0] > 0:
            print(f"  GPU Free Memory: {gpu_mem[0]:.1f}MB")

    # Persist results and visualizations
    if all_results:
        save_results_to_markdown(
            all_results, "screenspot_v2_results.md", title="ScreenSpot-v2 Benchmark Results"
        )
        save_visualizations(all_results, samples)
        print("\nBenchmark completed successfully!")
    else:
        print("\nNo successful evaluations completed.")


# Script entry point: run the async benchmark driver on the default event loop.
if __name__ == "__main__":
    asyncio.run(main())

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/telemetry.py:
--------------------------------------------------------------------------------

```python
"""
Telemetry callback handler for Computer-Use Agent (cua-agent)
"""

import platform
import time
import uuid
from typing import Any, Dict, List, Optional, Union

from core.telemetry import (
    is_telemetry_enabled,
    record_event,
)

from .base import AsyncCallbackHandler

# Static host details collected once at import time and attached to the
# agent_session_start telemetry event.
SYSTEM_INFO = {
    "os": platform.system().lower(),
    "os_version": platform.release(),
    "python_version": platform.python_version(),
}


class TelemetryCallback(AsyncCallbackHandler):
    """
    Telemetry callback handler for Computer-Use Agent (cua-agent).

    Emits session/run/step/usage events via `record_event` and, when opted in,
    attaches trajectory items to run events. All hooks are no-ops while
    telemetry is disabled.
    """

    def __init__(self, agent, log_trajectory: bool = False):
        """
        Initialize telemetry callback.

        Args:
            agent: The ComputerAgent instance
            log_trajectory: Whether to log full trajectory items (opt-in)
        """
        self.agent = agent
        self.log_trajectory = log_trajectory

        # One session id per callback instance; a fresh run id per run.
        self.session_id = str(uuid.uuid4())
        self.run_id = None

        # Timing/counters accumulated across the session.
        self.run_start_time = None
        self.step_count = 0
        self.step_start_time = None
        self.total_usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
            "response_cost": 0.0,
        }

        # Emit the session-start event immediately (unless telemetry is off).
        if is_telemetry_enabled():
            self._record_agent_initialization()

    def _record_agent_initialization(self) -> None:
        """Record agent type/model and session initialization."""
        # The agent loop type is the class name of agent.agent_loop, if set.
        loop = getattr(self.agent, "agent_loop", None)
        agent_type = type(loop).__name__ if loop is not None else "unknown"

        record_event(
            "agent_session_start",
            {
                "session_id": self.session_id,
                "agent_type": agent_type,
                "model": getattr(self.agent, "model", "unknown"),
                **SYSTEM_INFO,
            },
        )

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Called at the start of an agent run loop."""
        if not is_telemetry_enabled():
            return

        # New run: fresh id, fresh timer, reset step counter.
        self.run_id = str(uuid.uuid4())
        self.run_start_time = time.time()
        self.step_count = 0

        payload = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "start_time": self.run_start_time,
            "input_context_size": self._calculate_context_size(old_items),
            "num_existing_messages": len(old_items),
        }

        # Attach the pre-existing conversation if trajectory logging is on.
        if self.log_trajectory:
            trajectory = self._extract_trajectory(old_items)
            if trajectory:
                payload["uploaded_trajectory"] = trajectory

        record_event("agent_run_start", payload)

    async def on_run_end(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> None:
        """Called at the end of an agent run loop."""
        # Skip if telemetry is off or no matching on_run_start was recorded.
        if not is_telemetry_enabled() or not self.run_start_time:
            return

        now = time.time()
        payload = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "end_time": now,
            "duration_seconds": now - self.run_start_time,
            "num_steps": self.step_count,
            "total_usage": self.total_usage.copy(),
        }

        # Attach the newly produced items if trajectory logging is on.
        if self.log_trajectory:
            trajectory = self._extract_trajectory(new_items)
            if trajectory:
                payload["uploaded_trajectory"] = trajectory

        record_event("agent_run_end", payload)

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received."""
        if not is_telemetry_enabled():
            return

        # Fold this step's usage into the session totals.
        for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
            self.total_usage[key] += usage.get(key, 0)
        self.total_usage["response_cost"] += usage.get("response_cost", 0.0)

        # Also record the individual usage event as-is.
        record_event(
            "agent_usage",
            {
                "session_id": self.session_id,
                "run_id": self.run_id,
                "step": self.step_count,
                **usage,
            },
        )

    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """Called when responses are received."""
        if not is_telemetry_enabled():
            return

        self.step_count += 1

        # Duration of the previous step, if one was started.
        step_duration = None
        if self.step_start_time:
            step_duration = time.time() - self.step_start_time

        # Start timing the new step.
        self.step_start_time = time.time()

        payload = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "step": self.step_count,
            "timestamp": self.step_start_time,
        }
        if step_duration is not None:
            payload["duration_seconds"] = step_duration

        record_event("agent_step", payload)

    def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
        """Calculate approximate context size in tokens/characters.

        Sums the character lengths of string content and of "text" parts in
        list-valued message content.
        """
        size = 0
        for item in items:
            if item.get("type") == "message" and "content" in item:
                content = item["content"]
                if isinstance(content, str):
                    size += len(content)
                elif isinstance(content, list):
                    size += sum(
                        len(part["text"])
                        for part in content
                        if isinstance(part, dict) and "text" in part
                    )
            elif "content" in item and isinstance(item["content"], str):
                size += len(item["content"])
        return size

    def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Extract trajectory items that should be logged.

        Keeps user messages, assistant messages, reasoning traces, computer
        calls, and computer call outputs; each kept item is copied and stamped
        with a "logged_at" timestamp.
        """
        trajectory = []
        for item in items:
            item_type = item.get("type")
            keep = (
                item.get("role") == "user"  # User inputs
                or (item_type == "message" and item.get("role") == "assistant")  # Model outputs
                or item_type in ("reasoning", "computer_call", "computer_call_output")
            )
            if keep:
                entry = item.copy()
                entry["logged_at"] = time.time()
                trajectory.append(entry)
        return trajectory

```

--------------------------------------------------------------------------------
/blog/computer-use-agents-for-growth-hacking.md:
--------------------------------------------------------------------------------

```markdown
# Computer Use Agents for Growth Hacking: The Cua-la Strategy

_Published on January 16, 2025 by Sarina Li_

<img src="./assets/esther-and-sarina.JPG" alt="Esther and Sarina at DevFest Toronto">

Growing a developer-focused product is hard. Traditional marketing doesn't work. Booth rentals cost thousands. Sponsorships cost tens of thousands.

So we tried something different at Google DevFest Toronto: show up with backpacks full of cute cua-la keychains and see what happens.

This is the story of how two new hires, a growth engineer and a designer/artist, guerrilla marketed their way through a major tech conference with $200 worth of merch and a post-event automation pipeline.

## Meet the Team

**Sarina** (Growth Engineering): Built the post-event automation pipeline that extracts LinkedIn connections and generates personalized messages while you sleep.

**Esther** (Design + Art): Hand-crafted every piece of artwork, giving life to Cua through illustrations, branding, and yes, extremely cute cua-la keychains.

The thesis: what if we could draw people in with irresistible physical merch, then use computer use agents to handle all the tedious follow-up work?

## The cua-la Strategy

<img src="./assets/cua-at-devfest.JPG" alt="Guerrilla marketing at DevFest Toronto">

Google DevFest Toronto brought together hundreds of developers and AI enthusiasts. We didn't have a booth. We didn't have demos. We showed up with backpacks full of cua-la keychains with the cua.ai logo and started handing them out.

That's it. Pure guerrilla marketing—and the cua-las were absurdly effective.

People would literally crowd around us, not because they were interested in computer use (at first), but because they wanted a cua-la. We'd pitch Cua while handing out keychains, and suddenly we had an engaged audience!

<img src="./assets/devfest-image.JPG" alt="DevFest crowd">

### The Magic Moment

A few people stuck the cua-las on their bags immediately. Then, throughout the event, we started getting approached:

"Wait, are you the Cua girls?"

They'd seen the cua-las on someone's bag, asked about it, and tracked us down! The keychains became walking advertisements.

<img src="./assets/htn-at-devfest.JPG" alt="Hack the North recognition at DevFest">

Even better: two attendees recognized Cua from Hack the North. Our previous event marketing was actually working. People remembered us.

## Part 2: The Automation (Try It Yourself)

After DevFest, we had 20+ new LinkedIn connections. Normally, this means hours of:

- Manually copying names, roles, companies
- Opening each profile to find contact info
- Crafting personalized follow-up messages
- Updating your CRM

Sarina had a better idea: build the automation we wish existed, then open source it.

**The automation is live**: [Post-Event Contact Export cookbook](https://cua.ai/docs/example-usecases/post-event-contact-export)

### How It Works

<video controls width="100%">
  <source src="./assets/linkedin-scraping.mp4" type="video/mp4">
  LinkedIn scraping automation in action
</video>

The agent navigates LinkedIn like a human would: click profile, extract info, navigate back, repeat. But it does it overnight while you sleep.

The secret sauce: **VM session persistence**. By logging into LinkedIn once through Cua's VM, the session stays alive. No captchas, no bot detection, just smooth automation.

<video controls width="100%">
  <source src="./assets/adding-row-csv.mp4" type="video/mp4">
  Automatic CSV generation
</video>

Wake up to a clean CSV with:

- First name, last name
- Current role and company
- LinkedIn profile URLs
- Pre-generated messaging links

Then use that data to craft personalized messages. Sarina wrote unique follow-ups for each person, mentioning specific conversations from DevFest.

**Works for any platform**: LinkedIn, X/Twitter, or wherever your connections are. The cookbook includes full setup instructions and customizable code.

## The Results

**Cost Breakdown**

- Booth rental: $0 (didn't have one)
- Sponsorship: $0 (didn't buy one)
- cua-la keychains: ~$200
- Automation: Built by Sarina in a few hours post-event
- **Total spend: $200**

**What We Got**

- People crowding around us for cua-las
- Walking advertisements on bags throughout the event
- Instant brand recognition ("Are you the Cua girls?")
- Two people who remembered us from Hack the North
- 20+ quality connections extracted and messaged within 24 hours
- Several demo requests from personalized follow-ups

**ROI**
Traditional event marketing at this scale: $5-10K minimum for booth + sponsorship.

Our approach: $200 + scrappy execution.

The automation is reusable and will save hours of manual work, and the cua-las created more organic conversations than any booth could have.

## What Didn't Work (Yet)

**cua-la Distribution**
We ran out faster than expected! Next time: bigger bag, or limit to one per person.

**Automation Setup**
The VM login step added friction. "Log in manually first, then run the script" confused some people who wanted to try it themselves. Need better first-run UX.

**Message Personalization**
While the extraction was automated, each follow-up message was still written manually. We're looking for ways to better enrich messages with context from the event, which is hard to automate.

## What's Next: NeurIPS 2025

NeurIPS is the biggest AI conference of the year. Thousands of researchers, hundreds of companies.

**The good news**: We still have one giant bag of cua-las left. They're already packed and ready.

**The better news**: We're upgrading the automation.

### The Hypothesis

The cua-las get people interested. The automation ensures we actually follow through.

Most event marketing fails at the follow-up stage. You collect business cards, connect on LinkedIn, and then... nothing. The moment passes. People forget.

With Cua handling the mechanical work (data organization, connection tracking, follow-up scheduling), we can focus on the human part: genuine conversations, valuable introductions, and actually helping people.

## The Framework: Cute Merch + Smart Automation

Traditional event marketing: show up, pitch, collect cards.

Our approach: combine two forces that shouldn't work together but do.

**The Physical Hook**

- Make something people actually want (not another branded pen)
- Hand-crafted, memorable, Instagram-worthy
- Turns attendees into walking billboards
- Creates natural conversation starters

**The Digital Follow-Through**

- Automate the tedious post-event work
- Extract connections while you sleep
- Personalize follow-ups with real context
- Actually close the loop before the moment passes

**Why It Works**
The cua-las get you in the door. The automation ensures you don't waste the opportunity.

Most companies nail one or the other:

- Great merch, terrible follow-up → missed opportunities
- Amazing automation, boring presence → no one cares

Do both, and you create a flywheel: each event builds brand recognition for the next, while automation ensures maximum value from every connection.

See you at NeurIPS 2025!

---

_Want to build your own growth hacking automations? Check out [Cua on GitHub](https://github.com/trycua/cua) or join our [Discord](https://discord.gg/cua) to share your experiments. cua-las not included (yet)._

```

--------------------------------------------------------------------------------
/blog/ubuntu-docker-support.md:
--------------------------------------------------------------------------------

```markdown
# Ubuntu Docker Support in Cua with Kasm

_Published Aug 26, 2025 by Francesco Bonacci_

Today we’re shipping **Ubuntu Docker support** in Cua. You get a full Linux desktop inside a Docker container, viewable right in your browser—no VM spin-up, no extra clients. It behaves the same on macOS, Windows, and Linux.

<img src="./assets/docker-ubuntu-support.png" alt="Cua + KasmVNC Ubuntu container desktop">

## Why we did this

If you build automation or RL workflows with Cua, you’ve probably run into the usual platform walls: macOS VMs (via Lume) are Apple-Silicon only; Windows Sandbox needs Pro/Enterprise; giving agents your host desktop is… exciting, but risky; and little OS quirks make “build once, run anywhere” harder than it should be.

We wanted something lightweight, isolated, and identical across machines. So we put a desktop in a container.

## Why we didn’t use QEMU/KVM

Short answer: **portability, startup time, and ops friction.**

- **Runs everywhere, no hypervisor drama.** KVM needs Linux; Hyper-V/Virtualization.Framework setups vary by host and policy. Docker is ubiquitous across macOS/Windows/Linux and allowed in most CI runners—so your GUI env actually runs where your team works.
- **Faster boot & smaller footprints.** Containers cold-start in seconds and images are GB-scale; VMs tend to be minutes and tens of GB. That matters for parallel agents, CI, and local iteration.
- **Lower ops overhead.** No nested virt, kernel modules, or privileged host tweaks that many orgs (and cloud runners) block. Pull → run → browser.
- **Same image, everywhere.** One Docker image gives you an identical desktop on every dev laptop and in CI.
- **Web-first access out of the box.** KasmVNC serves the desktop over HTTP—no extra VNC/RDP clients or SPICE config.

**When we _do_ reach for QEMU/KVM:**

- You need **true OS isolation** or to run **non-Linux** guests.
- You want **kernel-level features** or **device/GPU passthrough** (VFIO).
- You’re optimizing for **hardware realism** over startup speed and density.

For this release, the goal was a **cross-platform Linux desktop that feels instant and identical** across local dev and CI. Containers + KasmVNC hit that sweet spot.

## What we built

Under the hood it’s **KasmVNC + Ubuntu 22.04 (Xfce) in Docker**, pre-configured for computer-use automation. You get a proper GUI desktop served over HTTP (no VNC/RDP client), accessible from any modern browser. Cua’s Computer server boots automatically so your agents can connect immediately.

### How it works (at a glance)

```
Your System
└─ Docker Container
   └─ Xfce Desktop + KasmVNC → open in your browser
```

---

## Quick start

1. **Install Docker** — Docker Desktop (macOS/Windows) or Docker Engine (Linux).

2. **Pull or build the image**

```bash
# Pull (recommended)
docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest

# Or build locally
cd libs/kasm
docker build -t cua-ubuntu:latest .
```

3. **Run with Cua’s Computer SDK**

```python
from computer import Computer

computer = Computer(
    os_type="linux",
    provider_type="docker",
    image="trycua/cua-ubuntu:latest",
    name="my-automation-container"
)

await computer.run()
```

### Make an agent that drives this desktop

```python
from agent import ComputerAgent

# assumes `computer` is the instance created above
agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer])

async for _ in agent.run("Click on the search bar and type 'hello world'"):
    pass
```

> Use any VLM with tool use; just make sure your OpenRouter creds are set.

By default you land on **Ubuntu 22.04 + Xfce** with a browser and desktop basics, the **Computer server** is running, the **web viewer** is available at `http://localhost:8006`, and common automation tools are preinstalled.

---

## What’s inside (in plain English)

A tidy Linux desktop with web access through **KasmVNC**, Python 3.11 and dev tools, plus utilities you’ll actually use for automation—`wmctrl` for windows, `xclip` for clipboard, `ffmpeg` for media, screenshot helpers, and so on. It starts as a **non-root `kasm-user`**, lives in an **isolated filesystem** (unless you mount volumes), and ships with **SSL off for local dev** so you terminate TLS upstream when you deploy.

---

## How it compares

| Feature          | KasmVNC Docker        | Lume (macOS VM)       | Windows Sandbox        |
| ---------------- | --------------------- | --------------------- | ---------------------- |
| Platform support | macOS, Windows, Linux | macOS (Apple Silicon) | Windows Pro/Enterprise |
| Resource usage   | Low (container)       | Medium (full VM)      | Medium (full VM)       |
| Setup time       | \~30s                 | 2–5 min               | 1–2 min                |
| GUI desktop      | Linux                 | macOS                 | Windows                |
| Web access       | Browser (no client)   | Typically VNC client  | Typically RDP client   |
| Consistency      | Same everywhere       | Hardware-dependent    | OS-dependent           |

**Use KasmVNC Docker when…** you want the **same GUI env across devs/CI/platforms**, you’re doing **RL or end-to-end GUI tests**, or you need **many isolated desktops on one machine**.
**Use alternatives when…** you need native **macOS** (→ Lume) or native **Windows** (→ Windows Sandbox).

---

## Using the Agent Framework (parallel example)

A compact pattern for running multiple desktops and agents side-by-side:

```python
import asyncio
from computer import Computer
from agent import ComputerAgent

# Create multiple computer instances (each gets its own desktop)
computers = []
for i in range(3):
    c = Computer(
        os_type="linux",
        provider_type="docker",
        image="trycua/cua-ubuntu:latest",
        name=f"parallel-desktop-{i}"
    )
    computers.append(c)
    await c.run()

# Pair each desktop with a task
tasks = [
    "open github and search for 'trycua/cua'",
    "open a text editor and write 'hello world'",
    "open the browser and go to google.com",
]

agents = [
    ComputerAgent(model="openrouter/z-ai/glm-4.5v", tools=[c])
    for c in computers
]

async def run_agent(agent, task):
    async for _ in agent.run(task):
        pass

await asyncio.gather(*[run_agent(a, t) for a, t in zip(agents, tasks)])
```

---

## What’s next

We’re polishing a **CLI to push/scale these containers on Cua Cloud**, exploring **GPU acceleration** for in-container inference, and publishing **prebuilt images** for Playwright, Selenium, and friends.

---

## Try it

```python
from computer import Computer
computer = Computer(os_type="linux", provider_type="docker", image="trycua/cua-ubuntu:latest")
await computer.run()
```

---

## Links

- **Docker Provider Docs:** [https://cua.ai/docs/computers/docker](https://cua.ai/docs/computer-sdk/computers#linux-on-docker)
- **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC)
- **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm)
- **Computer SDK:** [https://cua.ai/docs/computer-sdk/computers](https://cua.ai/docs/computer-sdk/computers)
- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)

Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.

```

--------------------------------------------------------------------------------
/libs/python/bench-ui/bench_ui/child.py:
--------------------------------------------------------------------------------

```python
import asyncio
import json
import os
import random
import socket
import sys
import threading
from pathlib import Path
from typing import Optional

import webview
from aiohttp import web


def _get_free_port() -> int:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]


def _start_http_server(
    window: webview.Window,
    port: int,
    ready_event: threading.Event,
    html_content: str | None = None,
    folder_path: str | None = None,
):
    """Start a background aiohttp control server for ``window`` on 127.0.0.1:``port``.

    Endpoints:
      - POST /rect: body {"selector": str, "space": "window"|"screen"} ->
        {"rect": {...}} with the bounding rect of the first element matching
        the CSS selector, in window coordinates or approximate screen
        coordinates.
      - POST /eval: body {"javascript": str} (or legacy key "code") ->
        {"result": ...} from evaluating the JS in the window.
      - GET /: serves ``html_content`` when given; when ``folder_path`` is
        given the whole folder is served statically instead; otherwise a
        small status JSON.

    The server runs on its own daemon thread with a dedicated event loop and
    is never shut down explicitly — it dies with the process.
    """

    async def rect_handler(request: web.Request):
        """POST /rect: locate a DOM element and return its bounding rect."""
        try:
            data = await request.json()
        except Exception:
            return web.json_response({"error": "invalid_json"}, status=400)
        selector = data.get("selector")
        space = data.get("space", "window")  # "window" (default) or "screen"
        if not isinstance(selector, str):
            return web.json_response({"error": "selector_required"}, status=400)

        # Ensure window content is loaded
        if not ready_event.is_set():
            # give it a short chance to finish loading
            ready_event.wait(timeout=2.0)
        if not ready_event.is_set():
            # 409: the page never finished loading, so JS evaluation would fail.
            return web.json_response({"error": "window_not_ready"}, status=409)

        # Safely embed selector into JS
        selector_js = json.dumps(selector)
        if space == "screen":
            # Compute approximate screen coordinates using window metrics
            # NOTE(review): frameH assumes the window frame is entirely above
            # the viewport (title bar only, no side borders) — approximate.
            js = (
                "(function(){"
                f"const s = {selector_js};"
                "const el = document.querySelector(s);"
                "if(!el){return null;}"
                "const r = el.getBoundingClientRect();"
                "const sx = (window.screenX ?? window.screenLeft ?? 0);"
                "const syRaw = (window.screenY ?? window.screenTop ?? 0);"
                "const frameH = (window.outerHeight - window.innerHeight) || 0;"
                "const sy = syRaw + frameH;"
                "return {x:sx + r.left, y:sy + r.top, width:r.width, height:r.height};"
                "})()"
            )
        else:
            # Window-space rect straight from getBoundingClientRect.
            js = (
                "(function(){"
                f"const s = {selector_js};"
                "const el = document.querySelector(s);"
                "if(!el){return null;}"
                "const r = el.getBoundingClientRect();"
                "return {x:r.left,y:r.top,width:r.width,height:r.height};"
                "})()"
            )
        try:
            # Evaluate JS on the target window; this call is thread-safe in pywebview
            result = window.evaluate_js(js)
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
        # result is None when the selector matched nothing.
        return web.json_response({"rect": result})

    async def eval_handler(request: web.Request):
        """POST /eval: run arbitrary JavaScript in the window and return its value."""
        try:
            data = await request.json()
        except Exception:
            return web.json_response({"error": "invalid_json"}, status=400)
        # Accept both "javascript" and the legacy "code" key.
        code = data.get("javascript") or data.get("code")
        if not isinstance(code, str):
            return web.json_response({"error": "javascript_required"}, status=400)

        if not ready_event.is_set():
            ready_event.wait(timeout=2.0)
        if not ready_event.is_set():
            return web.json_response({"error": "window_not_ready"}, status=409)

        try:
            result = window.evaluate_js(code)
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
        return web.json_response({"result": result})

    async def index_handler(request: web.Request):
        """GET /: serve inline HTML if configured, else a status JSON."""
        if html_content is None:
            return web.json_response({"status": "ok", "message": "bench-ui control server"})
        return web.Response(text=html_content, content_type="text/html")

    app = web.Application()

    # If serving a folder, add static file routes
    if folder_path:
        app.router.add_static("/", folder_path, show_index=True)
    else:
        app.router.add_get("/", index_handler)

    app.router.add_post("/rect", rect_handler)
    app.router.add_post("/eval", eval_handler)

    # Dedicated event loop, driven by a daemon thread so webview.start() can
    # keep the main thread (required by pywebview on most platforms).
    loop = asyncio.new_event_loop()

    def run_loop():
        # Runs forever; the daemon thread is reaped when the process exits.
        asyncio.set_event_loop(loop)
        runner = web.AppRunner(app)
        loop.run_until_complete(runner.setup())
        site = web.TCPSite(runner, "127.0.0.1", port)
        loop.run_until_complete(site.start())
        loop.run_forever()

    t = threading.Thread(target=run_loop, daemon=True)
    t.start()


def main():
    """Entry point for the bench-ui child process.

    Reads a JSON config file (path in ``sys.argv[1]``), creates a pywebview
    window showing either a remote URL, a static folder, or inline HTML,
    starts the local control HTTP server, prints ``{"pid", "port"}`` on
    stdout for the parent process to read, then blocks in the GUI loop.

    Exits with status 2 when no config path is supplied.
    """
    if len(sys.argv) < 2:
        print("Usage: python -m bench_ui.child <config.json>", file=sys.stderr)
        sys.exit(2)

    cfg_path = Path(sys.argv[1])
    cfg = json.loads(cfg_path.read_text(encoding="utf-8"))

    html: Optional[str] = cfg.get("html") or ""
    url: Optional[str] = cfg.get("url")
    folder: Optional[str] = cfg.get("folder")
    title: str = cfg.get("title", "Window")
    x: Optional[int] = cfg.get("x")
    y: Optional[int] = cfg.get("y")
    width: int = int(cfg.get("width", 600))
    height: int = int(cfg.get("height", 400))
    # NOTE(review): the following config values are read but never used below
    # — kept for config-schema compatibility; wire them up or drop them.
    icon: Optional[str] = cfg.get("icon")
    use_inner_size: bool = bool(cfg.get("use_inner_size", False))
    title_bar_style: str = cfg.get("title_bar_style", "default")

    # Choose port early so we can point the window to it when serving inline HTML or folder
    port = _get_free_port()

    # Decide what the window loads and what the control server serves.
    # Precedence: explicit url > static folder > inline HTML.
    if url:
        resolved_url = url
        html_for_server = None
        folder_for_server = None
    elif folder:
        # Serve static folder at control server root and point window to index.html
        resolved_url = f"http://127.0.0.1:{port}/index.html"
        html_for_server = None
        folder_for_server = folder
    else:
        # Serve inline HTML at control server root and point window to it
        resolved_url = f"http://127.0.0.1:{port}/"
        html_for_server = html
        folder_for_server = None

    # Single window creation path for all three content sources.
    window = webview.create_window(
        title,
        url=resolved_url,
        width=width,
        height=height,
        x=x,
        y=y,
        confirm_close=False,
        text_select=True,
        background_color="#FFFFFF",
    )

    # Track when the page is loaded so JS execution succeeds
    window_ready = threading.Event()

    def _on_loaded():
        window_ready.set()

    window.events.loaded += _on_loaded  # type: ignore[attr-defined]

    # Start HTTP server for control (and optionally serve inline HTML or static folder)
    _start_http_server(
        window, port, window_ready, html_content=html_for_server, folder_path=folder_for_server
    )

    # Print startup info for parent to read
    print(json.dumps({"pid": os.getpid(), "port": port}), flush=True)

    # Start GUI (blocking)
    webview.start(debug=os.environ.get("CUA_BENCH_UI_DEBUG", "false").lower() in ("true", "1"))


if __name__ == "__main__":
    main()

```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Config.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation

/// `lume config` — get or set lume configuration.
struct Config: ParsableCommand {
    static let configuration = CommandConfiguration(
        commandName: "config",
        abstract: "Get or set lume configuration",
        // Fix: `Debug` was defined below but never registered, making
        // `lume config debug` unreachable. It stays hidden via shouldDisplay.
        subcommands: [Get.self, Debug.self, Storage.self, Cache.self, Caching.self],
        defaultSubcommand: Get.self
    )

    // MARK: - Basic Configuration Subcommands

    /// `lume config get` — print the current configuration summary.
    struct Get: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "get",
            abstract: "Get current configuration"
        )

        func run() throws {
            let controller = LumeController()
            let settings = controller.getSettings()

            // Display default location
            print(
                "Default VM storage: \(settings.defaultLocationName) (\(settings.defaultLocation?.path ?? "not set"))"
            )

            // Display cache directory
            print("Cache directory: \(settings.cacheDirectory)")

            // Display caching enabled status
            print("Caching enabled: \(settings.cachingEnabled)")

            // Display all locations
            if !settings.vmLocations.isEmpty {
                print("\nConfigured VM storage locations:")
                for location in settings.sortedLocations {
                    let isDefault = location.name == settings.defaultLocationName
                    let defaultMark = isDefault ? " (default)" : ""
                    print("  - \(location.name): \(location.path)\(defaultMark)")
                }
            }
        }
    }

    // MARK: - Debug Command

    /// `lume config debug` — hidden command that dumps raw settings state.
    struct Debug: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "debug",
            abstract: "Output detailed debug information about current configuration",
            shouldDisplay: false  // hidden from help output
        )

        func run() throws {
            let debugInfo = SettingsManager.shared.debugSettings()
            print(debugInfo)
        }
    }

    // MARK: - Caching Management Subcommands

    /// `lume config caching` — enable/disable and inspect image caching.
    struct Caching: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "caching",
            abstract: "Manage image caching settings",
            subcommands: [GetCaching.self, SetCaching.self]
        )

        struct GetCaching: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "get",
                abstract: "Show current caching status"
            )

            func run() throws {
                let controller = LumeController()
                let cachingEnabled = controller.isCachingEnabled()
                print("Caching enabled: \(cachingEnabled)")
            }
        }

        struct SetCaching: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "set",
                abstract: "Enable or disable image caching"
            )

            @Argument(help: "Enable or disable caching (true/false)")
            var enabled: Bool

            func run() throws {
                let controller = LumeController()
                try controller.setCachingEnabled(enabled)
                print("Caching \(enabled ? "enabled" : "disabled")")
            }
        }
    }

    // MARK: - Cache Management Subcommands

    /// `lume config cache` — get or set the cache directory path.
    struct Cache: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "cache",
            abstract: "Manage cache settings",
            subcommands: [GetCache.self, SetCache.self]
        )

        struct GetCache: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "get",
                abstract: "Get current cache directory"
            )

            func run() throws {
                let controller = LumeController()
                let cacheDir = controller.getCacheDirectory()
                print("Cache directory: \(cacheDir)")
            }
        }

        struct SetCache: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "set",
                abstract: "Set cache directory"
            )

            @Argument(help: "Path to cache directory")
            var path: String

            func run() throws {
                let controller = LumeController()
                try controller.setCacheDirectory(path: path)
                print("Cache directory set to: \(path)")
            }
        }
    }

    // MARK: - Storage Management Subcommands

    /// `lume config storage` — add/remove/list VM storage locations and set the default.
    struct Storage: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "storage",
            abstract: "Manage VM storage locations",
            subcommands: [Add.self, Remove.self, List.self, Default.self]
        )

        struct Add: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "add",
                abstract: "Add a new VM storage location"
            )

            @Argument(help: "Storage name (alphanumeric with dashes/underscores)")
            var name: String

            @Argument(help: "Path to VM storage directory")
            var path: String

            func run() throws {
                let controller = LumeController()
                try controller.addLocation(name: name, path: path)
                print("Added VM storage location: \(name) at \(path)")
            }
        }

        struct Remove: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "remove",
                abstract: "Remove a VM storage location"
            )

            @Argument(help: "Storage name to remove")
            var name: String

            func run() throws {
                let controller = LumeController()
                try controller.removeLocation(name: name)
                print("Removed VM storage location: \(name)")
            }
        }

        struct List: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "list",
                abstract: "List all VM storage locations"
            )

            func run() throws {
                let controller = LumeController()
                let settings = controller.getSettings()

                if settings.vmLocations.isEmpty {
                    print("No VM storage locations configured")
                    return
                }

                print("VM Storage Locations:")
                for location in settings.sortedLocations {
                    let isDefault = location.name == settings.defaultLocationName
                    let defaultMark = isDefault ? " (default)" : ""
                    print("  - \(location.name): \(location.path)\(defaultMark)")
                }
            }
        }

        struct Default: ParsableCommand {
            static let configuration = CommandConfiguration(
                commandName: "default",
                abstract: "Set the default VM storage location"
            )

            @Argument(help: "Storage name to set as default")
            var name: String

            func run() throws {
                let controller = LumeController()
                try controller.setDefaultLocation(name: name)
                print("Set default VM storage location to: \(name)")
            }
        }
    }
}

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/holo.py:
--------------------------------------------------------------------------------

```python
"""
Holo 1.5 agent loop implementation for click prediction using litellm.acompletion.

Implements the Holo1.5 grounding behavior:
- Prompt asks for absolute pixel coordinates in JSON: {"action":"click_absolute","x":int,"y":int}
- Optionally resizes the image using Qwen2-VL smart_resize parameters (via transformers AutoProcessor)
- If resized, maps predicted coordinates back to the original screenshot resolution

Note: We do NOT manually load the model; acompletions (via HuggingFaceLocalAdapter)
will handle loading based on the provided model name.
"""

from __future__ import annotations

import base64
import json
from io import BytesIO
from typing import Any, Dict, List, Optional, Tuple

import litellm
from PIL import Image

from ..decorators import register_agent
from ..types import AgentCapability
from .base import AsyncAgentConfig


def _strip_hf_prefix(model: str) -> str:
    """Strip provider prefixes like 'huggingface-local/' from model names for HF processor load."""
    if "/" in model and model.lower().startswith("huggingface-local/"):
        return model.split("/", 1)[1]
    return model


def _maybe_smart_resize(image: Image.Image, model: str) -> Tuple[Image.Image, Tuple[int, int]]:
    """
    Attempt Qwen2-VL ``smart_resize`` preprocessing via transformers.

    Returns (possibly_resized_image, (orig_width, orig_height)). Whenever
    transformers is unavailable, the processor fails to load, or the target
    size equals the original, the untouched image is returned.
    """
    original_size = image.size  # (width, height)
    try:
        # Lazy imports: transformers is an optional dependency here.
        from transformers import AutoProcessor  # type: ignore
        from transformers.models.qwen2_vl.image_processing_qwen2_vl import (  # type: ignore
            smart_resize,
        )

        processor = AutoProcessor.from_pretrained(_strip_hf_prefix(model))
        image_processor = getattr(processor, "image_processor", None)
        if image_processor is None:
            return image, original_size

        # assumes patch_size/merge_size defaults of (14, 1) when absent — TODO confirm
        factor = getattr(image_processor, "patch_size", 14) * getattr(
            image_processor, "merge_size", 1
        )
        # smart_resize works in (height, width) order.
        new_h, new_w = smart_resize(
            original_size[1],
            original_size[0],
            factor=factor,
            min_pixels=getattr(image_processor, "min_pixels", 256 * 256),
            max_pixels=getattr(image_processor, "max_pixels", 1536 * 1536),
        )

        if (new_w, new_h) == original_size:
            return image, original_size

        resized = image.resize((new_w, new_h), resample=Image.Resampling.LANCZOS)
        return resized, original_size
    except Exception:
        # Any failure (missing dependency, processor load error) -> no resize.
        return image, original_size


def _build_holo_prompt(instruction: str) -> str:
    """Construct the Holo1.5 grounding prompt."""
    # Keep it close to the cookbook while avoiding heavy schema generation
    schema_hint = '{"action": "click_absolute", "x": <int>, "y": <int>}'
    return (
        "Localize an element on the GUI image according to the provided target and output a click position. "
        f"You must output a valid JSON following the format: {schema_hint} "
        f"Your target is: {instruction}"
    )


def _parse_click_json(output_text: str) -> Optional[Tuple[int, int]]:
    """
    Parse JSON from model output and extract x, y ints.
    Tries to find the first JSON object substring if extra text is present.
    """
    try:
        # Fast path: direct JSON
        data = json.loads(output_text)
    except Exception:
        # Try to locate a JSON object within the text
        start = output_text.find("{")
        end = output_text.rfind("}")
        if start == -1 or end == -1 or end <= start:
            return None
        try:
            data = json.loads(output_text[start : end + 1])
        except Exception:
            return None

    try:
        x = int(data.get("x"))
        y = int(data.get("y"))
        return x, y
    except Exception:
        return None


@register_agent(models=r"(?i).*(Holo1\.5|Hcompany/Holo1\.5).*")
class HoloConfig(AsyncAgentConfig):
    """Holo is a family of UI grounding models from H Company.

    Registered for any model string containing "Holo1.5" (case-insensitive).
    Only single-click localization is supported (see ``get_capabilities``);
    the full agent step loop is deliberately unimplemented.
    """

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Unsupported for Holo models; always raises NotImplementedError."""
        # Holo models are only trained on UI localization tasks, not all-in-one agent
        raise NotImplementedError()

    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str,
        **kwargs,
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates using Holo1.5 via litellm.acompletion.

        - Optionally smart-resizes the image using Qwen2-VL rules if transformers are available
        - Prompts for JSON with absolute pixel coordinates
        - Parses x,y and maps back to original screenshot size if resized

        Args:
            model: litellm model identifier (may carry a provider prefix).
            image_b64: base64-encoded screenshot (decoded with PIL).
            instruction: natural-language description of the click target.
            **kwargs: optional overrides for ``max_tokens`` / ``temperature``.

        Returns:
            (x, y) pixel coordinates in the ORIGINAL screenshot's coordinate
            space, clamped to its bounds, or None when decoding or parsing
            fails.
        """
        try:
            img_bytes = base64.b64decode(image_b64)
            original_img = Image.open(BytesIO(img_bytes))
        except Exception:
            # Undecodable input image: nothing sensible to predict.
            return None

        # Optional preprocessing
        processed_img, (orig_w, orig_h) = _maybe_smart_resize(original_img, model)

        # If we resized, send the resized image; otherwise send original
        img_to_send = processed_img
        buf = BytesIO()
        img_to_send.save(buf, format="PNG")
        processed_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

        prompt = _build_holo_prompt(instruction)

        # Single-turn message: image first, then the localization prompt.
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{processed_b64}"},
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ]

        api_kwargs = {
            "model": model,
            "messages": messages,
            # Deterministic, small output
            "max_tokens": kwargs.get("max_tokens", 256),
            "temperature": kwargs.get("temperature", 0.0),
        }

        # NOTE(review): assumes an OpenAI-style response shape from litellm
        # (choices[0].message.content) — confirm for custom adapters.
        response = await litellm.acompletion(**api_kwargs)
        output_text = (response.choices[0].message.content or "").strip()  # type: ignore

        coords = _parse_click_json(output_text)
        if coords is None:
            return None

        x, y = coords

        # Map back to original size if we resized
        proc_w, proc_h = img_to_send.size
        if (proc_w, proc_h) != (orig_w, orig_h):
            try:
                # Scale factors from processed space back to original space.
                sx = orig_w / float(proc_w)
                sy = orig_h / float(proc_h)
                x = int(round(x * sx))
                y = int(round(y * sy))
            except Exception:
                # Fallback: clamp within original bounds
                pass

        # Clamp to original image bounds
        x = max(0, min(orig_w - 1, x))
        y = max(0, min(orig_h - 1, y))
        return x, y

    def get_capabilities(self) -> List[AgentCapability]:
        """This agent only supports click (UI localization) requests."""
        return ["click"]

```

--------------------------------------------------------------------------------
/libs/python/mcp-server/CONCURRENT_SESSIONS.md:
--------------------------------------------------------------------------------

```markdown
# MCP Server Concurrent Session Management

This document describes the improvements made to the MCP Server to address concurrent session management and resource lifecycle issues.

## Problem Statement

The original MCP server implementation had several critical issues:

1. **Global Computer Instance**: Used a single `global_computer` variable shared across all clients
2. **No Resource Isolation**: Multiple clients would interfere with each other
3. **Sequential Task Processing**: Multi-task operations were always sequential
4. **No Graceful Shutdown**: Server couldn't properly cleanup resources on shutdown
5. **Hidden Event Loop**: `server.run()` hid the event loop, preventing proper lifecycle management

## Solution Architecture

### 1. Session Manager (`session_manager.py`)

The `SessionManager` class provides:

- **Per-session computer instances**: Each client gets isolated computer resources
- **Computer instance pooling**: Efficient reuse of computer instances with lifecycle management
- **Task registration**: Track active tasks per session for graceful cleanup
- **Automatic cleanup**: Background task cleans up idle sessions
- **Resource limits**: Configurable maximum concurrent sessions

#### Key Components:

```python
class SessionManager:
    def __init__(self, max_concurrent_sessions: int = 10):
        self._sessions: Dict[str, SessionInfo] = {}
        self._computer_pool = ComputerPool()
        # ... lifecycle management
```

#### Session Lifecycle:

1. **Creation**: New session created when client first connects
2. **Task Registration**: Each task is registered with the session
3. **Activity Tracking**: Last activity time updated on each operation
4. **Cleanup**: Sessions cleaned up when idle or on shutdown

### 2. Computer Pool (`ComputerPool`)

Manages computer instances efficiently:

- **Pool Size Limits**: Maximum number of concurrent computer instances
- **Instance Reuse**: Available instances reused across sessions
- **Lifecycle Management**: Proper startup/shutdown of computer instances
- **Resource Cleanup**: All instances properly closed on shutdown

### 3. Enhanced Server Tools

All server tools now support:

- **Session ID Parameter**: Optional `session_id` for multi-client support
- **Resource Isolation**: Each session gets its own computer instance
- **Task Tracking**: Proper registration/unregistration of tasks
- **Error Handling**: Graceful error handling with session cleanup

#### Updated Tool Signatures:

```python
async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any:
async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any:
async def run_multi_cua_tasks(ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False) -> Any:
```

### 4. Concurrent Task Execution

The `run_multi_cua_tasks` tool now supports:

- **Sequential Mode** (default): Tasks run one after another
- **Concurrent Mode**: Tasks run in parallel using `asyncio.gather()`
- **Progress Tracking**: Proper progress reporting for both modes
- **Error Handling**: Individual task failures don't stop other tasks

### 5. Graceful Shutdown

The server now provides:

- **Signal Handlers**: Proper handling of SIGINT and SIGTERM
- **Session Cleanup**: All active sessions properly cleaned up
- **Resource Release**: Computer instances returned to pool and closed
- **Async Lifecycle**: Event loop properly exposed for cleanup

## Usage Examples

### Basic Usage (Backward Compatible)

```python
# These calls work exactly as before
await screenshot_cua(ctx)
await run_cua_task(ctx, "Open browser")
await run_multi_cua_tasks(ctx, ["Task 1", "Task 2"])
```

### Multi-Client Usage

```python
# Client 1
session_id_1 = "client-1-session"
await screenshot_cua(ctx, session_id_1)
await run_cua_task(ctx, "Open browser", session_id_1)

# Client 2 (completely isolated)
session_id_2 = "client-2-session"
await screenshot_cua(ctx, session_id_2)
await run_cua_task(ctx, "Open editor", session_id_2)
```

### Concurrent Task Execution

```python
# Run tasks concurrently instead of sequentially
tasks = ["Open browser", "Open editor", "Open terminal"]
results = await run_multi_cua_tasks(ctx, tasks, concurrent=True)
```

### Session Management

```python
# Get session statistics
stats = await get_session_stats(ctx)
print(f"Active sessions: {stats['total_sessions']}")

# Cleanup specific session
await cleanup_session(ctx, "session-to-cleanup")
```

## Configuration

### Environment Variables

- `CUA_MODEL_NAME`: Model to use (default: `anthropic/claude-sonnet-4-5-20250929`)
- `CUA_MAX_IMAGES`: Maximum images to keep (default: `3`)

### Session Manager Configuration

```python
# In session_manager.py
class SessionManager:
    def __init__(self, max_concurrent_sessions: int = 10):
        # Configurable maximum concurrent sessions

class ComputerPool:
    def __init__(self, max_size: int = 5, idle_timeout: float = 300.0):
        # Configurable pool size and idle timeout
```

## Performance Improvements

### Before (Issues):

- ❌ Single global computer instance
- ❌ Client interference and resource conflicts
- ❌ Sequential task processing only
- ❌ No graceful shutdown
- ❌ 30s timeout issues with long-running tasks

### After (Benefits):

- ✅ Per-session computer instances with proper isolation
- ✅ Computer instance pooling for efficient resource usage
- ✅ Concurrent task execution support
- ✅ Graceful shutdown with proper cleanup
- ✅ Streaming updates prevent timeout issues
- ✅ Configurable resource limits
- ✅ Automatic session cleanup

## Testing

Comprehensive test coverage includes:

- Session creation and reuse
- Concurrent session isolation
- Task registration and cleanup
- Error handling with session management
- Concurrent vs sequential task execution
- Session statistics and cleanup

Run tests with:

```bash
pytest tests/test_mcp_server_session_management.py -v
```

## Migration Guide

### For Existing Clients

No changes required! The new implementation is fully backward compatible:

```python
# This still works exactly as before
await run_cua_task(ctx, "My task")
```

### For New Multi-Client Applications

Use session IDs for proper isolation:

```python
# Create a unique session ID for each client
session_id = str(uuid.uuid4())
await run_cua_task(ctx, "My task", session_id)
```

### For Concurrent Task Execution

Enable concurrent mode for better performance:

```python
tasks = ["Task 1", "Task 2", "Task 3"]
results = await run_multi_cua_tasks(ctx, tasks, concurrent=True)
```

## Monitoring and Debugging

### Session Statistics

```python
stats = await get_session_stats(ctx)
print(f"Total sessions: {stats['total_sessions']}")
print(f"Max concurrent: {stats['max_concurrent']}")
for session_id, session_info in stats['sessions'].items():
    print(f"Session {session_id}: {session_info['active_tasks']} active tasks")
```

### Logging

The server provides detailed logging for:

- Session creation and cleanup
- Task registration and completion
- Resource pool usage
- Error conditions and recovery

### Graceful Shutdown

The server properly handles shutdown signals:

```bash
# Send SIGTERM for graceful shutdown
kill -TERM <server_pid>

# Or use Ctrl+C (SIGINT)
```

## Future Enhancements

Potential future improvements:

1. **Session Persistence**: Save/restore session state across restarts
2. **Load Balancing**: Distribute sessions across multiple server instances
3. **Resource Monitoring**: Real-time monitoring of resource usage
4. **Auto-scaling**: Dynamic adjustment of pool size based on demand
5. **Session Timeouts**: Configurable timeouts for different session types

```

--------------------------------------------------------------------------------
/blog/human-in-the-loop.md:
--------------------------------------------------------------------------------

```markdown
# When Agents Need Human Wisdom - Introducing Human-In-The-Loop Support

_Published on August 29, 2025 by Francesco Bonacci_

Sometimes the best AI agent is a human. Whether you're creating training demonstrations, evaluating complex scenarios, or need to intervene when automation hits a wall, our new Human-In-The-Loop integration puts you directly in control.

With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could benchmark any agent at scale. Today's update lets you _become_ the agent when it matters most—seamlessly switching between automated intelligence and human judgment.

<div align="center">
  <video src="https://github.com/user-attachments/assets/9091b50f-26e7-4981-95ce-40e5d42a1260" width="600" controls></video>
</div>

## What you get

- **One-line human takeover** for any agent configuration with `human/human` or `model+human/human`
- **Interactive web UI** to see what your agent sees and control what it does
- **Zero context switching** - step in exactly where automation left off
- **Training data generation** - create perfect demonstrations by doing tasks yourself
- **Ground truth evaluation** - validate agent performance with human expertise

## Why Human-In-The-Loop?

Even the most sophisticated agents encounter edge cases, ambiguous interfaces, or tasks requiring human judgment. Rather than failing gracefully, they can now fail _intelligently_—by asking for human help.

This approach bridges the gap between fully automated systems and pure manual control, letting you:

- **Demonstrate complex workflows** that agents can learn from
- **Evaluate tricky scenarios** where ground truth requires human assessment
- **Intervene selectively** when automated agents need guidance
- **Test and debug** your tools and environments manually

## Getting Started

Launch the human agent interface:

```bash
python -m agent.human_tool
```

The web UI will show pending completions. Click any completion to take control of the agent and see exactly what it sees.

## Usage Examples

### Direct Human Control

Perfect for creating demonstrations or when you want full manual control:

```python
from agent import ComputerAgent
from agent.computer import computer

agent = ComputerAgent(
    "human/human",
    tools=[computer]
)

# You'll get full control through the web UI
async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"):
    pass
```

### Hybrid: AI Planning + Human Execution

Combine model intelligence with human precision—let AI plan, then execute manually:

```python
agent = ComputerAgent(
    "huggingface-local/HelloKKMe/GTA1-7B+human/human",
    tools=[computer]
)

# AI creates the plan, human executes each step
async for _ in agent.run("Navigate to the settings page and enable dark mode"):
    pass
```

### Fallback Pattern

Start automated, escalate to human when needed:

```python
# Primary automated agent
primary_agent = ComputerAgent("openai/computer-use-preview", tools=[computer])

# Human fallback agent
fallback_agent = ComputerAgent("human/human", tools=[computer])

try:
    async for result in primary_agent.run(task):
        if result.confidence < 0.7:  # Low confidence threshold
            # Seamlessly hand off to human
            async for _ in fallback_agent.run(f"Continue this task: {task}"):
                pass
except Exception:
    # Agent failed, human takes over
    async for _ in fallback_agent.run(f"Handle this failed task: {task}"):
        pass
```

## Interactive Features

The human-in-the-loop interface provides a rich, responsive experience:

### **Visual Environment**

- **Screenshot display** with live updates as you work
- **Click handlers** for direct interaction with UI elements
- **Zoom and pan** to see details clearly

### **Action Controls**

- **Click actions** - precise cursor positioning and clicking
- **Keyboard input** - type text naturally or send specific key combinations
- **Action history** - see the sequence of actions taken
- **Undo support** - step back when needed

### **Tool Integration**

- **Full OpenAI compatibility** - standard tool call format
- **Custom tools** - integrate your own tools seamlessly
- **Real-time feedback** - see tool responses immediately

### **Smart Polling**

- **Responsive updates** - UI refreshes when new completions arrive
- **Background processing** - continue working while waiting for tasks
- **Session persistence** - resume interrupted sessions

## Real-World Use Cases

### **Training Data Generation**

Create perfect demonstrations for fine-tuning:

```python
# Generate training examples for spreadsheet tasks
demo_agent = ComputerAgent("human/human", tools=[computer])

tasks = [
    "Create a budget spreadsheet with income and expense categories",
    "Apply conditional formatting to highlight overbudget items",
    "Generate a pie chart showing expense distribution"
]

for task in tasks:
    # Human demonstrates each task perfectly
    async for _ in demo_agent.run(task):
        pass  # Recorded actions become training data
```

### **Evaluation and Ground Truth**

Validate agent performance on complex scenarios:

```python
# Human evaluates agent performance
evaluator = ComputerAgent("human/human", tools=[computer])

async for _ in evaluator.run("Review this completed form and rate accuracy (1-10)"):
    pass  # Human provides authoritative quality assessment
```

### **Interactive Debugging**

Step through agent behavior manually:

```python
# Test a workflow step by step
debug_agent = ComputerAgent("human/human", tools=[computer])

async for _ in debug_agent.run("Reproduce the agent's failed login sequence"):
    pass  # Human identifies exactly where automation breaks
```

### **Edge Case Handling**

Handle scenarios that break automated agents:

```python
# Complex UI interaction requiring human judgment
edge_case_agent = ComputerAgent("human/human", tools=[computer])

async for _ in edge_case_agent.run("Navigate this CAPTCHA-protected form"):
    pass  # Human handles what automation cannot
```

## Configuration Options

Customize the human agent experience:

- **UI refresh rate**: Adjust polling frequency for your workflow
- **Image quality**: Balance detail vs. performance for screenshots
- **Action logging**: Save detailed traces for analysis and training
- **Session timeout**: Configure idle timeouts for security
- **Tool permissions**: Restrict which tools humans can access

## When to Use Human-In-The-Loop

| **Scenario**                 | **Why Human Control**                                 |
| ---------------------------- | ----------------------------------------------------- |
| **Creating training data**   | Perfect demonstrations for model fine-tuning          |
| **Evaluating complex tasks** | Human judgment for subjective or nuanced assessment   |
| **Handling edge cases**      | CAPTCHAs, unusual UIs, context-dependent decisions    |
| **Debugging workflows**      | Step through failures to identify breaking points     |
| **High-stakes operations**   | Critical tasks requiring human oversight and approval |
| **Testing new environments** | Validate tools and environments work as expected      |

## Learn More

- **Interactive examples**: Try human-in-the-loop control with sample tasks
- **Training data pipelines**: Learn how to convert human demonstrations into model training data
- **Evaluation frameworks**: Build human-validated test suites for your agents
- **API documentation**: Full reference for human agent configuration

Ready to put humans back in the loop? The most sophisticated AI system knows when to ask for help.

---

_Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://cua.ai/docs/agent-sdk/supported-agents/human-in-the-loop)._

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/human_tool/server.py:
--------------------------------------------------------------------------------

```python
import asyncio
import uuid
from dataclasses import asdict, dataclass
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel


class CompletionStatus(str, Enum):
    """Lifecycle states of a queued completion call."""

    PENDING = "pending"      # waiting for a human to respond
    COMPLETED = "completed"  # a response and/or tool calls were provided
    FAILED = "failed"        # errored or timed out before completion


@dataclass
class CompletionCall:
    """A single completion request routed to a human operator."""

    id: str  # uuid4 string assigned by the queue
    messages: List[Dict[str, Any]]  # conversation messages as plain dicts
    model: str  # model identifier supplied by the caller
    status: CompletionStatus  # current lifecycle state
    created_at: datetime  # when the call was enqueued
    completed_at: Optional[datetime] = None  # set on completion or failure
    response: Optional[str] = None  # human-provided text response, if any
    tool_calls: Optional[List[Dict[str, Any]]] = None  # human-provided tool calls, if any
    error: Optional[str] = None  # failure reason, if the call failed


class ToolCall(BaseModel):
    """Tool call structure carrying a 'function' payload."""

    id: str  # identifier for this tool call
    type: str = "function"  # tool call type; only "function" is used here
    function: Dict[str, Any]  # function name/arguments payload


class CompletionRequest(BaseModel):
    """Request body for POST /queue: the messages and model to route to a human."""

    messages: List[Dict[str, Any]]  # conversation messages as plain dicts
    model: str  # model identifier supplied by the caller


class CompletionResponse(BaseModel):
    """Request body for POST /complete/{call_id}: a text response and/or tool calls."""

    response: Optional[str] = None  # human-provided text response
    tool_calls: Optional[List[Dict[str, Any]]] = None  # human-provided tool calls


class CompletionQueue:
    """In-memory, asyncio-safe queue of completion calls awaiting a human.

    All reads and writes are serialized through a single ``asyncio.Lock`` so
    concurrent request handlers observe a consistent view. ``_queue`` holds
    every known call (pending, completed, or failed) keyed by id, while
    ``_pending_order`` preserves FIFO ordering of the ids still pending.
    """

    def __init__(self):
        # Every known call, keyed by its uuid string.
        self._queue: Dict[str, CompletionCall] = {}
        # Ids of still-pending calls, oldest first.
        self._pending_order: List[str] = []
        # Guards both structures above.
        self._lock = asyncio.Lock()

    async def add_completion(self, messages: List[Dict[str, Any]], model: str) -> str:
        """Add a completion call to the queue and return its generated id."""
        async with self._lock:
            call_id = str(uuid.uuid4())
            completion_call = CompletionCall(
                id=call_id,
                messages=messages,
                model=model,
                status=CompletionStatus.PENDING,
                created_at=datetime.now(),
            )
            self._queue[call_id] = completion_call
            self._pending_order.append(call_id)
            return call_id

    async def get_pending_calls(self) -> List[Dict[str, Any]]:
        """Get all pending completion calls as JSON-serializable dicts, FIFO order."""
        async with self._lock:
            pending_calls = []
            for call_id in self._pending_order:
                if (
                    call_id in self._queue
                    and self._queue[call_id].status == CompletionStatus.PENDING
                ):
                    call = self._queue[call_id]
                    pending_calls.append(
                        {
                            "id": call.id,
                            "model": call.model,
                            "created_at": call.created_at.isoformat(),
                            "messages": call.messages,
                        }
                    )
            return pending_calls

    async def get_call_status(self, call_id: str) -> Optional[Dict[str, Any]]:
        """Get the status of a completion call, or None if the id is unknown.

        Optional fields (completed_at, response, tool_calls, error) are only
        included when they have been set on the call.
        """
        async with self._lock:
            if call_id not in self._queue:
                return None

            call = self._queue[call_id]
            result = {
                "id": call.id,
                "status": call.status.value,
                "created_at": call.created_at.isoformat(),
                "model": call.model,
                "messages": call.messages,
            }

            if call.completed_at:
                result["completed_at"] = call.completed_at.isoformat()
            if call.response:
                result["response"] = call.response
            if call.tool_calls:
                result["tool_calls"] = call.tool_calls
            if call.error:
                result["error"] = call.error

            return result

    async def complete_call(
        self,
        call_id: str,
        response: Optional[str] = None,
        tool_calls: Optional[List[Dict[str, Any]]] = None,
    ) -> bool:
        """Mark a pending call as completed with a response and/or tool calls.

        Returns False when the id is unknown or the call is no longer pending.
        """
        async with self._lock:
            if call_id not in self._queue:
                return False

            call = self._queue[call_id]
            if call.status != CompletionStatus.PENDING:
                return False

            call.status = CompletionStatus.COMPLETED
            call.completed_at = datetime.now()
            call.response = response
            call.tool_calls = tool_calls

            # Remove from pending order
            if call_id in self._pending_order:
                self._pending_order.remove(call_id)

            return True

    async def fail_call(self, call_id: str, error: str) -> bool:
        """Mark a pending call as failed with an error message.

        Returns False when the id is unknown or the call is no longer pending.
        """
        async with self._lock:
            if call_id not in self._queue:
                return False

            call = self._queue[call_id]
            if call.status != CompletionStatus.PENDING:
                return False

            call.status = CompletionStatus.FAILED
            call.completed_at = datetime.now()
            call.error = error

            # Remove from pending order
            if call_id in self._pending_order:
                self._pending_order.remove(call_id)

            return True

    async def wait_for_completion(self, call_id: str, timeout: float = 300.0) -> Optional[str]:
        """Poll until the call completes and return its text response.

        Returns None if the id is unknown. Raises TimeoutError after
        ``timeout`` seconds (the call is also marked failed), or Exception if
        the call was failed by another party.
        """
        # Use the running loop's monotonic clock; calling
        # asyncio.get_event_loop() inside a coroutine is deprecated.
        loop = asyncio.get_running_loop()
        start_time = loop.time()

        while True:
            status = await self.get_call_status(call_id)
            if not status:
                return None

            if status["status"] == CompletionStatus.COMPLETED.value:
                return status.get("response")
            elif status["status"] == CompletionStatus.FAILED.value:
                raise Exception(f"Completion failed: {status.get('error', 'Unknown error')}")

            # Check timeout
            if loop.time() - start_time > timeout:
                await self.fail_call(call_id, "Timeout waiting for human response")
                raise TimeoutError("Timeout waiting for human response")

            # Poll at 2 Hz; human responses arrive on human timescales.
            await asyncio.sleep(0.5)


# Global queue instance shared by every endpoint handler below.
completion_queue = CompletionQueue()

# FastAPI app exposing the human-completion HTTP API.
app = FastAPI(title="Human Completion Server", version="1.0.0")


@app.post("/queue", response_model=Dict[str, str])
async def queue_completion(request: CompletionRequest):
    """Add a completion request to the queue."""
    call_id = await completion_queue.add_completion(request.messages, request.model)
    return {"id": call_id, "status": "queued"}


@app.get("/pending")
async def list_pending():
    """List all pending completion calls."""
    pending_calls = await completion_queue.get_pending_calls()
    return {"pending_calls": pending_calls}


@app.get("/status/{call_id}")
async def get_status(call_id: str):
    """Get the status of a specific completion call."""
    status = await completion_queue.get_call_status(call_id)
    if not status:
        raise HTTPException(status_code=404, detail="Completion call not found")
    return status


@app.post("/complete/{call_id}")
async def complete_call(call_id: str, response: CompletionResponse):
    """Complete a call with a human response."""
    success = await completion_queue.complete_call(
        call_id, response=response.response, tool_calls=response.tool_calls
    )
    if success:
        return {"status": "success", "message": "Call completed"}
    else:
        raise HTTPException(status_code=404, detail="Call not found or already completed")


@app.post("/fail/{call_id}")
async def fail_call(call_id: str, error: Dict[str, str]):
    """Mark a call as failed."""
    success = await completion_queue.fail_call(call_id, error.get("error", "Unknown error"))
    if not success:
        raise HTTPException(
            status_code=404, detail="Completion call not found or already completed"
        )
    return {"status": "failed"}


@app.get("/")
async def root():
    """Root endpoint."""
    return {"message": "Human Completion Server is running"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8002)

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/computers/custom.py:
--------------------------------------------------------------------------------

```python
"""
Custom computer handler implementation that accepts a dictionary of functions.
"""

import base64
import io
from typing import Any, Callable, Dict, List, Literal, Optional, Union

from PIL import Image

from .base import AsyncComputerHandler


class CustomComputerHandler(AsyncComputerHandler):
    """Computer handler that implements the Computer protocol using a dictionary of custom functions.

    Every method looks up its implementation in ``self.functions`` by name;
    missing entries are no-ops (or sensible defaults), so callers only need
    to supply the capabilities their environment actually has. Only the
    'screenshot' entry is mandatory.
    """

    def __init__(self, functions: Dict[str, Callable]):
        """
        Initialize with a dictionary of functions.

        Args:
            functions: Dictionary where keys are method names and values are callable functions.
                      Only 'screenshot' is required, all others are optional.

        Raises:
            ValueError: If required 'screenshot' function is not provided.
        """
        if "screenshot" not in functions:
            raise ValueError("'screenshot' function is required in functions dictionary")

        self.functions = functions
        # Size (width, height) of the most recent screenshot; used as a
        # fallback when no 'dimensions' entry is provided.
        self._last_screenshot_size: Optional[tuple[int, int]] = None

    async def _call_function(self, func, *args, **kwargs):
        """
        Call a function, handling both async and sync functions.

        Non-callable values are returned as-is, which lets the functions
        dict hold plain constants (e.g. a fixed 'environment' string).

        Args:
            func: The function (or plain value) to call
            *args: Positional arguments to pass to the function
            **kwargs: Keyword arguments to pass to the function

        Returns:
            The result of the function call, or ``func`` itself if not callable
        """
        import inspect

        if callable(func):
            if inspect.iscoroutinefunction(func):
                return await func(*args, **kwargs)
            else:
                return func(*args, **kwargs)
        else:
            return func

    async def _get_value(self, attribute: str):
        """
        Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys.

        Args:
            attribute: The attribute name to look for

        Returns:
            The value from the functions dict, called if callable, returned
            directly if not; None when neither key is present
        """
        # Check for 'get_{attribute}' first
        get_key = f"get_{attribute}"
        if get_key in self.functions:
            return await self._call_function(self.functions[get_key])

        # Check for '{attribute}'
        if attribute in self.functions:
            return await self._call_function(self.functions[attribute])

        return None

    def _to_b64_str(self, img: Union[bytes, Image.Image, str]) -> str:
        """
        Convert image to base64 string.

        Args:
            img: Image as bytes, PIL Image, or base64 string

        Returns:
            str: Base64 encoded image string

        Raises:
            ValueError: If the image type is not one of the supported forms
        """
        if isinstance(img, str):
            # Already a base64 string
            return img
        elif isinstance(img, bytes):
            # Raw bytes
            return base64.b64encode(img).decode("utf-8")
        elif isinstance(img, Image.Image):
            # PIL Image: serialize as PNG before encoding
            buffer = io.BytesIO()
            img.save(buffer, format="PNG")
            return base64.b64encode(buffer.getvalue()).decode("utf-8")
        else:
            raise ValueError(f"Unsupported image type: {type(img)}")

    # ==== Computer-Use-Preview Action Space ====

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type; defaults to 'linux' when unspecified."""
        result = await self._get_value("environment")
        if result is None:
            return "linux"
        assert result in ["windows", "mac", "linux", "browser"]
        return result  # type: ignore

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height).

        Falls back to the size of a (possibly fresh) screenshot when no
        'dimensions' entry is provided.
        """
        result = await self._get_value("dimensions")
        if result is not None:
            return result  # type: ignore

        # Fallback: use last screenshot size if available
        if not self._last_screenshot_size:
            await self.screenshot()
        assert self._last_screenshot_size is not None, "Failed to get screenshot size"

        return self._last_screenshot_size

    async def screenshot(self, text: Optional[str] = None) -> str:
        """Take a screenshot and return as base64 string.

        Args:
            text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
        """
        result = await self._call_function(self.functions["screenshot"])
        b64_str = self._to_b64_str(result)  # type: ignore

        # Try to extract dimensions for fallback use
        try:
            if isinstance(result, Image.Image):
                self._last_screenshot_size = result.size
            elif isinstance(result, bytes):
                # Try to decode bytes to get dimensions
                img = Image.open(io.BytesIO(result))
                self._last_screenshot_size = img.size
        except Exception:
            # If we can't get dimensions, that's okay
            pass

        return b64_str

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with specified button."""
        if "click" in self.functions:
            await self._call_function(self.functions["click"], x, y, button)
        # No-op if not implemented

    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        if "double_click" in self.functions:
            await self._call_function(self.functions["double_click"], x, y)
        # No-op if not implemented

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at coordinates with specified scroll amounts."""
        if "scroll" in self.functions:
            await self._call_function(self.functions["scroll"], x, y, scroll_x, scroll_y)
        # No-op if not implemented

    async def type(self, text: str) -> None:
        """Type text."""
        if "type" in self.functions:
            await self._call_function(self.functions["type"], text)
        # No-op if not implemented

    async def wait(self, ms: int = 1000) -> None:
        """Wait for specified milliseconds."""
        if "wait" in self.functions:
            await self._call_function(self.functions["wait"], ms)
        else:
            # Default implementation
            import asyncio

            await asyncio.sleep(ms / 1000.0)

    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        if "move" in self.functions:
            await self._call_function(self.functions["move"], x, y)
        # No-op if not implemented

    async def keypress(self, keys: Union[List[str], str]) -> None:
        """Press key combination."""
        if "keypress" in self.functions:
            await self._call_function(self.functions["keypress"], keys)
        # No-op if not implemented

    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along specified path."""
        if "drag" in self.functions:
            await self._call_function(self.functions["drag"], path)
        # No-op if not implemented

    async def get_current_url(self) -> str:
        """Get current URL (for browser environments).

        Accepts either a 'get_current_url' or 'current_url' entry in the
        functions dict (matching ``_get_value`` semantics); previously a
        'current_url'-only entry was silently ignored. Returns "" when
        neither is provided.
        """
        result = await self._get_value("current_url")
        return result if result is not None else ""  # Default fallback

    async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse down at coordinates."""
        if "left_mouse_down" in self.functions:
            await self._call_function(self.functions["left_mouse_down"], x, y)
        # No-op if not implemented

    async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse up at coordinates."""
        if "left_mouse_up" in self.functions:
            await self._call_function(self.functions["left_mouse_up"], x, y)
        # No-op if not implemented
```

--------------------------------------------------------------------------------
/libs/typescript/core/src/telemetry/clients/posthog.ts:
--------------------------------------------------------------------------------

```typescript
/**
 * Telemetry client using PostHog for collecting anonymous usage data.
 */

import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { pino } from 'pino';
import { PostHog } from 'posthog-node';
import { v4 as uuidv4 } from 'uuid';

// Controls how frequently telemetry will be sent, as a percentage in [0, 100]
export const TELEMETRY_SAMPLE_RATE = 100; // 100% sampling rate

// Public PostHog config for anonymous telemetry
// These values are intentionally public and meant for anonymous telemetry only
// https://posthog.com/docs/product-analytics/troubleshooting#is-it-ok-for-my-api-key-to-be-exposed-and-public
export const PUBLIC_POSTHOG_API_KEY = 'phc_eSkLnbLxsnYFaXksif1ksbrNzYlJShr35miFLDppF14';
export const PUBLIC_POSTHOG_HOST = 'https://eu.i.posthog.com';

/**
 * Telemetry client that batches anonymous usage events to PostHog.
 *
 * Events recorded before the PostHog client is initialized are queued in
 * memory and replayed once initialization succeeds. The installation id is
 * a random UUID persisted under ~/.cua and carries no personal information.
 */
export class PostHogTelemetryClient {
  private config: {
    enabled: boolean;
    sampleRate: number;
    posthog: { apiKey: string; host: string };
  };
  // Random, non-identifying id persisted at ~/.cua/installation_id.
  private installationId: string;
  private initialized = false;
  // Events recorded before the PostHog client was ready.
  private queuedEvents: {
    name: string;
    properties: Record<string, unknown>;
    timestamp: number;
  }[] = [];
  private startTime: number; // seconds
  private posthogClient?: PostHog;
  // Named counters, flushed as a single 'telemetry_counters' event.
  private counters: Record<string, number> = {};

  private logger = pino({ name: 'core.telemetry' });

  constructor() {
    // set up config
    this.config = {
      enabled: true,
      sampleRate: TELEMETRY_SAMPLE_RATE,
      posthog: { apiKey: PUBLIC_POSTHOG_API_KEY, host: PUBLIC_POSTHOG_HOST },
    };
    // Check CUA_TELEMETRY_ENABLED environment variable (defaults to enabled)
    const telemetryEnabled = ['1', 'true', 'yes', 'on'].includes(
      process.env.CUA_TELEMETRY_ENABLED?.toLowerCase() || 'true'
    );

    this.config.enabled = telemetryEnabled;
    const parsedRate = Number.parseFloat(
      process.env.CUA_TELEMETRY_SAMPLE_RATE || String(TELEMETRY_SAMPLE_RATE)
    );
    // Guard against malformed env values: a NaN rate would make every
    // sampling comparison false and silently send 100% of events.
    this.config.sampleRate = Number.isFinite(parsedRate) ? parsedRate : TELEMETRY_SAMPLE_RATE;
    // init client
    this.installationId = this._getOrCreateInstallationId();
    this.startTime = Date.now() / 1000; // Convert to seconds

    // Log telemetry status on startup
    if (this.config.enabled) {
      this.logger.info(`Telemetry enabled (sampling at ${this.config.sampleRate}%)`);
      // Initialize PostHog client if config is available
      this._initializePosthog();
    } else {
      this.logger.info('Telemetry disabled');
    }
  }

  /**
   * Get or create a random installation ID.
   * This ID is not tied to any personal information.
   */
  private _getOrCreateInstallationId(): string {
    const homeDir = os.homedir();
    const idFile = path.join(homeDir, '.cua', 'installation_id');

    try {
      if (fs.existsSync(idFile)) {
        return fs.readFileSync(idFile, 'utf-8').trim();
      }
    } catch (error) {
      this.logger.debug(`Failed to read installation ID: ${error}`);
    }

    // Create new ID if not exists
    const newId = uuidv4();
    try {
      const dir = path.dirname(idFile);
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }
      fs.writeFileSync(idFile, newId);
      return newId;
    } catch (error) {
      this.logger.debug(`Failed to write installation ID: ${error}`);
    }

    // Fallback to in-memory ID if file operations fail
    return newId;
  }

  /**
   * Initialize the PostHog client with configuration.
   * Returns true on success (or if already initialized).
   */
  private _initializePosthog(): boolean {
    if (this.initialized) {
      return true;
    }

    try {
      this.posthogClient = new PostHog(this.config.posthog.apiKey, {
        host: this.config.posthog.host,
        flushAt: 20, // Number of events to batch before sending
        flushInterval: 30000, // Send events every 30 seconds
      });
      this.initialized = true;
      this.logger.debug('PostHog client initialized successfully');

      // Process any queued events
      this._processQueuedEvents();
      return true;
    } catch (error) {
      this.logger.error(`Failed to initialize PostHog client: ${error}`);
      return false;
    }
  }

  /**
   * Process any events that were queued before initialization.
   */
  private _processQueuedEvents(): void {
    if (!this.posthogClient || this.queuedEvents.length === 0) {
      return;
    }

    for (const event of this.queuedEvents) {
      this._captureEvent(event.name, event.properties);
    }
    this.queuedEvents = [];
  }

  /**
   * Capture an event with PostHog, attaching standard environment properties.
   */
  private _captureEvent(eventName: string, properties?: Record<string, unknown>): void {
    if (!this.posthogClient) {
      return;
    }

    try {
      // Add standard properties
      const eventProperties = {
        ...properties,
        version: process.env.npm_package_version || 'unknown',
        platform: process.platform,
        node_version: process.version,
        is_ci: this._isCI,
      };

      this.posthogClient.capture({
        distinctId: this.installationId,
        event: eventName,
        properties: eventProperties,
      });
    } catch (error) {
      this.logger.debug(`Failed to capture event: ${error}`);
    }
  }

  private get _isCI(): boolean {
    /**
     * Detect if running in CI environment via common CI env variables.
     */
    return !!(
      process.env.CI ||
      process.env.CONTINUOUS_INTEGRATION ||
      process.env.GITHUB_ACTIONS ||
      process.env.GITLAB_CI ||
      process.env.CIRCLECI ||
      process.env.TRAVIS ||
      process.env.JENKINS_URL
    );
  }

  increment(counterName: string, value = 1) {
    /**
     * Increment a named counter. No-op when telemetry is disabled.
     */
    if (!this.config.enabled) {
      return;
    }

    if (!(counterName in this.counters)) {
      this.counters[counterName] = 0;
    }
    this.counters[counterName] += value;
  }

  recordEvent(eventName: string, properties?: Record<string, unknown>): void {
    /**
     * Record an event with optional properties.
     * Counters are always incremented; the event itself is subject to
     * sampling and is queued if the PostHog client is not yet ready.
     */
    if (!this.config.enabled) {
      return;
    }

    // Increment counter for this event type
    const counterKey = `event:${eventName}`;
    this.increment(counterKey);

    // Apply sampling
    if (Math.random() * 100 > this.config.sampleRate) {
      return;
    }

    const event = {
      name: eventName,
      properties: properties || {},
      timestamp: Date.now() / 1000,
    };

    if (this.initialized && this.posthogClient) {
      this._captureEvent(eventName, properties);
    } else {
      // Queue event if not initialized
      this.queuedEvents.push(event);
      // Try to initialize again
      if (this.config.enabled && !this.initialized) {
        this._initializePosthog();
      }
    }
  }

  /**
   * Flush any pending events to PostHog.
   * Counter totals are sent as one 'telemetry_counters' event, then cleared.
   */
  async flush(): Promise<boolean> {
    if (!this.config.enabled || !this.posthogClient) {
      return false;
    }

    try {
      // Send counter data as a single event
      if (Object.keys(this.counters).length > 0) {
        this._captureEvent('telemetry_counters', {
          counters: { ...this.counters },
          duration: Date.now() / 1000 - this.startTime,
        });
      }

      await this.posthogClient.flush();
      this.logger.debug('Telemetry flushed successfully');

      // Clear counters after sending
      this.counters = {};
      return true;
    } catch (error) {
      this.logger.debug(`Failed to flush telemetry: ${error}`);
      return false;
    }
  }

  enable(): void {
    /**
     * Enable telemetry collection, initializing PostHog if needed.
     */
    this.config.enabled = true;
    this.logger.info('Telemetry enabled');
    if (!this.initialized) {
      this._initializePosthog();
    }
  }

  async disable(): Promise<void> {
    /**
     * Disable telemetry collection and the underlying PostHog client.
     */
    this.config.enabled = false;
    await this.posthogClient?.disable();
    this.logger.info('Telemetry disabled');
  }

  get enabled(): boolean {
    /**
     * Check if telemetry is enabled.
     */
    return this.config.enabled;
  }

  async shutdown(): Promise<void> {
    /**
     * Shutdown the telemetry client and flush any pending events.
     */
    if (this.posthogClient) {
      await this.flush();
      await this.posthogClient.shutdown();
      this.initialized = false;
      this.posthogClient = undefined;
    }
  }
}

```

--------------------------------------------------------------------------------
/docs/src/components/editable-code-block.tsx:
--------------------------------------------------------------------------------

```typescript
'use client';

import React, { createContext, useContext, useState, ReactNode } from 'react';
import * as Base from 'fumadocs-ui/components/codeblock';
import { cn } from 'fumadocs-ui/utils/cn';

/**
 * Context for managing editable values within code blocks.
 * `values` maps placeholder keys to their current text; `updateValue`
 * replaces a single entry by key.
 */
interface EditableCodeContextValue {
  values: Record<string, string>;
  updateValue: (key: string, value: string) => void;
}

// Null outside an EditableCodeBlock provider; consumers must handle that.
const EditableCodeContext = createContext<EditableCodeContextValue | null>(null);

/**
 * Hook to access the editable code context
 */
function useEditableCode() {
  const context = useContext(EditableCodeContext);
  if (!context) {
    throw new Error('useEditableCode must be used within EditableCodeBlock');
  }
  return context;
}

/**
 * Props for EditableCodeBlock component
 */
interface EditableCodeBlockProps {
  /** Programming language used for syntax-highlight class names (default: 'python') */
  lang?: string;
  /** Initial values for placeholders, keyed by placeholder id */
  defaultValues?: Record<string, string>;
  /** Code content with embedded EditableValue components */
  children: ReactNode;
  /** Additional CSS classes merged into the code block wrapper */
  className?: string;
  /** Title for the code block */
  title?: string;
}

/**
 * Code block component that supports inline editable values.
 * Wraps fumadocs-ui code styling around children that may contain
 * EditableValue inputs, sharing their state through context.
 */
export function EditableCodeBlock({
  lang = 'python',
  defaultValues = {},
  children,
  className,
  title,
}: EditableCodeBlockProps) {
  const [values, setValues] = useState<Record<string, string>>(defaultValues);

  const updateValue = (key: string, value: string) =>
    setValues((prev) => ({ ...prev, [key]: value }));

  const langClass = `language-${lang}`;

  return (
    <EditableCodeContext.Provider value={{ values, updateValue }}>
      <Base.CodeBlock title={title} className={cn('my-4', className)}>
        <Base.Pre className={cn(langClass, 'px-3')}>
          <code className={cn(langClass)} style={{ display: 'block', whiteSpace: 'pre-wrap' }}>
            {children}
          </code>
        </Base.Pre>
      </Base.CodeBlock>
    </EditableCodeContext.Provider>
  );
}

/**
 * Props for EditableValue component
 */
interface EditableValueProps {
  /** Unique key binding this value to the shared context; also shown as the input placeholder */
  placeholder: string;
  /** Display width in characters; when omitted, width is measured from the rendered text */
  width?: number;
  /** Optional default value used until the user edits this placeholder */
  defaultValue?: string;
  /** Input type (default: 'text'); 'password' masks the entered value */
  type?: 'text' | 'password';
}

/**
 * Inline editable input that blends with code styling.
 * Appears as an underlined, hoverable value within code. Its width follows
 * the wider of the current value and the placeholder text (minimum 80px),
 * unless an explicit character width is supplied.
 *
 * Must be rendered inside an <EditableCodeBlock>, since it reads and writes
 * the shared value store via useEditableCode().
 */
export function EditableValue({
  placeholder,
  width: explicitWidth,
  defaultValue = '',
  type = 'text',
}: EditableValueProps) {
  const { values, updateValue } = useEditableCode();
  // The context value wins over the local default, so an edit persists
  // across every EditableValue bound to the same placeholder key.
  const value = values[placeholder] ?? defaultValue;
  const spanRef = React.useRef<HTMLSpanElement>(null);
  const placeholderSpanRef = React.useRef<HTMLSpanElement>(null);
  const inputRef = React.useRef<HTMLInputElement>(null);
  const [measuredWidth, setMeasuredWidth] = React.useState(0);
  const [placeholderWidth, setPlaceholderWidth] = React.useState(0);
  const [isHovered, setIsHovered] = React.useState(false);
  const [tooltipPosition, setTooltipPosition] = React.useState({ top: 0, left: 0 });
  const [isVisible, setIsVisible] = React.useState(false);

  // Observe visibility changes to trigger remeasurement.
  // NOTE(review): presumably needed because hidden elements report an
  // offsetWidth of 0 (e.g. inside a collapsed tab), which would yield a
  // bogus measured width — confirm against the rendering contexts used.
  React.useEffect(() => {
    if (!inputRef.current) return;

    const observer = new IntersectionObserver(
      (entries) => {
        entries.forEach((entry) => {
          setIsVisible(entry.isIntersecting);
        });
      },
      { threshold: 0.01 }
    );

    observer.observe(inputRef.current);

    return () => {
      observer.disconnect();
    };
  }, []);

  // Measure the actual text width using a hidden span (re-runs when the
  // value changes or the element becomes visible).
  React.useEffect(() => {
    if (spanRef.current && isVisible) {
      setMeasuredWidth(spanRef.current.offsetWidth);
    }
  }, [value, isVisible]);

  // Measure placeholder width when visible, so an empty input is still
  // wide enough to show its placeholder text.
  React.useEffect(() => {
    if (isHovered && inputRef.current) {
      const rect = inputRef.current.getBoundingClientRect();
      setTooltipPosition({
        top: rect.top - 28,
        left: rect.left + rect.width / 2,
      });
    }
  }, [isHovered]);

  // Explicit width (in ch) takes priority; otherwise use the wider of the
  // placeholder and current value, with an 80px floor.
  const inputWidth = explicitWidth
    ? `${explicitWidth}ch`
    : `${Math.max(placeholderWidth, measuredWidth, 80)}px`;

  return (
    <span
      style={{ display: 'inline', whiteSpace: 'nowrap', position: 'relative' }}
      onMouseEnter={() => setIsHovered(true)}
      onMouseLeave={() => setIsHovered(false)}
    >
      {/* Hidden span to measure current value width */}
      <span
        ref={spanRef}
        style={{
          position: 'absolute',
          visibility: 'hidden',
          whiteSpace: 'pre',
          fontFamily: 'inherit',
          pointerEvents: 'none',
        }}
        aria-hidden="true"
      >
        {value}
      </span>

      {/* Hidden span to measure placeholder width */}
      <span
        ref={placeholderSpanRef}
        style={{
          position: 'absolute',
          visibility: 'hidden',
          whiteSpace: 'pre',
          fontFamily: 'inherit',
          pointerEvents: 'none',
        }}
        aria-hidden="true"
      >
        {placeholder}
      </span>

      {/* Tooltip — always in the DOM, faded in/out via opacity.
          Uses fixed positioning (coordinates computed in the hover effect
          above); NOTE(review): a fixed tooltip will not track the input if
          the page scrolls while hovered — confirm this is acceptable. */}
      <span
        style={{
          position: 'fixed',
          top: tooltipPosition.top,
          left: tooltipPosition.left,
          transform: 'translateX(-50%)',
          padding: '4px 8px',
          backgroundColor: 'rgba(0, 0, 0, 0.8)',
          color: 'white',
          fontSize: '12px',
          borderRadius: '4px',
          whiteSpace: 'nowrap',
          pointerEvents: 'none',
          opacity: isHovered ? 1 : 0,
          transition: 'opacity 0.2s ease-in-out',
          zIndex: 9999,
        }}
      >
        Edit me!
      </span>

      <input
        ref={inputRef}
        type={type}
        value={value}
        onChange={(e) => updateValue(placeholder, e.target.value)}
        placeholder={placeholder}
        // 'text-security-disc' presumably masks password text via CSS —
        // verify the class is defined in the project's stylesheet.
        className={cn(type === 'password' && value && 'text-security-disc')}
        style={{
          display: 'inline',
          width: inputWidth,
          verticalAlign: 'baseline',
          lineHeight: 'inherit',
          fontSize: 'inherit',
          fontFamily: 'inherit',
          height: 'auto',
          padding: 0,
          margin: 0,
          background: 'transparent',
          border: 'none',
          borderBottom: '2px dashed rgba(96, 165, 250, 0.5)',
          outline: 'none',
          color: 'inherit',
          transition: 'border-bottom-color 0.2s ease-in-out',
        }}
      />
    </span>
  );
}

/**
 * Panel that groups form inputs rendered outside the code block
 * under a "Configuration" heading.
 */
export function EditableForm({
  children,
  className = '',
}: {
  children: ReactNode;
  className?: string;
}) {
  const panelClasses = cn(
    'p-4 border rounded-lg bg-fd-secondary/50 dark:bg-fd-secondary/30 mb-6',
    className
  );

  return (
    <div className={panelClasses}>
      <h3 className="text-lg font-semibold mb-4">Configuration</h3>
      {children}
    </div>
  );
}

/**
 * Form input for editing values outside code block
 */
interface EditableInputProps {
  /** Placeholder key to bind to in the shared editable-code context */
  placeholder: string;
  /** Label text rendered above the input */
  label: string;
  /** Input type (default: 'text') */
  type?: 'text' | 'email' | 'password';
  /** Custom class name merged onto the wrapper div */
  className?: string;
}

/**
 * Labeled form input bound to a placeholder key in the shared
 * editable-code context; edits here update every EditableValue
 * bound to the same key.
 */
export function EditableInput({
  placeholder,
  label,
  type = 'text',
  className = '',
}: EditableInputProps) {
  const { values, updateValue } = useEditableCode();
  // Fall back to an empty string so the input stays controlled.
  const value = values[placeholder] || '';

  const inputClasses = cn(
    'w-full px-3 py-2 border rounded-md',
    'focus:outline-none focus:ring-2 focus:ring-blue-500',
    'bg-fd-background border-fd-border'
  );

  return (
    <div className={cn('mb-4', className)}>
      <label className="block text-sm font-medium mb-2">{label}</label>
      <input
        type={type}
        value={value}
        placeholder={placeholder}
        onChange={(event) => updateValue(placeholder, event.target.value)}
        className={inputClasses}
      />
    </div>
  );
}

```
Page 7/20