This is page 10 of 28. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .dockerignore
├── .editorconfig
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── bump-version.yml
│       ├── ci-lume.yml
│       ├── docker-publish-cua-linux.yml
│       ├── docker-publish-cua-windows.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── link-check.yml
│       ├── lint.yml
│       ├── npm-publish-cli.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       ├── python-tests.yml
│       ├── test-cua-models.yml
│       └── test-validation-script.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.yaml
├── .vscode
│   ├── docs.code-workspace
│   ├── extensions.json
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── cloud-windows-ga-macos-preview.md
│   ├── composite-agents.md
│   ├── computer-use-agents-for-growth-hacking.md
│   ├── cua-hackathon.md
│   ├── cua-playground-preview.md
│   ├── cua-vlm-router.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cli.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── neurips-2025-cua-papers.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .env.example
│   ├── .gitignore
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── observability.mdx
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── cua-vlm-router.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   ├── telemetry.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── cli-playbook
│   │       │   ├── commands.mdx
│   │       │   ├── index.mdx
│   │       │   └── meta.json
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── meta.json
│   │       │   ├── sandboxed-python.mdx
│   │       │   └── tracing-api.mdx
│   │       ├── example-usecases
│   │       │   ├── form-filling.mdx
│   │       │   ├── gemini-complex-ui-navigation.mdx
│   │       │   ├── meta.json
│   │       │   ├── post-event-contact-export.mdx
│   │       │   └── windows-app-behind-vpn.mdx
│   │       ├── get-started
│   │       │   ├── meta.json
│   │       │   └── quickstart.mdx
│   │       ├── index.mdx
│   │       ├── macos-vm-cli-playbook
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   └── meta.json
│   │       └── meta.json
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── bg-dark.jpg
│   │       ├── bg-light.jpg
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── grounding-with-gemini3.gif
│   │       ├── hero.png
│   │       ├── laminar_trace_example.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   ├── posthog
│   │   │   │   │   └── [...path]
│   │   │   │   │       └── route.ts
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   ├── llms.txt
│   │   │   │   └── route.ts
│   │   │   ├── robots.ts
│   │   │   └── sitemap.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── analytics-tracker.tsx
│   │   │   ├── cookie-consent.tsx
│   │   │   ├── doc-actions-menu.tsx
│   │   │   ├── editable-code-block.tsx
│   │   │   ├── footer.tsx
│   │   │   ├── hero.tsx
│   │   │   ├── iou.tsx
│   │   │   ├── mermaid.tsx
│   │   │   └── page-feedback.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   ├── mdx-components.tsx
│   │   └── providers
│   │       └── posthog-provider.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── browser_tool_example.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── tracing_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cua_adapter.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gelato.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── generic_vlm.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   ├── uiins.py
│   │   │   │   │   ├── uitars.py
│   │   │   │   │   └── uitars2.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── browser_tool.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer_agent.py
│   │   ├── bench-ui
│   │   │   ├── bench_ui
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── child.py
│   │   │   ├── examples
│   │   │   │   ├── folder_example.py
│   │   │   │   ├── gui
│   │   │   │   │   ├── index.html
│   │   │   │   │   ├── logo.svg
│   │   │   │   │   └── styles.css
│   │   │   │   ├── output_overlay.png
│   │   │   │   └── simple_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       └── test_port_detection.py
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── tracing_wrapper.py
│   │   │   │   ├── tracing.py
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer.py
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── browser.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wallpaper.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   ├── test_connection.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_server.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_telemetry.py
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── build-extension.py
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── desktop-extension
│   │   │   │   ├── cua-extension.mcpb
│   │   │   │   ├── desktop_extension.png
│   │   │   │   ├── manifest.json
│   │   │   │   ├── README.md
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── run_server.sh
│   │   │   │   └── setup.py
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── QUICK_TEST_COMMANDS.sh
│   │   │   ├── quick_test_local_option.py
│   │   │   ├── README.md
│   │   │   ├── scripts
│   │   │   │   ├── install_mcp_server.sh
│   │   │   │   └── start_mcp_server.sh
│   │   │   ├── test_mcp_server_local_option.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_mcp_server.py
│   │   ├── pylume
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_pylume.py
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           ├── conftest.py
│   │           └── test_omniparser.py
│   ├── qemu-docker
│   │   ├── linux
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   └── src
│   │   │       ├── entry.sh
│   │   │       └── vm
│   │   │           ├── image
│   │   │           │   └── README.md
│   │   │           └── setup
│   │   │               ├── install.sh
│   │   │               ├── setup-cua-server.sh
│   │   │               └── setup.sh
│   │   ├── README.md
│   │   └── windows
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       └── src
│   │           ├── entry.sh
│   │           └── vm
│   │               ├── image
│   │               │   └── README.md
│   │               └── setup
│   │                   ├── install.bat
│   │                   ├── on-logon.ps1
│   │                   ├── setup-cua-server.ps1
│   │                   ├── setup-utils.psm1
│   │                   └── setup.ps1
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── cua-cli
│   │   │   ├── .gitignore
│   │   │   ├── .prettierrc
│   │   │   ├── bun.lock
│   │   │   ├── CLAUDE.md
│   │   │   ├── index.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── auth.ts
│   │   │   │   ├── cli.ts
│   │   │   │   ├── commands
│   │   │   │   │   ├── auth.ts
│   │   │   │   │   └── sandbox.ts
│   │   │   │   ├── config.ts
│   │   │   │   ├── http.ts
│   │   │   │   ├── storage.ts
│   │   │   │   └── util.ts
│   │   │   └── tsconfig.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Development.md
│       ├── Dockerfile
│       ├── Dockerfile.dev
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── install-cli.ps1
│   ├── install-cli.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   ├── run-docker-dev.sh
│   └── typescript-typecheck.js
├── TESTING.md
├── tests
│   ├── agent_loop_testing
│   │   ├── agent_test.py
│   │   └── README.md
│   ├── pytest.ini
│   ├── shell_cmd.py
│   ├── test_files.py
│   ├── test_mcp_server_session_management.py
│   ├── test_mcp_server_streaming.py
│   ├── test_shell_bash.py
│   ├── test_telemetry.py
│   ├── test_tracing.py
│   ├── test_venv.py
│   └── test_watchdog.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/libs/python/mcp-server/CONCURRENT_SESSIONS.md:
--------------------------------------------------------------------------------

```markdown
  1 | # MCP Server Concurrent Session Management
  2 | 
  3 | This document describes the improvements made to the MCP Server to address concurrent session management and resource lifecycle issues.
  4 | 
  5 | ## Problem Statement
  6 | 
  7 | The original MCP server implementation had several critical issues:
  8 | 
  9 | 1. **Global Computer Instance**: Used a single `global_computer` variable shared across all clients
 10 | 2. **No Resource Isolation**: Multiple clients would interfere with each other
 11 | 3. **Sequential Task Processing**: Multi-task operations were always sequential
 12 | 4. **No Graceful Shutdown**: Server couldn't properly clean up resources on shutdown
 13 | 5. **Hidden Event Loop**: `server.run()` hid the event loop, preventing proper lifecycle management
 14 | 
 15 | ## Solution Architecture
 16 | 
 17 | ### 1. Session Manager (`session_manager.py`)
 18 | 
 19 | The `SessionManager` class provides:
 20 | 
 21 | - **Per-session computer instances**: Each client gets isolated computer resources
 22 | - **Computer instance pooling**: Efficient reuse of computer instances with lifecycle management
 23 | - **Task registration**: Track active tasks per session for graceful cleanup
 24 | - **Automatic cleanup**: Background task cleans up idle sessions
 25 | - **Resource limits**: Configurable maximum concurrent sessions
 26 | 
 27 | #### Key Components:
 28 | 
 29 | ```python
 30 | class SessionManager:
 31 |     def __init__(self, max_concurrent_sessions: int = 10):
 32 |         self._sessions: Dict[str, SessionInfo] = {}
 33 |         self._computer_pool = ComputerPool()
 34 |         # ... lifecycle management
 35 | ```
 36 | 
 37 | #### Session Lifecycle:
 38 | 
 39 | 1. **Creation**: New session created when client first connects
 40 | 2. **Task Registration**: Each task is registered with the session
 41 | 3. **Activity Tracking**: Last activity time updated on each operation
 42 | 4. **Cleanup**: Sessions cleaned up when idle or on shutdown
 43 | 
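A sketch of the bookkeeping behind this lifecycle (illustrative only; the actual `SessionInfo`/`SessionManager` fields in `session_manager.py` may differ):

```python
import asyncio
import time
from dataclasses import dataclass, field
from typing import Dict, Set

@dataclass
class SessionInfoSketch:
    session_id: str
    active_tasks: Set[str] = field(default_factory=set)      # task IDs currently running
    last_activity: float = field(default_factory=time.time)  # refreshed on every operation

class SessionTableSketch:
    def __init__(self, idle_timeout: float = 300.0) -> None:
        self._sessions: Dict[str, SessionInfoSketch] = {}
        self._idle_timeout = idle_timeout
        self._lock = asyncio.Lock()

    async def touch(self, session_id: str) -> SessionInfoSketch:
        """Create the session on first contact (step 1) and refresh activity (step 3)."""
        async with self._lock:
            info = self._sessions.get(session_id)
            if info is None:
                info = SessionInfoSketch(session_id)
                self._sessions[session_id] = info
            info.last_activity = time.time()
            return info

    async def cleanup_idle(self) -> None:
        """Drop sessions with no active tasks that have been idle too long (step 4)."""
        async with self._lock:
            now = time.time()
            for sid, info in list(self._sessions.items()):
                if not info.active_tasks and now - info.last_activity > self._idle_timeout:
                    del self._sessions[sid]
```
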
 44 | ### 2. Computer Pool (`ComputerPool`)
 45 | 
 46 | Manages computer instances efficiently:
 47 | 
 48 | - **Pool Size Limits**: Maximum number of concurrent computer instances
 49 | - **Instance Reuse**: Available instances reused across sessions
 50 | - **Lifecycle Management**: Proper startup/shutdown of computer instances
 51 | - **Resource Cleanup**: All instances properly closed on shutdown
 52 | 
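In rough terms, the pool behaves like the sketch below (a minimal illustration, not the actual implementation; `create`/`close` stand in for whatever starts and stops a computer instance):

```python
import asyncio
from typing import Any, Awaitable, Callable, List

class ComputerPoolSketch:
    """Illustrative pool: caps live instances and reuses idle ones."""

    def __init__(self, create: Callable[[], Awaitable[Any]],
                 close: Callable[[Any], Awaitable[None]], max_size: int = 5):
        self._create = create                    # async factory for new instances
        self._close = close                      # async teardown for an instance
        self._idle: List[Any] = []               # instances available for reuse
        self._sem = asyncio.Semaphore(max_size)  # limits concurrent live instances
        self._lock = asyncio.Lock()

    async def acquire(self) -> Any:
        await self._sem.acquire()                # block if the pool is saturated
        async with self._lock:
            if self._idle:
                return self._idle.pop()          # reuse an idle instance
        return await self._create()              # otherwise start a fresh one

    async def release(self, instance: Any) -> None:
        async with self._lock:
            self._idle.append(instance)          # keep warm for the next session
        self._sem.release()

    async def shutdown(self) -> None:
        async with self._lock:                   # close everything on server shutdown
            await asyncio.gather(*(self._close(c) for c in self._idle))
            self._idle.clear()
```
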
 53 | ### 3. Enhanced Server Tools
 54 | 
 55 | All server tools now support:
 56 | 
 57 | - **Session ID Parameter**: Optional `session_id` for multi-client support
 58 | - **Resource Isolation**: Each session gets its own computer instance
 59 | - **Task Tracking**: Proper registration/unregistration of tasks
 60 | - **Error Handling**: Graceful error handling with session cleanup
 61 | 
 62 | #### Updated Tool Signatures:
 63 | 
 64 | ```python
 65 | async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any:
 66 | async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any:
 67 | async def run_multi_cua_tasks(ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False) -> Any:
 68 | ```
 69 | 
 70 | ### 4. Concurrent Task Execution
 71 | 
 72 | The `run_multi_cua_tasks` tool now supports:
 73 | 
 74 | - **Sequential Mode** (default): Tasks run one after another
 75 | - **Concurrent Mode**: Tasks run in parallel using `asyncio.gather()`
 76 | - **Progress Tracking**: Proper progress reporting for both modes
 77 | - **Error Handling**: Individual task failures don't stop other tasks
 78 | 
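In sketch form, concurrent mode is essentially `asyncio.gather` with `return_exceptions=True`, which is what keeps one failing task from stopping the others (`run_one` below is a placeholder for the real per-task agent loop):

```python
import asyncio
from typing import Any, List

async def run_tasks_sketch(tasks: List[str], concurrent: bool = False) -> List[Any]:
    async def run_one(task: str) -> str:
        await asyncio.sleep(0)           # placeholder for the real agent loop
        return f"done: {task}"

    if concurrent:
        # run all tasks in parallel; exceptions come back as results,
        # so an individual failure does not cancel the other tasks
        return await asyncio.gather(*(run_one(t) for t in tasks),
                                    return_exceptions=True)

    results: List[Any] = []
    for t in tasks:                      # sequential mode (default)
        try:
            results.append(await run_one(t))
        except Exception as exc:         # record the failure and keep going
            results.append(exc)
    return results
```
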
 79 | ### 5. Graceful Shutdown
 80 | 
 81 | The server now provides:
 82 | 
 83 | - **Signal Handlers**: Proper handling of SIGINT and SIGTERM
 84 | - **Session Cleanup**: All active sessions properly cleaned up
 85 | - **Resource Release**: Computer instances returned to pool and closed
 86 | - **Async Lifecycle**: Event loop properly exposed for cleanup
 87 | 
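A rough sketch of this pattern (Unix-style signal handling; `serve_forever` and `cleanup_all_sessions` are placeholders, not the server's actual function names):

```python
import asyncio
import contextlib
import signal

async def serve_forever() -> None:
    while True:                         # placeholder for the real request-handling loop
        await asyncio.sleep(1)

async def cleanup_all_sessions() -> None:
    await asyncio.sleep(0)              # placeholder for SessionManager/ComputerPool teardown

async def main() -> None:
    stop = asyncio.Event()
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, stop.set)   # turn OS signals into an asyncio event

    server = asyncio.create_task(serve_forever())
    await stop.wait()                            # block until SIGINT/SIGTERM arrives

    server.cancel()                              # stop accepting new work
    with contextlib.suppress(asyncio.CancelledError):
        await server
    await cleanup_all_sessions()                 # release sessions and pooled computers

if __name__ == "__main__":
    asyncio.run(main())
```
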
 88 | ## Usage Examples
 89 | 
 90 | ### Basic Usage (Backward Compatible)
 91 | 
 92 | ```python
 93 | # These calls work exactly as before
 94 | await screenshot_cua(ctx)
 95 | await run_cua_task(ctx, "Open browser")
 96 | await run_multi_cua_tasks(ctx, ["Task 1", "Task 2"])
 97 | ```
 98 | 
 99 | ### Multi-Client Usage
100 | 
101 | ```python
102 | # Client 1
103 | session_id_1 = "client-1-session"
104 | await screenshot_cua(ctx, session_id_1)
105 | await run_cua_task(ctx, "Open browser", session_id_1)
106 | 
107 | # Client 2 (completely isolated)
108 | session_id_2 = "client-2-session"
109 | await screenshot_cua(ctx, session_id_2)
110 | await run_cua_task(ctx, "Open editor", session_id_2)
111 | ```
112 | 
113 | ### Concurrent Task Execution
114 | 
115 | ```python
116 | # Run tasks concurrently instead of sequentially
117 | tasks = ["Open browser", "Open editor", "Open terminal"]
118 | results = await run_multi_cua_tasks(ctx, tasks, concurrent=True)
119 | ```
120 | 
121 | ### Session Management
122 | 
123 | ```python
124 | # Get session statistics
125 | stats = await get_session_stats(ctx)
126 | print(f"Active sessions: {stats['total_sessions']}")
127 | 
128 | # Cleanup specific session
129 | await cleanup_session(ctx, "session-to-cleanup")
130 | ```
131 | 
132 | ## Configuration
133 | 
134 | ### Environment Variables
135 | 
136 | - `CUA_MODEL_NAME`: Model to use (default: `anthropic/claude-sonnet-4-5-20250929`)
137 | - `CUA_MAX_IMAGES`: Maximum images to keep (default: `3`)
138 | 
139 | ### Session Manager Configuration
140 | 
141 | ```python
142 | # In session_manager.py
143 | class SessionManager:
144 |     def __init__(self, max_concurrent_sessions: int = 10):
145 |         # Configurable maximum concurrent sessions
146 | 
147 | class ComputerPool:
148 |     def __init__(self, max_size: int = 5, idle_timeout: float = 300.0):
149 |         # Configurable pool size and idle timeout
150 | ```
151 | 
152 | ## Performance Improvements
153 | 
154 | ### Before (Issues):
155 | 
156 | - ❌ Single global computer instance
157 | - ❌ Client interference and resource conflicts
158 | - ❌ Sequential task processing only
159 | - ❌ No graceful shutdown
160 | - ❌ 30s timeout issues with long-running tasks
161 | 
162 | ### After (Benefits):
163 | 
164 | - ✅ Per-session computer instances with proper isolation
165 | - ✅ Computer instance pooling for efficient resource usage
166 | - ✅ Concurrent task execution support
167 | - ✅ Graceful shutdown with proper cleanup
168 | - ✅ Streaming updates prevent timeout issues
169 | - ✅ Configurable resource limits
170 | - ✅ Automatic session cleanup
171 | 
172 | ## Testing
173 | 
174 | Comprehensive test coverage includes:
175 | 
176 | - Session creation and reuse
177 | - Concurrent session isolation
178 | - Task registration and cleanup
179 | - Error handling with session management
180 | - Concurrent vs sequential task execution
181 | - Session statistics and cleanup
182 | 
183 | Run tests with:
184 | 
185 | ```bash
186 | pytest tests/test_mcp_server_session_management.py -v
187 | ```
188 | 
189 | ## Migration Guide
190 | 
191 | ### For Existing Clients
192 | 
193 | No changes required! The new implementation is fully backward compatible:
194 | 
195 | ```python
196 | # This still works exactly as before
197 | await run_cua_task(ctx, "My task")
198 | ```
199 | 
200 | ### For New Multi-Client Applications
201 | 
202 | Use session IDs for proper isolation:
203 | 
204 | ```python
205 | # Create a unique session ID for each client
206 | session_id = str(uuid.uuid4())
207 | await run_cua_task(ctx, "My task", session_id)
208 | ```
209 | 
210 | ### For Concurrent Task Execution
211 | 
212 | Enable concurrent mode for better performance:
213 | 
214 | ```python
215 | tasks = ["Task 1", "Task 2", "Task 3"]
216 | results = await run_multi_cua_tasks(ctx, tasks, concurrent=True)
217 | ```
218 | 
219 | ## Monitoring and Debugging
220 | 
221 | ### Session Statistics
222 | 
223 | ```python
224 | stats = await get_session_stats(ctx)
225 | print(f"Total sessions: {stats['total_sessions']}")
226 | print(f"Max concurrent: {stats['max_concurrent']}")
227 | for session_id, session_info in stats['sessions'].items():
228 |     print(f"Session {session_id}: {session_info['active_tasks']} active tasks")
229 | ```
230 | 
231 | ### Logging
232 | 
233 | The server provides detailed logging for:
234 | 
235 | - Session creation and cleanup
236 | - Task registration and completion
237 | - Resource pool usage
238 | - Error conditions and recovery
239 | 
240 | ### Graceful Shutdown
241 | 
242 | The server properly handles shutdown signals:
243 | 
244 | ```bash
245 | # Send SIGTERM for graceful shutdown
246 | kill -TERM <server_pid>
247 | 
248 | # Or use Ctrl+C (SIGINT)
249 | ```
250 | 
251 | ## Future Enhancements
252 | 
253 | Potential future improvements:
254 | 
255 | 1. **Session Persistence**: Save/restore session state across restarts
256 | 2. **Load Balancing**: Distribute sessions across multiple server instances
257 | 3. **Resource Monitoring**: Real-time monitoring of resource usage
258 | 4. **Auto-scaling**: Dynamic adjustment of pool size based on demand
259 | 5. **Session Timeouts**: Configurable timeouts for different session types
260 | 
```

--------------------------------------------------------------------------------
/blog/human-in-the-loop.md:
--------------------------------------------------------------------------------

```markdown
  1 | # When Agents Need Human Wisdom - Introducing Human-In-The-Loop Support
  2 | 
  3 | _Published on August 29, 2025 by Francesco Bonacci_
  4 | 
  5 | Sometimes the best AI agent is a human. Whether you're creating training demonstrations, evaluating complex scenarios, or need to intervene when automation hits a wall, our new Human-In-The-Loop integration puts you directly in control.
  6 | 
  7 | With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could benchmark any agent at scale. Today's update lets you _become_ the agent when it matters most—seamlessly switching between automated intelligence and human judgment.
  8 | 
  9 | <div align="center">
 10 |   <video src="https://github.com/user-attachments/assets/9091b50f-26e7-4981-95ce-40e5d42a1260" width="600" controls></video>
 11 | </div>
 12 | 
 13 | ## What you get
 14 | 
 15 | - **One-line human takeover** for any agent configuration with `human/human` or `model+human/human`
 16 | - **Interactive web UI** to see what your agent sees and control what it does
 17 | - **Zero context switching** - step in exactly where automation left off
 18 | - **Training data generation** - create perfect demonstrations by doing tasks yourself
 19 | - **Ground truth evaluation** - validate agent performance with human expertise
 20 | 
 21 | ## Why Human-In-The-Loop?
 22 | 
 23 | Even the most sophisticated agents encounter edge cases, ambiguous interfaces, or tasks requiring human judgment. Rather than failing gracefully, they can now fail _intelligently_—by asking for human help.
 24 | 
 25 | This approach bridges the gap between fully automated systems and pure manual control, letting you:
 26 | 
 27 | - **Demonstrate complex workflows** that agents can learn from
 28 | - **Evaluate tricky scenarios** where ground truth requires human assessment
 29 | - **Intervene selectively** when automated agents need guidance
 30 | - **Test and debug** your tools and environments manually
 31 | 
 32 | ## Getting Started
 33 | 
 34 | Launch the human agent interface:
 35 | 
 36 | ```bash
 37 | python -m agent.human_tool
 38 | ```
 39 | 
 40 | The web UI will show pending completions. Click any completion to take control of the agent and see exactly what it sees.
 41 | 
 42 | ## Usage Examples
 43 | 
 44 | ### Direct Human Control
 45 | 
 46 | Perfect for creating demonstrations or when you want full manual control:
 47 | 
 48 | ```python
 49 | from agent import ComputerAgent
 50 | from agent.computer import computer
 51 | 
 52 | agent = ComputerAgent(
 53 |     "human/human",
 54 |     tools=[computer]
 55 | )
 56 | 
 57 | # You'll get full control through the web UI
 58 | async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"):
 59 |     pass
 60 | ```
 61 | 
 62 | ### Hybrid: AI Planning + Human Execution
 63 | 
 64 | Combine model intelligence with human precision—let AI plan, then execute manually:
 65 | 
 66 | ```python
 67 | agent = ComputerAgent(
 68 |     "huggingface-local/HelloKKMe/GTA1-7B+human/human",
 69 |     tools=[computer]
 70 | )
 71 | 
 72 | # AI creates the plan, human executes each step
 73 | async for _ in agent.run("Navigate to the settings page and enable dark mode"):
 74 |     pass
 75 | ```
 76 | 
 77 | ### Fallback Pattern
 78 | 
 79 | Start automated, escalate to human when needed:
 80 | 
 81 | ```python
 82 | # Primary automated agent
 83 | primary_agent = ComputerAgent("openai/computer-use-preview", tools=[computer])
 84 | 
 85 | # Human fallback agent
 86 | fallback_agent = ComputerAgent("human/human", tools=[computer])
 87 | 
 88 | try:
 89 |     async for result in primary_agent.run(task):
 90 |         if result.confidence < 0.7:  # Low confidence threshold
 91 |             # Seamlessly hand off to human
 92 |             async for _ in fallback_agent.run(f"Continue this task: {task}"):
 93 |                 pass
 94 | except Exception:
 95 |     # Agent failed, human takes over
 96 |     async for _ in fallback_agent.run(f"Handle this failed task: {task}"):
 97 |         pass
 98 | ```
 99 | 
100 | ## Interactive Features
101 | 
102 | The human-in-the-loop interface provides a rich, responsive experience:
103 | 
104 | ### **Visual Environment**
105 | 
106 | - **Screenshot display** with live updates as you work
107 | - **Click handlers** for direct interaction with UI elements
108 | - **Zoom and pan** to see details clearly
109 | 
110 | ### **Action Controls**
111 | 
112 | - **Click actions** - precise cursor positioning and clicking
113 | - **Keyboard input** - type text naturally or send specific key combinations
114 | - **Action history** - see the sequence of actions taken
115 | - **Undo support** - step back when needed
116 | 
117 | ### **Tool Integration**
118 | 
119 | - **Full OpenAI compatibility** - standard tool call format
120 | - **Custom tools** - integrate your own tools seamlessly
121 | - **Real-time feedback** - see tool responses immediately
122 | 
123 | ### **Smart Polling**
124 | 
125 | - **Responsive updates** - UI refreshes when new completions arrive
126 | - **Background processing** - continue working while waiting for tasks
127 | - **Session persistence** - resume interrupted sessions
128 | 
129 | ## Real-World Use Cases
130 | 
131 | ### **Training Data Generation**
132 | 
133 | Create perfect demonstrations for fine-tuning:
134 | 
135 | ```python
136 | # Generate training examples for spreadsheet tasks
137 | demo_agent = ComputerAgent("human/human", tools=[computer])
138 | 
139 | tasks = [
140 |     "Create a budget spreadsheet with income and expense categories",
141 |     "Apply conditional formatting to highlight overbudget items",
142 |     "Generate a pie chart showing expense distribution"
143 | ]
144 | 
145 | for task in tasks:
146 |     # Human demonstrates each task perfectly
147 |     async for _ in demo_agent.run(task):
148 |         pass  # Recorded actions become training data
149 | ```
150 | 
151 | ### **Evaluation and Ground Truth**
152 | 
153 | Validate agent performance on complex scenarios:
154 | 
155 | ```python
156 | # Human evaluates agent performance
157 | evaluator = ComputerAgent("human/human", tools=[computer])
158 | 
159 | async for _ in evaluator.run("Review this completed form and rate accuracy (1-10)"):
160 |     pass  # Human provides authoritative quality assessment
161 | ```
162 | 
163 | ### **Interactive Debugging**
164 | 
165 | Step through agent behavior manually:
166 | 
167 | ```python
168 | # Test a workflow step by step
169 | debug_agent = ComputerAgent("human/human", tools=[computer])
170 | 
171 | async for _ in debug_agent.run("Reproduce the agent's failed login sequence"):
172 |     pass  # Human identifies exactly where automation breaks
173 | ```
174 | 
175 | ### **Edge Case Handling**
176 | 
177 | Handle scenarios that break automated agents:
178 | 
179 | ```python
180 | # Complex UI interaction requiring human judgment
181 | edge_case_agent = ComputerAgent("human/human", tools=[computer])
182 | 
183 | async for _ in edge_case_agent.run("Navigate this CAPTCHA-protected form"):
184 |     pass  # Human handles what automation cannot
185 | ```
186 | 
187 | ## Configuration Options
188 | 
189 | Customize the human agent experience:
190 | 
191 | - **UI refresh rate**: Adjust polling frequency for your workflow
192 | - **Image quality**: Balance detail vs. performance for screenshots
193 | - **Action logging**: Save detailed traces for analysis and training
194 | - **Session timeout**: Configure idle timeouts for security
195 | - **Tool permissions**: Restrict which tools humans can access
196 | 
197 | ## When to Use Human-In-The-Loop
198 | 
199 | | **Scenario**                 | **Why Human Control**                                 |
200 | | ---------------------------- | ----------------------------------------------------- |
201 | | **Creating training data**   | Perfect demonstrations for model fine-tuning          |
202 | | **Evaluating complex tasks** | Human judgment for subjective or nuanced assessment   |
203 | | **Handling edge cases**      | CAPTCHAs, unusual UIs, context-dependent decisions    |
204 | | **Debugging workflows**      | Step through failures to identify breaking points     |
205 | | **High-stakes operations**   | Critical tasks requiring human oversight and approval |
206 | | **Testing new environments** | Validate tools and environments work as expected      |
207 | 
208 | ## Learn More
209 | 
210 | - **Interactive examples**: Try human-in-the-loop control with sample tasks
211 | - **Training data pipelines**: Learn how to convert human demonstrations into model training data
212 | - **Evaluation frameworks**: Build human-validated test suites for your agents
213 | - **API documentation**: Full reference for human agent configuration
214 | 
215 | Ready to put humans back in the loop? The most sophisticated AI system knows when to ask for help.
216 | 
217 | ---
218 | 
219 | _Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://cua.ai/docs/agent-sdk/supported-agents/human-in-the-loop)._
220 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/human_tool/server.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import uuid
  3 | from dataclasses import asdict, dataclass
  4 | from datetime import datetime
  5 | from enum import Enum
  6 | from typing import Any, Dict, List, Optional
  7 | 
  8 | from fastapi import FastAPI, HTTPException
  9 | from pydantic import BaseModel
 10 | 
 11 | 
 12 | class CompletionStatus(str, Enum):
 13 |     PENDING = "pending"
 14 |     COMPLETED = "completed"
 15 |     FAILED = "failed"
 16 | 
 17 | 
 18 | @dataclass
 19 | class CompletionCall:
 20 |     id: str
 21 |     messages: List[Dict[str, Any]]
 22 |     model: str
 23 |     status: CompletionStatus
 24 |     created_at: datetime
 25 |     completed_at: Optional[datetime] = None
 26 |     response: Optional[str] = None
 27 |     tool_calls: Optional[List[Dict[str, Any]]] = None
 28 |     error: Optional[str] = None
 29 | 
 30 | 
 31 | class ToolCall(BaseModel):
 32 |     id: str
 33 |     type: str = "function"
 34 |     function: Dict[str, Any]
 35 | 
 36 | 
 37 | class CompletionRequest(BaseModel):
 38 |     messages: List[Dict[str, Any]]
 39 |     model: str
 40 | 
 41 | 
 42 | class CompletionResponse(BaseModel):
 43 |     response: Optional[str] = None
 44 |     tool_calls: Optional[List[Dict[str, Any]]] = None
 45 | 
 46 | 
 47 | class CompletionQueue:
 48 |     def __init__(self):
 49 |         self._queue: Dict[str, CompletionCall] = {}
 50 |         self._pending_order: List[str] = []
 51 |         self._lock = asyncio.Lock()
 52 | 
 53 |     async def add_completion(self, messages: List[Dict[str, Any]], model: str) -> str:
 54 |         """Add a completion call to the queue."""
 55 |         async with self._lock:
 56 |             call_id = str(uuid.uuid4())
 57 |             completion_call = CompletionCall(
 58 |                 id=call_id,
 59 |                 messages=messages,
 60 |                 model=model,
 61 |                 status=CompletionStatus.PENDING,
 62 |                 created_at=datetime.now(),
 63 |             )
 64 |             self._queue[call_id] = completion_call
 65 |             self._pending_order.append(call_id)
 66 |             return call_id
 67 | 
 68 |     async def get_pending_calls(self) -> List[Dict[str, Any]]:
 69 |         """Get all pending completion calls."""
 70 |         async with self._lock:
 71 |             pending_calls = []
 72 |             for call_id in self._pending_order:
 73 |                 if (
 74 |                     call_id in self._queue
 75 |                     and self._queue[call_id].status == CompletionStatus.PENDING
 76 |                 ):
 77 |                     call = self._queue[call_id]
 78 |                     pending_calls.append(
 79 |                         {
 80 |                             "id": call.id,
 81 |                             "model": call.model,
 82 |                             "created_at": call.created_at.isoformat(),
 83 |                             "messages": call.messages,
 84 |                         }
 85 |                     )
 86 |             return pending_calls
 87 | 
 88 |     async def get_call_status(self, call_id: str) -> Optional[Dict[str, Any]]:
 89 |         """Get the status of a specific completion call."""
 90 |         async with self._lock:
 91 |             if call_id not in self._queue:
 92 |                 return None
 93 | 
 94 |             call = self._queue[call_id]
 95 |             result = {
 96 |                 "id": call.id,
 97 |                 "status": call.status.value,
 98 |                 "created_at": call.created_at.isoformat(),
 99 |                 "model": call.model,
100 |                 "messages": call.messages,
101 |             }
102 | 
103 |             if call.completed_at:
104 |                 result["completed_at"] = call.completed_at.isoformat()
105 |             if call.response:
106 |                 result["response"] = call.response
107 |             if call.tool_calls:
108 |                 result["tool_calls"] = call.tool_calls
109 |             if call.error:
110 |                 result["error"] = call.error
111 | 
112 |             return result
113 | 
114 |     async def complete_call(
115 |         self,
116 |         call_id: str,
117 |         response: Optional[str] = None,
118 |         tool_calls: Optional[List[Dict[str, Any]]] = None,
119 |     ) -> bool:
120 |         """Mark a completion call as completed with a response or tool calls."""
121 |         async with self._lock:
122 |             if call_id not in self._queue:
123 |                 return False
124 | 
125 |             call = self._queue[call_id]
126 |             if call.status != CompletionStatus.PENDING:
127 |                 return False
128 | 
129 |             call.status = CompletionStatus.COMPLETED
130 |             call.completed_at = datetime.now()
131 |             call.response = response
132 |             call.tool_calls = tool_calls
133 | 
134 |             # Remove from pending order
135 |             if call_id in self._pending_order:
136 |                 self._pending_order.remove(call_id)
137 | 
138 |             return True
139 | 
140 |     async def fail_call(self, call_id: str, error: str) -> bool:
141 |         """Mark a completion call as failed with an error."""
142 |         async with self._lock:
143 |             if call_id not in self._queue:
144 |                 return False
145 | 
146 |             call = self._queue[call_id]
147 |             if call.status != CompletionStatus.PENDING:
148 |                 return False
149 | 
150 |             call.status = CompletionStatus.FAILED
151 |             call.completed_at = datetime.now()
152 |             call.error = error
153 | 
154 |             # Remove from pending order
155 |             if call_id in self._pending_order:
156 |                 self._pending_order.remove(call_id)
157 | 
158 |             return True
159 | 
160 |     async def wait_for_completion(self, call_id: str, timeout: float = 300.0) -> Optional[str]:
161 |         """Wait for a completion call to be completed and return the response."""
162 |         start_time = asyncio.get_event_loop().time()
163 | 
164 |         while True:
165 |             status = await self.get_call_status(call_id)
166 |             if not status:
167 |                 return None
168 | 
169 |             if status["status"] == CompletionStatus.COMPLETED.value:
170 |                 return status.get("response")
171 |             elif status["status"] == CompletionStatus.FAILED.value:
172 |                 raise Exception(f"Completion failed: {status.get('error', 'Unknown error')}")
173 | 
174 |             # Check timeout
175 |             if asyncio.get_event_loop().time() - start_time > timeout:
176 |                 await self.fail_call(call_id, "Timeout waiting for human response")
177 |                 raise TimeoutError("Timeout waiting for human response")
178 | 
179 |             # Wait a bit before checking again
180 |             await asyncio.sleep(0.5)
181 | 
182 | 
183 | # Global queue instance
184 | completion_queue = CompletionQueue()
185 | 
186 | # FastAPI app
187 | app = FastAPI(title="Human Completion Server", version="1.0.0")
188 | 
189 | 
190 | @app.post("/queue", response_model=Dict[str, str])
191 | async def queue_completion(request: CompletionRequest):
192 |     """Add a completion request to the queue."""
193 |     call_id = await completion_queue.add_completion(request.messages, request.model)
194 |     return {"id": call_id, "status": "queued"}
195 | 
196 | 
197 | @app.get("/pending")
198 | async def list_pending():
199 |     """List all pending completion calls."""
200 |     pending_calls = await completion_queue.get_pending_calls()
201 |     return {"pending_calls": pending_calls}
202 | 
203 | 
204 | @app.get("/status/{call_id}")
205 | async def get_status(call_id: str):
206 |     """Get the status of a specific completion call."""
207 |     status = await completion_queue.get_call_status(call_id)
208 |     if not status:
209 |         raise HTTPException(status_code=404, detail="Completion call not found")
210 |     return status
211 | 
212 | 
213 | @app.post("/complete/{call_id}")
214 | async def complete_call(call_id: str, response: CompletionResponse):
215 |     """Complete a call with a human response."""
216 |     success = await completion_queue.complete_call(
217 |         call_id, response=response.response, tool_calls=response.tool_calls
218 |     )
219 |     if success:
220 |         return {"status": "success", "message": "Call completed"}
221 |     else:
222 |         raise HTTPException(status_code=404, detail="Call not found or already completed")
223 | 
224 | 
225 | @app.post("/fail/{call_id}")
226 | async def fail_call(call_id: str, error: Dict[str, str]):
227 |     """Mark a call as failed."""
228 |     success = await completion_queue.fail_call(call_id, error.get("error", "Unknown error"))
229 |     if not success:
230 |         raise HTTPException(
231 |             status_code=404, detail="Completion call not found or already completed"
232 |         )
233 |     return {"status": "failed"}
234 | 
235 | 
236 | @app.get("/")
237 | async def root():
238 |     """Root endpoint."""
239 |     return {"message": "Human Completion Server is running"}
240 | 
241 | 
242 | if __name__ == "__main__":
243 |     import uvicorn
244 | 
245 |     uvicorn.run(app, host="0.0.0.0", port=8002)
246 | 
```
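
The queue above is exposed over plain HTTP, so any client can drive it. Below is a minimal sketch using `httpx` against a locally running server on port 8002 (matching the `__main__` block above); the script answers its own call purely to exercise the `/complete` endpoint, which would normally be done by a human through the web UI:

```python
import asyncio
import httpx

async def demo() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8002") as client:
        # 1. Queue a completion request for a human to answer.
        r = await client.post("/queue", json={
            "messages": [{"role": "user", "content": "Click the Submit button"}],
            "model": "human/human",
        })
        call_id = r.json()["id"]

        # 2. Normally a human operator answers via the UI; here we complete
        #    our own call to show the endpoint shape.
        await client.post(f"/complete/{call_id}", json={"response": "Done."})

        # 3. Check the status; it is no longer pending once completed.
        status = (await client.get(f"/status/{call_id}")).json()
        print(status["status"], status.get("response"))

asyncio.run(demo())
```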

--------------------------------------------------------------------------------
/libs/python/agent/agent/computers/custom.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Custom computer handler implementation that accepts a dictionary of functions.
  3 | """
  4 | 
  5 | import base64
  6 | import io
  7 | from typing import Any, Callable, Dict, List, Literal, Optional, Union
  8 | 
  9 | from PIL import Image
 10 | 
 11 | from .base import AsyncComputerHandler
 12 | 
 13 | 
 14 | class CustomComputerHandler(AsyncComputerHandler):
 15 |     """Computer handler that implements the Computer protocol using a dictionary of custom functions."""
 16 | 
 17 |     def __init__(self, functions: Dict[str, Callable]):
 18 |         """
 19 |         Initialize with a dictionary of functions.
 20 | 
 21 |         Args:
 22 |             functions: Dictionary where keys are method names and values are callable functions.
 23 |                       Only 'screenshot' is required, all others are optional.
 24 | 
 25 |         Raises:
 26 |             ValueError: If required 'screenshot' function is not provided.
 27 |         """
 28 |         if "screenshot" not in functions:
 29 |             raise ValueError("'screenshot' function is required in functions dictionary")
 30 | 
 31 |         self.functions = functions
 32 |         self._last_screenshot_size: Optional[tuple[int, int]] = None
 33 | 
 34 |     async def _call_function(self, func, *args, **kwargs):
 35 |         """
 36 |         Call a function, handling both async and sync functions.
 37 | 
 38 |         Args:
 39 |             func: The function to call
 40 |             *args: Positional arguments to pass to the function
 41 |             **kwargs: Keyword arguments to pass to the function
 42 | 
 43 |         Returns:
 44 |             The result of the function call
 45 |         """
 46 |         import asyncio
 47 |         import inspect
 48 | 
 49 |         if callable(func):
 50 |             if inspect.iscoroutinefunction(func):
 51 |                 return await func(*args, **kwargs)
 52 |             else:
 53 |                 return func(*args, **kwargs)
 54 |         else:
 55 |             return func
 56 | 
 57 |     async def _get_value(self, attribute: str):
 58 |         """
 59 |         Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys.
 60 | 
 61 |         Args:
 62 |             attribute: The attribute name to look for
 63 | 
 64 |         Returns:
 65 |             The value from the functions dict, called if callable, returned directly if not
 66 |         """
 67 |         # Check for 'get_{attribute}' first
 68 |         get_key = f"get_{attribute}"
 69 |         if get_key in self.functions:
 70 |             return await self._call_function(self.functions[get_key])
 71 | 
 72 |         # Check for '{attribute}'
 73 |         if attribute in self.functions:
 74 |             return await self._call_function(self.functions[attribute])
 75 | 
 76 |         return None
 77 | 
 78 |     def _to_b64_str(self, img: Union[bytes, Image.Image, str]) -> str:
 79 |         """
 80 |         Convert image to base64 string.
 81 | 
 82 |         Args:
 83 |             img: Image as bytes, PIL Image, or base64 string
 84 | 
 85 |         Returns:
 86 |             str: Base64 encoded image string
 87 |         """
 88 |         if isinstance(img, str):
 89 |             # Already a base64 string
 90 |             return img
 91 |         elif isinstance(img, bytes):
 92 |             # Raw bytes
 93 |             return base64.b64encode(img).decode("utf-8")
 94 |         elif isinstance(img, Image.Image):
 95 |             # PIL Image
 96 |             buffer = io.BytesIO()
 97 |             img.save(buffer, format="PNG")
 98 |             return base64.b64encode(buffer.getvalue()).decode("utf-8")
 99 |         else:
100 |             raise ValueError(f"Unsupported image type: {type(img)}")
101 | 
102 |     # ==== Computer-Use-Preview Action Space ====
103 | 
104 |     async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
105 |         """Get the current environment type."""
106 |         result = await self._get_value("environment")
107 |         if result is None:
108 |             return "linux"
109 |         assert result in ["windows", "mac", "linux", "browser"]
110 |         return result  # type: ignore
111 | 
112 |     async def get_dimensions(self) -> tuple[int, int]:
113 |         """Get screen dimensions as (width, height)."""
114 |         result = await self._get_value("dimensions")
115 |         if result is not None:
116 |             return result  # type: ignore
117 | 
118 |         # Fallback: use last screenshot size if available
119 |         if not self._last_screenshot_size:
120 |             await self.screenshot()
121 |         assert self._last_screenshot_size is not None, "Failed to get screenshot size"
122 | 
123 |         return self._last_screenshot_size
124 | 
125 |     async def screenshot(self, text: Optional[str] = None) -> str:
126 |         """Take a screenshot and return as base64 string.
127 | 
128 |         Args:
129 |             text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
130 |         """
131 |         result = await self._call_function(self.functions["screenshot"])
132 |         b64_str = self._to_b64_str(result)  # type: ignore
133 | 
134 |         # Try to extract dimensions for fallback use
135 |         try:
136 |             if isinstance(result, Image.Image):
137 |                 self._last_screenshot_size = result.size
138 |             elif isinstance(result, bytes):
139 |                 # Try to decode bytes to get dimensions
140 |                 img = Image.open(io.BytesIO(result))
141 |                 self._last_screenshot_size = img.size
142 |         except Exception:
143 |             # If we can't get dimensions, that's okay
144 |             pass
145 | 
146 |         return b64_str
147 | 
148 |     async def click(self, x: int, y: int, button: str = "left") -> None:
149 |         """Click at coordinates with specified button."""
150 |         if "click" in self.functions:
151 |             await self._call_function(self.functions["click"], x, y, button)
152 |         # No-op if not implemented
153 | 
154 |     async def double_click(self, x: int, y: int) -> None:
155 |         """Double click at coordinates."""
156 |         if "double_click" in self.functions:
157 |             await self._call_function(self.functions["double_click"], x, y)
158 |         # No-op if not implemented
159 | 
160 |     async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
161 |         """Scroll at coordinates with specified scroll amounts."""
162 |         if "scroll" in self.functions:
163 |             await self._call_function(self.functions["scroll"], x, y, scroll_x, scroll_y)
164 |         # No-op if not implemented
165 | 
166 |     async def type(self, text: str) -> None:
167 |         """Type text."""
168 |         if "type" in self.functions:
169 |             await self._call_function(self.functions["type"], text)
170 |         # No-op if not implemented
171 | 
172 |     async def wait(self, ms: int = 1000) -> None:
173 |         """Wait for specified milliseconds."""
174 |         if "wait" in self.functions:
175 |             await self._call_function(self.functions["wait"], ms)
176 |         else:
177 |             # Default implementation
178 |             import asyncio
179 | 
180 |             await asyncio.sleep(ms / 1000.0)
181 | 
182 |     async def move(self, x: int, y: int) -> None:
183 |         """Move cursor to coordinates."""
184 |         if "move" in self.functions:
185 |             await self._call_function(self.functions["move"], x, y)
186 |         # No-op if not implemented
187 | 
188 |     async def keypress(self, keys: Union[List[str], str]) -> None:
189 |         """Press key combination."""
190 |         if "keypress" in self.functions:
191 |             await self._call_function(self.functions["keypress"], keys)
192 |         # No-op if not implemented
193 | 
194 |     async def drag(self, path: List[Dict[str, int]]) -> None:
195 |         """Drag along specified path."""
196 |         if "drag" in self.functions:
197 |             await self._call_function(self.functions["drag"], path)
198 |         # No-op if not implemented
199 | 
200 |     async def get_current_url(self) -> str:
201 |         """Get current URL (for browser environments)."""
202 |         if "get_current_url" in self.functions:
203 |             return await self._get_value("current_url")  # type: ignore
204 |         return ""  # Default fallback
205 | 
206 |     async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
207 |         """Left mouse down at coordinates."""
208 |         if "left_mouse_down" in self.functions:
209 |             await self._call_function(self.functions["left_mouse_down"], x, y)
210 |         # No-op if not implemented
211 | 
212 |     async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
213 |         """Left mouse up at coordinates."""
214 |         if "left_mouse_up" in self.functions:
215 |             await self._call_function(self.functions["left_mouse_up"], x, y)
216 |         # No-op if not implemented
217 | 
```
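
A hedged usage sketch for `CustomComputerHandler`: only the `screenshot` entry is required, and unimplemented actions silently no-op. The stand-in functions and the import path (`agent.computers.custom`, inferred from the file location) are assumptions.

```python
# Minimal sketch: back the handler with a fake screenshot and a logging click.
# Import path inferred from libs/python/agent/agent/computers/custom.py.
import asyncio

from PIL import Image

from agent.computers.custom import CustomComputerHandler


def fake_screenshot() -> Image.Image:
    # Bytes, a PIL Image, or a base64 string are all accepted by _to_b64_str.
    return Image.new("RGB", (1280, 800), color="white")


async def log_click(x: int, y: int, button: str = "left") -> None:
    print(f"click at ({x}, {y}) with the {button} button")


async def main() -> None:
    computer = CustomComputerHandler({"screenshot": fake_screenshot, "click": log_click})
    b64_png = await computer.screenshot()             # base64-encoded PNG
    width, height = await computer.get_dimensions()   # falls back to the screenshot size
    await computer.click(width // 2, height // 2)     # dispatches to log_click
    await computer.type("hello")                      # no 'type' entry, so this is a no-op
    print(len(b64_png), width, height)


asyncio.run(main())
```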

--------------------------------------------------------------------------------
/libs/typescript/core/src/telemetry/clients/posthog.ts:
--------------------------------------------------------------------------------

```typescript
  1 | /**
  2 |  * Telemetry client using PostHog for collecting anonymous usage data.
  3 |  */
  4 | 
  5 | import * as fs from 'node:fs';
  6 | import * as os from 'node:os';
  7 | import * as path from 'node:path';
  8 | import { pino } from 'pino';
  9 | import { PostHog } from 'posthog-node';
 10 | import { v4 as uuidv4 } from 'uuid';
 11 | 
 12 | // Controls how frequently telemetry will be sent (percentage)
 13 | export const TELEMETRY_SAMPLE_RATE = 100; // 100% sampling rate
 14 | 
 15 | // Public PostHog config for anonymous telemetry
 16 | // These values are intentionally public and meant for anonymous telemetry only
 17 | // https://posthog.com/docs/product-analytics/troubleshooting#is-it-ok-for-my-api-key-to-be-exposed-and-public
 18 | export const PUBLIC_POSTHOG_API_KEY = 'phc_eSkLnbLxsnYFaXksif1ksbrNzYlJShr35miFLDppF14';
 19 | export const PUBLIC_POSTHOG_HOST = 'https://eu.i.posthog.com';
 20 | 
 21 | export class PostHogTelemetryClient {
 22 |   private config: {
 23 |     enabled: boolean;
 24 |     sampleRate: number;
 25 |     posthog: { apiKey: string; host: string };
 26 |   };
 27 |   private installationId: string;
 28 |   private initialized = false;
 29 |   private queuedEvents: {
 30 |     name: string;
 31 |     properties: Record<string, unknown>;
 32 |     timestamp: number;
 33 |   }[] = [];
 34 |   private startTime: number; // seconds
 35 |   private posthogClient?: PostHog;
 36 |   private counters: Record<string, number> = {};
 37 | 
 38 |   private logger = pino({ name: 'core.telemetry' });
 39 | 
 40 |   constructor() {
 41 |     // set up config
 42 |     this.config = {
 43 |       enabled: true,
 44 |       sampleRate: TELEMETRY_SAMPLE_RATE,
 45 |       posthog: { apiKey: PUBLIC_POSTHOG_API_KEY, host: PUBLIC_POSTHOG_HOST },
 46 |     };
 47 |     // Check CUA_TELEMETRY_ENABLED environment variable (defaults to enabled)
 48 |     const telemetryEnabled = ['1', 'true', 'yes', 'on'].includes(
 49 |       process.env.CUA_TELEMETRY_ENABLED?.toLowerCase() || 'true'
 50 |     );
 51 | 
 52 |     this.config.enabled = telemetryEnabled;
 53 |     this.config.sampleRate = Number.parseFloat(
 54 |       process.env.CUA_TELEMETRY_SAMPLE_RATE || String(TELEMETRY_SAMPLE_RATE)
 55 |     );
 56 |     // init client
 57 |     this.installationId = this._getOrCreateInstallationId();
 58 |     this.startTime = Date.now() / 1000; // Convert to seconds
 59 | 
 60 |     // Log telemetry status on startup
 61 |     if (this.config.enabled) {
 62 |       this.logger.info(`Telemetry enabled (sampling at ${this.config.sampleRate}%)`);
 63 |       // Initialize PostHog client if config is available
 64 |       this._initializePosthog();
 65 |     } else {
 66 |       this.logger.info('Telemetry disabled');
 67 |     }
 68 |   }
 69 | 
 70 |   /**
 71 |    * Get or create a random installation ID.
 72 |    * This ID is not tied to any personal information.
 73 |    */
 74 |   private _getOrCreateInstallationId(): string {
 75 |     const homeDir = os.homedir();
 76 |     const idFile = path.join(homeDir, '.cua', 'installation_id');
 77 | 
 78 |     try {
 79 |       if (fs.existsSync(idFile)) {
 80 |         return fs.readFileSync(idFile, 'utf-8').trim();
 81 |       }
 82 |     } catch (error) {
 83 |       this.logger.debug(`Failed to read installation ID: ${error}`);
 84 |     }
 85 | 
 86 |     // Create new ID if not exists
 87 |     const newId = uuidv4();
 88 |     try {
 89 |       const dir = path.dirname(idFile);
 90 |       if (!fs.existsSync(dir)) {
 91 |         fs.mkdirSync(dir, { recursive: true });
 92 |       }
 93 |       fs.writeFileSync(idFile, newId);
 94 |       return newId;
 95 |     } catch (error) {
 96 |       this.logger.debug(`Failed to write installation ID: ${error}`);
 97 |     }
 98 | 
 99 |     // Fallback to in-memory ID if file operations fail
100 |     return newId;
101 |   }
102 | 
103 |   /**
104 |    * Initialize the PostHog client with configuration.
105 |    */
106 |   private _initializePosthog(): boolean {
107 |     if (this.initialized) {
108 |       return true;
109 |     }
110 | 
111 |     try {
112 |       this.posthogClient = new PostHog(this.config.posthog.apiKey, {
113 |         host: this.config.posthog.host,
114 |         flushAt: 20, // Number of events to batch before sending
115 |         flushInterval: 30000, // Send events every 30 seconds
116 |       });
117 |       this.initialized = true;
118 |       this.logger.debug('PostHog client initialized successfully');
119 | 
120 |       // Process any queued events
121 |       this._processQueuedEvents();
122 |       return true;
123 |     } catch (error) {
124 |       this.logger.error(`Failed to initialize PostHog client: ${error}`);
125 |       return false;
126 |     }
127 |   }
128 | 
129 |   /**
130 |    * Process any events that were queued before initialization.
131 |    */
132 |   private _processQueuedEvents(): void {
133 |     if (!this.posthogClient || this.queuedEvents.length === 0) {
134 |       return;
135 |     }
136 | 
137 |     for (const event of this.queuedEvents) {
138 |       this._captureEvent(event.name, event.properties);
139 |     }
140 |     this.queuedEvents = [];
141 |   }
142 | 
143 |   /**
144 |    * Capture an event with PostHog.
145 |    */
146 |   private _captureEvent(eventName: string, properties?: Record<string, unknown>): void {
147 |     if (!this.posthogClient) {
148 |       return;
149 |     }
150 | 
151 |     try {
152 |       // Add standard properties
153 |       const eventProperties = {
154 |         ...properties,
155 |         version: process.env.npm_package_version || 'unknown',
156 |         platform: process.platform,
157 |         node_version: process.version,
158 |         is_ci: this._isCI,
159 |       };
160 | 
161 |       this.posthogClient.capture({
162 |         distinctId: this.installationId,
163 |         event: eventName,
164 |         properties: eventProperties,
165 |       });
166 |     } catch (error) {
167 |       this.logger.debug(`Failed to capture event: ${error}`);
168 |     }
169 |   }
170 | 
171 |   private get _isCI(): boolean {
172 |     /**
173 |      * Detect if running in CI environment.
174 |      */
175 |     return !!(
176 |       process.env.CI ||
177 |       process.env.CONTINUOUS_INTEGRATION ||
178 |       process.env.GITHUB_ACTIONS ||
179 |       process.env.GITLAB_CI ||
180 |       process.env.CIRCLECI ||
181 |       process.env.TRAVIS ||
182 |       process.env.JENKINS_URL
183 |     );
184 |   }
185 | 
186 |   increment(counterName: string, value = 1) {
187 |     /**
188 |      * Increment a named counter.
189 |      */
190 |     if (!this.config.enabled) {
191 |       return;
192 |     }
193 | 
194 |     if (!(counterName in this.counters)) {
195 |       this.counters[counterName] = 0;
196 |     }
197 |     this.counters[counterName] += value;
198 |   }
199 | 
200 |   recordEvent(eventName: string, properties?: Record<string, unknown>): void {
201 |     /**
202 |      * Record an event with optional properties.
203 |      */
204 |     if (!this.config.enabled) {
205 |       return;
206 |     }
207 | 
208 |     // Increment counter for this event type
209 |     const counterKey = `event:${eventName}`;
210 |     this.increment(counterKey);
211 | 
212 |     // Apply sampling
213 |     if (Math.random() * 100 > this.config.sampleRate) {
214 |       return;
215 |     }
216 | 
217 |     const event = {
218 |       name: eventName,
219 |       properties: properties || {},
220 |       timestamp: Date.now() / 1000,
221 |     };
222 | 
223 |     if (this.initialized && this.posthogClient) {
224 |       this._captureEvent(eventName, properties);
225 |     } else {
226 |       // Queue event if not initialized
227 |       this.queuedEvents.push(event);
228 |       // Try to initialize again
229 |       if (this.config.enabled && !this.initialized) {
230 |         this._initializePosthog();
231 |       }
232 |     }
233 |   }
234 | 
235 |   /**
236 |    * Flush any pending events to PostHog.
237 |    */
238 |   async flush(): Promise<boolean> {
239 |     if (!this.config.enabled || !this.posthogClient) {
240 |       return false;
241 |     }
242 | 
243 |     try {
244 |       // Send counter data as a single event
245 |       if (Object.keys(this.counters).length > 0) {
246 |         this._captureEvent('telemetry_counters', {
247 |           counters: { ...this.counters },
248 |           duration: Date.now() / 1000 - this.startTime,
249 |         });
250 |       }
251 | 
252 |       await this.posthogClient.flush();
253 |       this.logger.debug('Telemetry flushed successfully');
254 | 
255 |       // Clear counters after sending
256 |       this.counters = {};
257 |       return true;
258 |     } catch (error) {
259 |       this.logger.debug(`Failed to flush telemetry: ${error}`);
260 |       return false;
261 |     }
262 |   }
263 | 
264 |   enable(): void {
265 |     /**
266 |      * Enable telemetry collection.
267 |      */
268 |     this.config.enabled = true;
269 |     this.logger.info('Telemetry enabled');
270 |     if (!this.initialized) {
271 |       this._initializePosthog();
272 |     }
273 |   }
274 | 
275 |   async disable(): Promise<void> {
276 |     /**
277 |      * Disable telemetry collection.
278 |      */
279 |     this.config.enabled = false;
280 |     await this.posthogClient?.disable();
281 |     this.logger.info('Telemetry disabled');
282 |   }
283 | 
284 |   get enabled(): boolean {
285 |     /**
286 |      * Check if telemetry is enabled.
287 |      */
288 |     return this.config.enabled;
289 |   }
290 | 
291 |   async shutdown(): Promise<void> {
292 |     /**
293 |      * Shutdown the telemetry client and flush any pending events.
294 |      */
295 |     if (this.posthogClient) {
296 |       await this.flush();
297 |       await this.posthogClient.shutdown();
298 |       this.initialized = false;
299 |       this.posthogClient = undefined;
300 |     }
301 |   }
302 | }
303 | 
```

--------------------------------------------------------------------------------
/docs/src/components/editable-code-block.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | 'use client';
  2 | 
  3 | import React, { createContext, useContext, useState, ReactNode } from 'react';
  4 | import * as Base from 'fumadocs-ui/components/codeblock';
  5 | import { cn } from 'fumadocs-ui/utils/cn';
  6 | 
  7 | /**
  8 |  * Context for managing editable values within code blocks
  9 |  */
 10 | interface EditableCodeContextValue {
 11 |   values: Record<string, string>;
 12 |   updateValue: (key: string, value: string) => void;
 13 | }
 14 | 
 15 | const EditableCodeContext = createContext<EditableCodeContextValue | null>(null);
 16 | 
 17 | /**
 18 |  * Hook to access the editable code context
 19 |  */
 20 | function useEditableCode() {
 21 |   const context = useContext(EditableCodeContext);
 22 |   if (!context) {
 23 |     throw new Error('useEditableCode must be used within EditableCodeBlock');
 24 |   }
 25 |   return context;
 26 | }
 27 | 
 28 | /**
 29 |  * Props for EditableCodeBlock component
 30 |  */
 31 | interface EditableCodeBlockProps {
 32 |   /** Programming language for styling */
 33 |   lang?: string;
 34 |   /** Initial values for placeholders */
 35 |   defaultValues?: Record<string, string>;
 36 |   /** Code content with embedded EditableValue components */
 37 |   children: ReactNode;
 38 |   /** Additional CSS classes */
 39 |   className?: string;
 40 |   /** Title for the code block */
 41 |   title?: string;
 42 | }
 43 | 
 44 | /**
 45 |  * Code block component that supports inline editable values
 46 |  * Uses fumadocs-ui styling with interactive input fields
 47 |  */
 48 | export function EditableCodeBlock({
 49 |   lang = 'python',
 50 |   defaultValues = {},
 51 |   children,
 52 |   className,
 53 |   title,
 54 | }: EditableCodeBlockProps) {
 55 |   const [values, setValues] = useState<Record<string, string>>(defaultValues);
 56 | 
 57 |   const updateValue = (key: string, value: string) => {
 58 |     setValues((prev) => ({ ...prev, [key]: value }));
 59 |   };
 60 | 
 61 |   return (
 62 |     <EditableCodeContext.Provider value={{ values, updateValue }}>
 63 |       <Base.CodeBlock title={title} className={cn('my-4', className)}>
 64 |         <Base.Pre className={cn(`language-${lang}`, 'px-3')}>
 65 |           <code
 66 |             className={cn(`language-${lang}`)}
 67 |             style={{ display: 'block', whiteSpace: 'pre-wrap' }}
 68 |           >
 69 |             {children}
 70 |           </code>
 71 |         </Base.Pre>
 72 |       </Base.CodeBlock>
 73 |     </EditableCodeContext.Provider>
 74 |   );
 75 | }
 76 | 
 77 | /**
 78 |  * Props for EditableValue component
 79 |  */
 80 | interface EditableValueProps {
 81 |   /** Unique identifier for this value */
 82 |   placeholder: string;
 83 |   /** Display width in characters (default: auto) */
 84 |   width?: number;
 85 |   /** Optional default value */
 86 |   defaultValue?: string;
 87 |   /** Input type */
 88 |   type?: 'text' | 'password';
 89 | }
 90 | 
 91 | /**
 92 |  * Inline editable input that blends with code styling
 93 |  * Appears as an underlined, hoverable value within code
 94 |  */
 95 | export function EditableValue({
 96 |   placeholder,
 97 |   width: explicitWidth,
 98 |   defaultValue = '',
 99 |   type = 'text',
100 | }: EditableValueProps) {
101 |   const { values, updateValue } = useEditableCode();
102 |   const value = values[placeholder] ?? defaultValue;
103 |   const spanRef = React.useRef<HTMLSpanElement>(null);
104 |   const placeholderSpanRef = React.useRef<HTMLSpanElement>(null);
105 |   const inputRef = React.useRef<HTMLInputElement>(null);
106 |   const [measuredWidth, setMeasuredWidth] = React.useState(0);
107 |   const [placeholderWidth, setPlaceholderWidth] = React.useState(0);
108 |   const [isHovered, setIsHovered] = React.useState(false);
109 |   const [tooltipPosition, setTooltipPosition] = React.useState({ top: 0, left: 0 });
110 |   const [isVisible, setIsVisible] = React.useState(false);
111 | 
112 |   // Observe visibility changes to trigger remeasurement
113 |   React.useEffect(() => {
114 |     if (!inputRef.current) return;
115 | 
116 |     const observer = new IntersectionObserver(
117 |       (entries) => {
118 |         entries.forEach((entry) => {
119 |           setIsVisible(entry.isIntersecting);
120 |         });
121 |       },
122 |       { threshold: 0.01 }
123 |     );
124 | 
125 |     observer.observe(inputRef.current);
126 | 
127 |     return () => {
128 |       observer.disconnect();
129 |     };
130 |   }, []);
131 | 
132 |   // Measure the actual text width using a hidden span
133 |   React.useEffect(() => {
134 |     if (spanRef.current && isVisible) {
135 |       setMeasuredWidth(spanRef.current.offsetWidth);
136 |     }
137 |   }, [value, isVisible]);
138 | 
139 |   // Measure placeholder width when visible
140 |   React.useEffect(() => {
141 |     if (placeholderSpanRef.current && isVisible) {
142 |       setPlaceholderWidth(placeholderSpanRef.current.offsetWidth);
143 |     }
144 |   }, [placeholder, isVisible]);
145 | 
146 |   // Update tooltip position when hovered
147 |   React.useEffect(() => {
148 |     if (isHovered && inputRef.current) {
149 |       const rect = inputRef.current.getBoundingClientRect();
150 |       setTooltipPosition({
151 |         top: rect.top - 28,
152 |         left: rect.left + rect.width / 2,
153 |       });
154 |     }
155 |   }, [isHovered]);
156 | 
157 |   const inputWidth = explicitWidth
158 |     ? `${explicitWidth}ch`
159 |     : `${Math.max(placeholderWidth, measuredWidth, 80)}px`;
160 | 
161 |   return (
162 |     <span
163 |       style={{ display: 'inline', whiteSpace: 'nowrap', position: 'relative' }}
164 |       onMouseEnter={() => setIsHovered(true)}
165 |       onMouseLeave={() => setIsHovered(false)}
166 |     >
167 |       {/* Hidden span to measure current value width */}
168 |       <span
169 |         ref={spanRef}
170 |         style={{
171 |           position: 'absolute',
172 |           visibility: 'hidden',
173 |           whiteSpace: 'pre',
174 |           fontFamily: 'inherit',
175 |           pointerEvents: 'none',
176 |         }}
177 |         aria-hidden="true"
178 |       >
179 |         {value}
180 |       </span>
181 | 
182 |       {/* Hidden span to measure placeholder width */}
183 |       <span
184 |         ref={placeholderSpanRef}
185 |         style={{
186 |           position: 'absolute',
187 |           visibility: 'hidden',
188 |           whiteSpace: 'pre',
189 |           fontFamily: 'inherit',
190 |           pointerEvents: 'none',
191 |         }}
192 |         aria-hidden="true"
193 |       >
194 |         {placeholder}
195 |       </span>
196 | 
197 |       {/* Tooltip */}
198 |       <span
199 |         style={{
200 |           position: 'fixed',
201 |           top: tooltipPosition.top,
202 |           left: tooltipPosition.left,
203 |           transform: 'translateX(-50%)',
204 |           padding: '4px 8px',
205 |           backgroundColor: 'rgba(0, 0, 0, 0.8)',
206 |           color: 'white',
207 |           fontSize: '12px',
208 |           borderRadius: '4px',
209 |           whiteSpace: 'nowrap',
210 |           pointerEvents: 'none',
211 |           opacity: isHovered ? 1 : 0,
212 |           transition: 'opacity 0.2s ease-in-out',
213 |           zIndex: 9999,
214 |         }}
215 |       >
216 |         Edit me!
217 |       </span>
218 | 
219 |       <input
220 |         ref={inputRef}
221 |         type={type}
222 |         value={value}
223 |         onChange={(e) => updateValue(placeholder, e.target.value)}
224 |         placeholder={placeholder}
225 |         className={cn(type === 'password' && value && 'text-security-disc')}
226 |         style={{
227 |           display: 'inline',
228 |           width: inputWidth,
229 |           verticalAlign: 'baseline',
230 |           lineHeight: 'inherit',
231 |           fontSize: 'inherit',
232 |           fontFamily: 'inherit',
233 |           height: 'auto',
234 |           padding: 0,
235 |           margin: 0,
236 |           background: 'transparent',
237 |           border: 'none',
238 |           borderBottom: '2px dashed rgba(96, 165, 250, 0.5)',
239 |           outline: 'none',
240 |           color: 'inherit',
241 |           transition: 'border-bottom-color 0.2s ease-in-out',
242 |         }}
243 |       />
244 |     </span>
245 |   );
246 | }
247 | 
248 | /**
249 |  * Container for form inputs outside the code block
250 |  */
251 | export function EditableForm({
252 |   children,
253 |   className = '',
254 | }: {
255 |   children: ReactNode;
256 |   className?: string;
257 | }) {
258 |   return (
259 |     <div
260 |       className={cn(
261 |         'p-4 border rounded-lg bg-fd-secondary/50 dark:bg-fd-secondary/30 mb-6',
262 |         className
263 |       )}
264 |     >
265 |       <h3 className="text-lg font-semibold mb-4">Configuration</h3>
266 |       {children}
267 |     </div>
268 |   );
269 | }
270 | 
271 | /**
272 |  * Form input for editing values outside code block
273 |  */
274 | interface EditableInputProps {
275 |   /** Placeholder key to bind to */
276 |   placeholder: string;
277 |   /** Label text */
278 |   label: string;
279 |   /** Input type */
280 |   type?: 'text' | 'email' | 'password';
281 |   /** Custom class name */
282 |   className?: string;
283 | }
284 | 
285 | export function EditableInput({
286 |   placeholder,
287 |   label,
288 |   type = 'text',
289 |   className = '',
290 | }: EditableInputProps) {
291 |   const { values, updateValue } = useEditableCode();
292 |   const value = values[placeholder] || '';
293 | 
294 |   return (
295 |     <div className={cn('mb-4', className)}>
296 |       <label className="block text-sm font-medium mb-2">{label}</label>
297 |       <input
298 |         type={type}
299 |         value={value}
300 |         onChange={(e) => updateValue(placeholder, e.target.value)}
301 |         placeholder={placeholder}
302 |         className={cn(
303 |           'w-full px-3 py-2 border rounded-md',
304 |           'focus:outline-none focus:ring-2 focus:ring-blue-500',
305 |           'bg-fd-background border-fd-border'
306 |         )}
307 |       />
308 |     </div>
309 |   );
310 | }
311 | 
```

--------------------------------------------------------------------------------
/tests/test_tracing.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Tests for Computer.tracing functionality.
  3 | """
  4 | 
  5 | import asyncio
  6 | import json
  7 | import tempfile
  8 | from pathlib import Path
  9 | 
 10 | import pytest
 11 | from computer.tracing import ComputerTracing
 12 | 
 13 | 
 14 | class MockComputer:
 15 |     """Mock computer for testing tracing functionality."""
 16 | 
 17 |     def __init__(self):
 18 |         self.os_type = "macos"
 19 |         self.provider_type = "lume"
 20 |         self.image = "test-image"
 21 |         self.interface = MockInterface()
 22 |         self.logger = MockLogger()
 23 | 
 24 | 
 25 | class MockInterface:
 26 |     """Mock interface for testing."""
 27 | 
 28 |     async def screenshot(self):
 29 |         """Return mock screenshot data."""
 30 |         return b"mock_screenshot_data"
 31 | 
 32 |     async def get_accessibility_tree(self):
 33 |         """Return mock accessibility tree."""
 34 |         return {"type": "window", "children": []}
 35 | 
 36 | 
 37 | class MockLogger:
 38 |     """Mock logger for testing."""
 39 | 
 40 |     def warning(self, message):
 41 |         print(f"Warning: {message}")
 42 | 
 43 | 
 44 | @pytest.mark.asyncio
 45 | async def test_tracing_start_stop():
 46 |     """Test basic start and stop functionality."""
 47 |     computer = MockComputer()
 48 |     tracing = ComputerTracing(computer)
 49 | 
 50 |     # Test initial state
 51 |     assert not tracing.is_tracing
 52 | 
 53 |     # Start tracing
 54 |     with tempfile.TemporaryDirectory() as temp_dir:
 55 |         await tracing.start({"screenshots": True, "api_calls": True, "path": temp_dir})
 56 | 
 57 |         # Test tracing is active
 58 |         assert tracing.is_tracing
 59 | 
 60 |         # Stop tracing
 61 |         trace_path = await tracing.stop({"format": "dir"})
 62 | 
 63 |         # Test tracing is stopped
 64 |         assert not tracing.is_tracing
 65 | 
 66 |         # Verify trace directory exists
 67 |         assert Path(trace_path).exists()
 68 | 
 69 |         # Verify metadata file exists
 70 |         metadata_file = Path(trace_path) / "trace_metadata.json"
 71 |         assert metadata_file.exists()
 72 | 
 73 |         # Verify metadata content
 74 |         with open(metadata_file) as f:
 75 |             metadata = json.load(f)
 76 |             assert "trace_id" in metadata
 77 |             assert "config" in metadata
 78 |             assert "start_time" in metadata
 79 |             assert "end_time" in metadata
 80 | 
 81 | 
 82 | @pytest.mark.asyncio
 83 | async def test_tracing_api_call_recording():
 84 |     """Test API call recording functionality."""
 85 |     computer = MockComputer()
 86 |     tracing = ComputerTracing(computer)
 87 | 
 88 |     with tempfile.TemporaryDirectory() as temp_dir:
 89 |         await tracing.start({"api_calls": True, "screenshots": False, "path": temp_dir})
 90 | 
 91 |         # Record an API call
 92 |         await tracing.record_api_call("left_click", {"x": 100, "y": 200}, result=None, error=None)
 93 | 
 94 |         # Record another API call with error
 95 |         test_error = Exception("Test error")
 96 |         await tracing.record_api_call("type_text", {"text": "test"}, result=None, error=test_error)
 97 | 
 98 |         trace_path = await tracing.stop({"format": "dir"})
 99 | 
100 |         # Verify event files were created
101 |         trace_dir = Path(trace_path)
102 |         event_files = list(trace_dir.glob("event_*_api_call.json"))
103 |         assert len(event_files) >= 2
104 | 
105 |         # Verify event content
106 |         with open(event_files[0]) as f:
107 |             event = json.load(f)
108 |             assert event["type"] == "api_call"
109 |             assert event["data"]["method"] == "left_click"
110 |             assert event["data"]["success"] is True
111 | 
112 | 
113 | @pytest.mark.asyncio
114 | async def test_tracing_metadata():
115 |     """Test metadata recording functionality."""
116 |     computer = MockComputer()
117 |     tracing = ComputerTracing(computer)
118 | 
119 |     with tempfile.TemporaryDirectory() as temp_dir:
120 |         await tracing.start({"metadata": True, "path": temp_dir})
121 | 
122 |         # Add custom metadata
123 |         await tracing.add_metadata("test_key", "test_value")
124 |         await tracing.add_metadata("numeric_key", 42)
125 |         await tracing.add_metadata("complex_key", {"nested": "data"})
126 | 
127 |         trace_path = await tracing.stop({"format": "dir"})
128 | 
129 |         # Verify metadata event files
130 |         trace_dir = Path(trace_path)
131 |         metadata_files = list(trace_dir.glob("event_*_metadata.json"))
132 |         assert len(metadata_files) >= 3
133 | 
134 | 
135 | @pytest.mark.asyncio
136 | async def test_tracing_screenshots():
137 |     """Test screenshot recording functionality."""
138 |     computer = MockComputer()
139 |     tracing = ComputerTracing(computer)
140 | 
141 |     with tempfile.TemporaryDirectory() as temp_dir:
142 |         await tracing.start({"screenshots": True, "path": temp_dir})
143 | 
144 |         # Take a screenshot manually
145 |         await tracing._take_screenshot("manual_test")
146 | 
147 |         trace_path = await tracing.stop({"format": "dir"})
148 | 
149 |         # Verify screenshot files
150 |         trace_dir = Path(trace_path)
151 |         screenshot_files = list(trace_dir.glob("*.png"))
152 |         assert len(screenshot_files) >= 2  # Initial + manual + final
153 | 
154 | 
155 | @pytest.mark.asyncio
156 | async def test_tracing_config_options():
157 |     """Test different configuration options."""
158 |     computer = MockComputer()
159 |     tracing = ComputerTracing(computer)
160 | 
161 |     # Test with minimal config
162 |     with tempfile.TemporaryDirectory() as temp_dir:
163 |         await tracing.start(
164 |             {"screenshots": False, "api_calls": False, "metadata": False, "path": temp_dir}
165 |         )
166 | 
167 |         await tracing.record_api_call("test_call", {})
168 |         await tracing.add_metadata("test", "value")
169 | 
170 |         trace_path = await tracing.stop({"format": "dir"})
171 | 
172 |         # With everything disabled, should only have basic trace events
173 |         trace_dir = Path(trace_path)
174 |         event_files = list(trace_dir.glob("event_*.json"))
175 |         # Should have trace_start and trace_end events only
176 |         assert len(event_files) == 2
177 | 
178 | 
179 | @pytest.mark.asyncio
180 | async def test_tracing_zip_output():
181 |     """Test zip file output format."""
182 |     computer = MockComputer()
183 |     tracing = ComputerTracing(computer)
184 | 
185 |     with tempfile.TemporaryDirectory() as temp_dir:
186 |         await tracing.start({"screenshots": True, "api_calls": True, "path": temp_dir})
187 | 
188 |         await tracing.record_api_call("test_call", {"arg": "value"})
189 | 
190 |         # Stop with zip format
191 |         trace_path = await tracing.stop({"format": "zip"})
192 | 
193 |         # Verify zip file exists
194 |         assert Path(trace_path).exists()
195 |         assert trace_path.endswith(".zip")
196 | 
197 | 
198 | @pytest.mark.asyncio
199 | async def test_tracing_accessibility_tree():
200 |     """Test accessibility tree recording."""
201 |     computer = MockComputer()
202 |     tracing = ComputerTracing(computer)
203 | 
204 |     with tempfile.TemporaryDirectory() as temp_dir:
205 |         await tracing.start({"accessibility_tree": True, "path": temp_dir})
206 | 
207 |         # Record accessibility tree
208 |         await tracing.record_accessibility_tree()
209 | 
210 |         trace_path = await tracing.stop({"format": "dir"})
211 | 
212 |         # Verify accessibility tree event
213 |         trace_dir = Path(trace_path)
214 |         tree_files = list(trace_dir.glob("event_*_accessibility_tree.json"))
215 |         assert len(tree_files) >= 1
216 | 
217 |         # Verify content
218 |         with open(tree_files[0]) as f:
219 |             event = json.load(f)
220 |             assert event["type"] == "accessibility_tree"
221 |             assert "tree" in event["data"]
222 | 
223 | 
224 | def test_tracing_errors():
225 |     """Test error handling in tracing."""
226 |     computer = MockComputer()
227 |     tracing = ComputerTracing(computer)
228 | 
229 |     # Test stop without start
230 |     with pytest.raises(RuntimeError, match="Tracing is not active"):
231 |         asyncio.run(tracing.stop())
232 | 
233 |     # Test start when already started
234 |     async def test_double_start():
235 |         await tracing.start()
236 |         with pytest.raises(RuntimeError, match="Tracing is already active"):
237 |             await tracing.start()
238 |         await tracing.stop()
239 | 
240 |     asyncio.run(test_double_start())
241 | 
242 | 
243 | if __name__ == "__main__":
244 |     # Run tests directly
245 |     import sys
246 | 
247 |     async def run_tests():
248 |         """Run all tests manually."""
249 |         tests = [
250 |             test_tracing_start_stop,
251 |             test_tracing_api_call_recording,
252 |             test_tracing_metadata,
253 |             test_tracing_screenshots,
254 |             test_tracing_config_options,
255 |             test_tracing_zip_output,
256 |             test_tracing_accessibility_tree,
257 |         ]
258 | 
259 |         print("Running Computer.tracing tests...")
260 | 
261 |         for test in tests:
262 |             try:
263 |                 await test()
264 |                 print(f"✓ {test.__name__}")
265 |             except Exception as e:
266 |                 print(f"✗ {test.__name__}: {e}")
267 | 
268 |         # Run sync tests
269 |         try:
270 |             test_tracing_errors()
271 |             print("✓ test_tracing_errors")
272 |         except Exception as e:
273 |             print(f"✗ test_tracing_errors: {e}")
274 | 
275 |         print("Tests completed!")
276 | 
277 |     asyncio.run(run_tests())
278 | 
```
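
The tests above double as usage documentation for `ComputerTracing`; below is a condensed sketch of the same start/record/stop flow in application code, assuming a `computer` object shaped like `MockComputer` (an `interface` with `screenshot()` and `get_accessibility_tree()`, plus a `logger`).

```python
# Sketch of the tracing lifecycle outside the test suite. Config keys and method
# signatures mirror the calls made in the tests; the computer object is assumed
# to expose the same surface as MockComputer above.
from computer.tracing import ComputerTracing


async def traced_session(computer) -> str:
    tracing = ComputerTracing(computer)
    await tracing.start(
        {"screenshots": True, "api_calls": True, "accessibility_tree": True, "path": "./traces"}
    )
    try:
        await tracing.add_metadata("task", "demo run")
        await tracing.record_api_call("left_click", {"x": 100, "y": 200})
        await tracing.record_accessibility_tree()
    finally:
        # Bundle the trace into a zip archive, as in test_tracing_zip_output.
        trace_path = await tracing.stop({"format": "zip"})
    return trace_path
```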

--------------------------------------------------------------------------------
/docs/content/docs/cli-playbook/commands.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Command Reference
  3 | description: Complete reference for all CUA CLI commands
  4 | ---
  5 | 
  6 | import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
  7 | import { Callout } from 'fumadocs-ui/components/callout';
  8 | 
  9 | ## Overview
 10 | 
 11 | The CUA CLI provides commands for authentication and sandbox management.
 12 | 
 13 | ### Command Styles
 14 | 
 15 | The CLI supports **two command styles** for flexibility:
 16 | 
 17 | **Flat style** (quick & concise):
 18 | 
 19 | ```bash
 20 | cua list
 21 | cua create --os linux --size small --region north-america
 22 | cua start my-sandbox
 23 | ```
 24 | 
 25 | **Grouped style** (explicit & clear):
 26 | 
 27 | ```bash
 28 | cua sb list         # or: cua sandbox list
 29 | cua sb create       # or: cua sandbox create
 30 | cua sb start        # or: cua sandbox start
 31 | ```
 32 | 
 33 | Both styles work identically - use whichever you prefer!
 34 | 
 35 | ### Available Commands
 36 | 
 37 | - **Authentication** - `cua auth login`, `cua auth env`, `cua auth logout` (also available as flat commands: `cua login`, `cua env`, `cua logout`)
 38 | - **Sandbox Management** - `cua list`, `cua create`, `cua get`, `cua start`, `cua stop`, `cua restart`, `cua delete`, `cua vnc`
 39 | 
 40 | ## Authentication Commands
 41 | 
 42 | ### `cua auth login`
 43 | 
 44 | Authenticate with your CUA account using browser-based OAuth flow.
 45 | 
 46 | ```bash
 47 | # Interactive browser login
 48 | cua auth login
 49 | 
 50 | # Direct API key login
 51 | cua auth login --api-key sk-your-api-key-here
 52 | 
 53 | # Alternative flat style
 54 | cua login
 55 | cua login --api-key sk-your-api-key-here
 56 | ```
 57 | 
 58 | **Options:**
 59 | 
 60 | - `--api-key <key>` - Provide API key directly instead of browser flow
 61 | 
 62 | **Example:**
 63 | 
 64 | ```bash
 65 | $ cua auth login
 66 | Opening browser for CLI auth...
 67 | API key saved
 68 | ```
 69 | 
 70 | ### `cua auth env`
 71 | 
 72 | Create or update a `.env` file in the current directory with your CUA API key.
 73 | 
 74 | ```bash
 75 | cua auth env
 76 | 
 77 | # Alternative flat style
 78 | cua env
 79 | ```
 80 | 
 81 | **Example:**
 82 | 
 83 | ```bash
 84 | $ cua auth env
 85 | Wrote /path/to/your/project/.env
 86 | ```
 87 | 
 88 | The generated `.env` file will contain:
 89 | 
 90 | ```
 91 | CUA_API_KEY=sk-your-api-key-here
 92 | ```
 93 | 
 94 | ### `cua auth logout`
 95 | 
 96 | Remove the stored API key from your system.
 97 | 
 98 | ```bash
 99 | cua auth logout
100 | 
101 | # Alternative flat style
102 | cua logout
103 | ```
104 | 
105 | **Example:**
106 | 
107 | ```bash
108 | $ cua auth logout
109 | Logged out
110 | ```
111 | 
112 | ## Sandbox Commands
113 | 
114 | ### `cua list`
115 | 
116 | List all your sandboxes with their current status. Passwords are hidden by default for security.
117 | 
118 | ```bash
119 | # List sandboxes (passwords hidden)
120 | cua list
121 | 
122 | # Show passwords explicitly
123 | cua list --show-passwords
124 | 
125 | # Alternative aliases
126 | cua ls
127 | cua ps
128 | ```
129 | 
130 | **Example Output (default, passwords hidden):**
131 | 
132 | ```
133 | NAME              STATUS    HOST
134 | my-dev-sandbox    running   my-dev-sandbox.sandbox.cua.ai
135 | test-windows      stopped   test-windows.sandbox.cua.ai
136 | ```
137 | 
138 | **Example Output (with --show-passwords):**
139 | 
140 | ```
141 | NAME              STATUS    PASSWORD           HOST
142 | my-dev-sandbox    running   secure-pass-123    my-dev-sandbox.sandbox.cua.ai
143 | test-windows      stopped   another-pass-456   test-windows.sandbox.cua.ai
144 | ```
145 | 
146 | ### `cua create`
147 | 
148 | Create a new sandbox.
149 | 
150 | ```bash
151 | cua create --os <OS> --size <SIZE> --region <REGION>
152 | ```
153 | 
154 | **Required Options:**
155 | 
156 | - `--os` - Operating system: `linux`, `windows`, `macos`
157 | - `--size` - Sandbox size: `small`, `medium`, `large`
158 | - `--region` - Region: `north-america`, `europe`, `asia-pacific`, `south-america`
159 | 
160 | **Examples:**
161 | 
162 | ```bash
163 | # Create a small Linux sandbox in North America
164 | cua create --os linux --size small --region north-america
165 | 
166 | # Create a medium Windows sandbox in Europe
167 | cua create --os windows --size medium --region europe
168 | 
169 | # Create a large macOS sandbox in Asia Pacific
170 | cua create --os macos --size large --region asia-pacific
171 | ```
172 | 
173 | **Response Types:**
174 | 
175 | **Immediate (Status 200):**
176 | 
177 | ```bash
178 | Sandbox created and ready: my-new-sandbox-abc123
179 | Password: secure-password-here
180 | Host: my-new-sandbox-abc123.sandbox.cua.ai
181 | ```
182 | 
183 | **Provisioning (Status 202):**
184 | 
185 | ```bash
186 | Sandbox provisioning started: my-new-sandbox-abc123
187 | Job ID: job-xyz789
188 | Use 'cua list' to monitor provisioning progress
189 | ```
190 | 
191 | ### `cua get`
192 | 
193 | Get detailed information about a specific sandbox, including computer-server health status.
194 | 
195 | ```bash
196 | cua get <name>
197 | 
198 | # With additional options
199 | cua get <name> --json
200 | cua get <name> --show-passwords
201 | cua get <name> --show-vnc-url
202 | ```
203 | 
204 | **Options:**
205 | 
206 | - `--json` - Output all details in JSON format
207 | - `--show-passwords` - Include password in output
208 | - `--show-vnc-url` - Include computed NoVNC URL
209 | 
210 | **Example Output (default):**
211 | 
212 | ```bash
213 | $ cua get my-dev-sandbox
214 | Name: my-dev-sandbox
215 | Status: running
216 | Host: my-dev-sandbox.containers.cloud.trycua.com
217 | OS Type: linux
218 | Computer Server Version: 0.1.30
219 | Computer Server Status: healthy
220 | ```
221 | 
222 | **Example Output (with --show-passwords and --show-vnc-url):**
223 | 
224 | ```bash
225 | $ cua get my-dev-sandbox --show-passwords --show-vnc-url
226 | Name: my-dev-sandbox
227 | Status: running
228 | Host: my-dev-sandbox.containers.cloud.trycua.com
229 | Password: secure-pass-123
230 | OS Type: linux
231 | Computer Server Version: 0.1.30
232 | Computer Server Status: healthy
233 | VNC URL: https://my-dev-sandbox.containers.cloud.trycua.com/vnc.html?autoconnect=true&password=secure-pass-123
234 | ```
235 | 
236 | **Example Output (JSON format):**
237 | 
238 | ```bash
239 | $ cua get my-dev-sandbox --json
240 | {
241 |   "name": "my-dev-sandbox",
242 |   "status": "running",
243 |   "host": "my-dev-sandbox.containers.cloud.trycua.com",
244 |   "os_type": "linux",
245 |   "computer_server_version": "0.1.30",
246 |   "computer_server_status": "healthy"
247 | }
248 | ```
249 | 
250 | **Computer Server Health Check:**
251 | 
252 | The `cua get` command automatically probes the computer-server when the sandbox is running:
253 | 
254 | - Checks OS type via `https://{host}:8443/status`
255 | - Checks version via `https://{host}:8443/cmd`
256 | - Shows "Computer Server Status: healthy" when both probes succeed
257 | - Uses a 3-second timeout for each probe
258 | 
259 | <Callout type="info">
260 |   The computer server status is only checked for running sandboxes. Stopped or suspended sandboxes
261 |   will not show computer server information.
262 | </Callout>
263 | 
264 | ### `cua start`
265 | 
266 | Start a stopped sandbox.
267 | 
268 | ```bash
269 | cua start <name>
270 | ```
271 | 
272 | **Example:**
273 | 
274 | ```bash
275 | $ cua start my-dev-sandbox
276 | Start accepted
277 | ```
278 | 
279 | ### `cua stop`
280 | 
281 | Stop a running sandbox.
282 | 
283 | ```bash
284 | cua stop <name>
285 | ```
286 | 
287 | **Example:**
288 | 
289 | ```bash
290 | $ cua stop my-dev-sandbox
291 | stopping
292 | ```
293 | 
294 | ### `cua restart`
295 | 
296 | Restart a sandbox.
297 | 
298 | ```bash
299 | cua restart <name>
300 | ```
301 | 
302 | **Example:**
303 | 
304 | ```bash
305 | $ cua restart my-dev-sandbox
306 | restarting
307 | ```
308 | 
309 | ### `cua delete`
310 | 
311 | Delete a sandbox permanently.
312 | 
313 | ```bash
314 | cua delete <name>
315 | ```
316 | 
317 | **Example:**
318 | 
319 | ```bash
320 | $ cua delete old-test-sandbox
321 | Sandbox deletion initiated: deleting
322 | ```
323 | 
324 | <Callout type="warn">
325 |   This action is irreversible. All data on the sandbox will be permanently lost.
326 | </Callout>
327 | 
328 | ### `cua vnc`
329 | 
330 | Open the VNC interface for a sandbox in your browser.
331 | 
332 | ```bash
333 | cua vnc <name>
334 | 
335 | # Alternative alias
336 | cua open <name>
337 | ```
338 | 
339 | **Example:**
340 | 
341 | ```bash
342 | $ cua vnc my-dev-sandbox
343 | Opening NoVNC: https://my-dev-sandbox.sandbox.cua.ai/vnc.html?autoconnect=true&password=...
344 | ```
345 | 
346 | This command automatically opens your default browser to the VNC interface with the correct password pre-filled.
347 | 
348 | ## Global Options
349 | 
350 | ### Help
351 | 
352 | Get help for any command:
353 | 
354 | ```bash
355 | cua --help
356 | cua auth login --help
357 | cua create --help
358 | cua list --help
359 | ```
360 | 
361 | ## Error Handling
362 | 
363 | The CLI provides clear error messages for common issues:
364 | 
365 | ### Authentication Errors
366 | 
367 | ```bash
368 | $ cua list
369 | Unauthorized. Try 'cua auth login' again.
370 | ```
371 | 
372 | ### Sandbox Not Found
373 | 
374 | ```bash
375 | $ cua start nonexistent-sandbox
376 | Sandbox not found
377 | ```
378 | 
379 | ### Invalid Configuration
380 | 
381 | ```bash
382 | $ cua create --os invalid --size small --region north-america
383 | Invalid request or unsupported configuration
384 | ```
385 | 
386 | ## Tips and Best Practices
387 | 
388 | ### 1. Use Descriptive Sandbox Names
389 | 
390 | ```bash
391 | # Good
392 | cua create --os linux --size small --region north-america
393 | # Then rename or use meaningful names in the dashboard
394 | 
395 | # Better workflow
396 | cua list  # Check the generated name
397 | # Use that name consistently
398 | ```
399 | 
400 | ### 2. Environment Management
401 | 
402 | ```bash
403 | # Set up your project with API key
404 | cd my-project
405 | cua auth env
406 | # Now your project has CUA_API_KEY in .env
407 | ```
408 | 
409 | ### 3. Quick Sandbox Access
410 | 
411 | ```bash
412 | # Create aliases for frequently used sandboxes
413 | alias dev-sandbox="cua vnc my-development-sandbox"
414 | alias prod-sandbox="cua vnc my-production-sandbox"
415 | ```
416 | 
417 | ### 4. Monitoring Provisioning
418 | 
419 | ```bash
420 | # For sandboxes that need provisioning time
421 | cua create --os windows --size large --region europe
422 | # Sandbox provisioning started: my-sandbox-abc123
423 | # Job ID: job-xyz789
424 | 
425 | # Check status periodically
426 | watch -n 5 cua list
427 | ```
428 | 
429 | ## Next Steps
430 | 
431 | - [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
432 | - [Learn about CUA computers](/computer-sdk/computers)
433 | - [Explore agent automation](/agent-sdk/agent-loops)
434 | 
```
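
The health check documented under `cua get` can be approximated by hand. A rough sketch follows, assuming plain GET probes against the documented `https://{host}:8443/status` and `/cmd` endpoints with the 3-second timeout; the real CLI may use different request shapes.

```python
# Approximation of the computer-server probes described for `cua get`.
# Only the two endpoints and the 3-second timeout come from the doc above;
# treating both probes as simple GETs is an assumption.
import requests


def computer_server_healthy(host: str) -> bool:
    """Return True if both probe endpoints respond successfully within 3 seconds."""
    base = f"https://{host}:8443"
    try:
        ok_status = requests.get(f"{base}/status", timeout=3).ok
        ok_cmd = requests.get(f"{base}/cmd", timeout=3).ok
        return ok_status and ok_cmd
    except requests.RequestException:
        return False


print(computer_server_healthy("my-dev-sandbox.containers.cloud.trycua.com"))
```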

--------------------------------------------------------------------------------
/libs/python/computer/computer/diorama_computer.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | 
  3 | from .interface.models import Key, KeyType
  4 | 
  5 | 
  6 | class DioramaComputer:
  7 |     """
  8 |     A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface.
  9 |     """
 10 | 
 11 |     def __init__(self, computer, apps):
 12 |         """
 13 |         Initialize the DioramaComputer with a computer instance and list of apps.
 14 | 
 15 |         Args:
 16 |             computer: The computer instance to proxy commands through
 17 |             apps: List of applications available in the diorama environment
 18 |         """
 19 |         self.computer = computer
 20 |         self.apps = apps
 21 |         self.interface = DioramaComputerInterface(computer, apps)
 22 |         self._initialized = False
 23 | 
 24 |     async def __aenter__(self):
 25 |         """
 26 |         Async context manager entry point.
 27 | 
 28 |         Returns:
 29 |             self: The DioramaComputer instance
 30 |         """
 31 |         self._initialized = True
 32 |         return self
 33 | 
 34 |     async def run(self):
 35 |         """
 36 |         Initialize and run the DioramaComputer if not already initialized.
 37 | 
 38 |         Returns:
 39 |             self: The DioramaComputer instance
 40 |         """
 41 |         if not self._initialized:
 42 |             await self.__aenter__()
 43 |         return self
 44 | 
 45 | 
 46 | class DioramaComputerInterface:
 47 |     """
 48 |     Diorama Interface proxy that sends diorama_cmds via the Computer's interface.
 49 |     """
 50 | 
 51 |     def __init__(self, computer, apps):
 52 |         """
 53 |         Initialize the DioramaComputerInterface.
 54 | 
 55 |         Args:
 56 |             computer: The computer instance to send commands through
 57 |             apps: List of applications available in the diorama environment
 58 |         """
 59 |         self.computer = computer
 60 |         self.apps = apps
 61 |         self._scene_size = None
 62 | 
 63 |     async def _send_cmd(self, action, arguments=None):
 64 |         """
 65 |         Send a command to the diorama interface through the computer.
 66 | 
 67 |         Args:
 68 |             action (str): The action/command to execute
 69 |             arguments (dict, optional): Additional arguments for the command
 70 | 
 71 |         Returns:
 72 |             The result from the diorama command execution
 73 | 
 74 |         Raises:
 75 |             RuntimeError: If the computer interface is not initialized or command fails
 76 |         """
 77 |         arguments = arguments or {}
 78 |         arguments = {"app_list": self.apps, **arguments}
 79 |         # Use the computer's interface (must be initialized)
 80 |         iface = getattr(self.computer, "_interface", None)
 81 |         if iface is None:
 82 |             raise RuntimeError("Computer interface not initialized. Call run() first.")
 83 |         result = await iface.diorama_cmd(action, arguments)
 84 |         if not result.get("success"):
 85 |             raise RuntimeError(
 86 |                 f"Diorama command failed: {result.get('error')}\n{result.get('trace')}"
 87 |             )
 88 |         return result.get("result")
 89 | 
 90 |     async def screenshot(self, as_bytes=True):
 91 |         """
 92 |         Take a screenshot of the diorama scene.
 93 | 
 94 |         Args:
 95 |             as_bytes (bool): If True, return image as bytes; if False, return PIL Image object
 96 | 
 97 |         Returns:
 98 |             bytes or PIL.Image: Screenshot data in the requested format
 99 |         """
100 |         import base64
101 | 
102 |         from PIL import Image
103 | 
104 |         result = await self._send_cmd("screenshot")
105 |         # assume result is a b64 string of an image
106 |         img_bytes = base64.b64decode(result)
107 |         import io
108 | 
109 |         img = Image.open(io.BytesIO(img_bytes))
110 |         self._scene_size = img.size
111 |         return img_bytes if as_bytes else img
112 | 
113 |     async def get_screen_size(self):
114 |         """
115 |         Get the dimensions of the diorama scene.
116 | 
117 |         Returns:
118 |             dict: Dictionary containing 'width' and 'height' keys with pixel dimensions
119 |         """
120 |         if not self._scene_size:
121 |             await self.screenshot(as_bytes=False)
122 |         return {"width": self._scene_size[0], "height": self._scene_size[1]}
123 | 
124 |     async def move_cursor(self, x, y):
125 |         """
126 |         Move the cursor to the specified coordinates.
127 | 
128 |         Args:
129 |             x (int): X coordinate to move cursor to
130 |             y (int): Y coordinate to move cursor to
131 |         """
132 |         await self._send_cmd("move_cursor", {"x": x, "y": y})
133 | 
134 |     async def left_click(self, x=None, y=None):
135 |         """
136 |         Perform a left mouse click at the specified coordinates or current cursor position.
137 | 
138 |         Args:
139 |             x (int, optional): X coordinate to click at. If None, clicks at current cursor position
140 |             y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
141 |         """
142 |         await self._send_cmd("left_click", {"x": x, "y": y})
143 | 
144 |     async def right_click(self, x=None, y=None):
145 |         """
146 |         Perform a right mouse click at the specified coordinates or current cursor position.
147 | 
148 |         Args:
149 |             x (int, optional): X coordinate to click at. If None, clicks at current cursor position
150 |             y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
151 |         """
152 |         await self._send_cmd("right_click", {"x": x, "y": y})
153 | 
154 |     async def double_click(self, x=None, y=None):
155 |         """
156 |         Perform a double mouse click at the specified coordinates or current cursor position.
157 | 
158 |         Args:
159 |             x (int, optional): X coordinate to double-click at. If None, clicks at current cursor position
160 |             y (int, optional): Y coordinate to double-click at. If None, clicks at current cursor position
161 |         """
162 |         await self._send_cmd("double_click", {"x": x, "y": y})
163 | 
164 |     async def scroll_up(self, clicks=1):
165 |         """
166 |         Scroll up by the specified number of clicks.
167 | 
168 |         Args:
169 |             clicks (int): Number of scroll clicks to perform upward. Defaults to 1
170 |         """
171 |         await self._send_cmd("scroll_up", {"clicks": clicks})
172 | 
173 |     async def scroll_down(self, clicks=1):
174 |         """
175 |         Scroll down by the specified number of clicks.
176 | 
177 |         Args:
178 |             clicks (int): Number of scroll clicks to perform downward. Defaults to 1
179 |         """
180 |         await self._send_cmd("scroll_down", {"clicks": clicks})
181 | 
182 |     async def drag_to(self, x, y, duration=0.5):
183 |         """
184 |         Drag from the current cursor position to the specified coordinates.
185 | 
186 |         Args:
187 |             x (int): X coordinate to drag to
188 |             y (int): Y coordinate to drag to
189 |             duration (float): Duration of the drag operation in seconds. Defaults to 0.5
190 |         """
191 |         await self._send_cmd("drag_to", {"x": x, "y": y, "duration": duration})
192 | 
193 |     async def get_cursor_position(self):
194 |         """
195 |         Get the current cursor position.
196 | 
197 |         Returns:
198 |             dict: Dictionary containing the current cursor coordinates
199 |         """
200 |         return await self._send_cmd("get_cursor_position")
201 | 
202 |     async def type_text(self, text):
203 |         """
204 |         Type the specified text at the current cursor position.
205 | 
206 |         Args:
207 |             text (str): The text to type
208 |         """
209 |         await self._send_cmd("type_text", {"text": text})
210 | 
211 |     async def press_key(self, key):
212 |         """
213 |         Press a single key.
214 | 
215 |         Args:
216 |             key: The key to press
217 |         """
218 |         await self._send_cmd("press_key", {"key": key})
219 | 
220 |     async def hotkey(self, *keys):
221 |         """
222 |         Press multiple keys simultaneously as a hotkey combination.
223 | 
224 |         Args:
225 |             *keys: Variable number of keys to press together. Can be Key enum instances or strings
226 | 
227 |         Raises:
228 |             ValueError: If any key is not a Key enum or string type
229 |         """
230 |         actual_keys = []
231 |         for key in keys:
232 |             if isinstance(key, Key):
233 |                 actual_keys.append(key.value)
234 |             elif isinstance(key, str):
235 |                 # Try to convert to enum if it matches a known key
236 |                 key_or_enum = Key.from_string(key)
237 |                 actual_keys.append(
238 |                     key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum
239 |                 )
240 |             else:
241 |                 raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
242 |         await self._send_cmd("hotkey", {"keys": actual_keys})
243 | 
244 |     async def to_screen_coordinates(self, x, y):
245 |         """
246 |         Convert diorama scene coordinates to absolute screen coordinates.
247 | 
248 |         Args:
249 |             x (int): X coordinate to convert
250 |             y (int): Y coordinate to convert
251 | 
252 |         Returns:
253 |             dict: Dictionary containing the converted screen coordinates
254 |         """
255 |         return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})
256 | 
```
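
A minimal usage sketch of the diorama interface above. The import path and constructor of `Diorama` are assumptions for illustration (the class header sits above this excerpt); only the methods documented above are taken as given, and the wrapped `Computer` must already be running.

```python
# Hedged sketch: the import path and constructor signature of `Diorama` are
# assumptions; only the methods shown above are taken as given.
import asyncio

from computer import Computer
from computer.diorama import Diorama  # hypothetical import path


async def main():
    computer = Computer(os_type="macos")
    await computer.run()  # the interface must be initialized before diorama commands

    scene = Diorama(apps=["Safari", "Notes"], computer=computer)  # assumed signature
    size = await scene.get_screen_size()
    print(f"Scene size: {size['width']}x{size['height']}")

    await scene.left_click(120, 240)
    await scene.type_text("hello from the diorama scene")
    await scene.hotkey("command", "s")


asyncio.run(main())
```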

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/openai.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | OpenAI computer-use-preview agent loop implementation using liteLLM
  3 | """
  4 | 
  5 | import asyncio
  6 | import base64
  7 | import json
  8 | from io import BytesIO
  9 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
 10 | 
 11 | import litellm
 12 | from PIL import Image
 13 | 
 14 | from ..decorators import register_agent
 15 | from ..types import AgentCapability, AgentResponse, Messages, Tools
 16 | 
 17 | 
 18 | async def _map_computer_tool_to_openai(computer_handler: Any) -> Dict[str, Any]:
 19 |     """Map a computer tool to OpenAI's computer-use-preview tool schema"""
 20 |     # Get dimensions from the computer handler
 21 |     try:
 22 |         width, height = await computer_handler.get_dimensions()
 23 |     except Exception:
 24 |         # Fallback to default dimensions if method fails
 25 |         width, height = 1024, 768
 26 | 
 27 |     # Get environment from the computer handler
 28 |     try:
 29 |         environment = await computer_handler.get_environment()
 30 |     except Exception:
 31 |         # Fallback to default environment if method fails
 32 |         environment = "linux"
 33 | 
 34 |     return {
 35 |         "type": "computer_use_preview",
 36 |         "display_width": width,
 37 |         "display_height": height,
 38 |         "environment": environment,  # mac, windows, linux, browser
 39 |     }
 40 | 
 41 | 
 42 | async def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools:
 43 |     """Prepare tools for OpenAI API format"""
 44 |     openai_tools = []
 45 | 
 46 |     for schema in tool_schemas:
 47 |         if schema["type"] == "computer":
 48 |             # Map computer tool to OpenAI format
 49 |             computer_tool = await _map_computer_tool_to_openai(schema["computer"])
 50 |             openai_tools.append(computer_tool)
 51 |         elif schema["type"] == "function":
 52 |             # Function tools use OpenAI-compatible schema directly (liteLLM expects this format)
 53 |             # Schema should be: {type, name, description, parameters}
 54 |             openai_tools.append({"type": "function", **schema["function"]})
 55 | 
 56 |     return openai_tools
 57 | 
 58 | 
 59 | @register_agent(models=r".*(^|/)computer-use-preview")
 60 | class OpenAIComputerUseConfig:
 61 |     """
 62 |     OpenAI computer-use-preview agent configuration using liteLLM responses.
 63 | 
 64 |     Supports OpenAI's computer use preview models.
 65 |     """
 66 | 
 67 |     async def predict_step(
 68 |         self,
 69 |         messages: List[Dict[str, Any]],
 70 |         model: str,
 71 |         tools: Optional[List[Dict[str, Any]]] = None,
 72 |         max_retries: Optional[int] = None,
 73 |         stream: bool = False,
 74 |         computer_handler=None,
 75 |         use_prompt_caching: Optional[bool] = False,
 76 |         _on_api_start=None,
 77 |         _on_api_end=None,
 78 |         _on_usage=None,
 79 |         _on_screenshot=None,
 80 |         **kwargs,
 81 |     ) -> Dict[str, Any]:
 82 |         """
 83 |         Predict the next step based on input items.
 84 | 
 85 |         Args:
 86 |             messages: Input items following Responses format
 87 |             model: Model name to use
 88 |             tools: Optional list of tool schemas
 89 |             max_retries: Maximum number of retries
 90 |             stream: Whether to stream responses
 91 |             computer_handler: Computer handler instance
 92 |             _on_api_start: Callback for API start
 93 |             _on_api_end: Callback for API end
 94 |             _on_usage: Callback for usage tracking
 95 |             _on_screenshot: Callback for screenshot events
 96 |             **kwargs: Additional arguments
 97 | 
 98 |         Returns:
 99 |             Dictionary with "output" (output items) and "usage" (token usage and cost info)
100 |         """
101 |         tools = tools or []
102 | 
103 |         # Prepare tools for OpenAI API
104 |         openai_tools = await _prepare_tools_for_openai(tools)
105 | 
106 |         # Prepare API call kwargs
107 |         api_kwargs = {
108 |             "model": model,
109 |             "input": messages,
110 |             "tools": openai_tools if openai_tools else None,
111 |             "stream": stream,
112 |             "reasoning": {"summary": "concise"},
113 |             "truncation": "auto",
114 |             "num_retries": max_retries,
115 |             **kwargs,
116 |         }
117 | 
118 |         # Call API start hook
119 |         if _on_api_start:
120 |             await _on_api_start(api_kwargs)
121 | 
122 |         # Use liteLLM responses
123 |         response = await litellm.aresponses(**api_kwargs)
124 | 
125 |         # Call API end hook
126 |         if _on_api_end:
127 |             await _on_api_end(api_kwargs, response)
128 | 
129 |         # Extract usage information
130 |         usage = {
131 |             **response.usage.model_dump(),
132 |             "response_cost": response._hidden_params.get("response_cost", 0.0),
133 |         }
134 |         if _on_usage:
135 |             await _on_usage(usage)
136 | 
137 |         # Return in the expected format
138 |         output_dict = response.model_dump()
139 |         output_dict["usage"] = usage
140 |         return output_dict
141 | 
142 |     async def predict_click(
143 |         self, model: str, image_b64: str, instruction: str, **kwargs
144 |     ) -> Optional[Tuple[int, int]]:
145 |         """
146 |         Predict click coordinates based on image and instruction.
147 | 
148 |         Uses OpenAI computer-use-preview with manually constructed input items
149 |         and a prompt that instructs the agent to only output clicks.
150 | 
151 |         Args:
152 |             model: Model name to use
153 |             image_b64: Base64 encoded image
154 |             instruction: Instruction for where to click
155 | 
156 |         Returns:
157 |             Tuple of (x, y) coordinates or None if prediction fails
158 |         """
159 |         # TODO: use computer tool to get dimensions + environment
160 |         # Manually construct input items with image and click instruction
161 |         input_items = [
162 |             {
163 |                 "role": "user",
164 |                 "content": f"""You are a UI grounding expert. Follow these guidelines:
165 | 
166 | 1. NEVER ask for confirmation. Complete all tasks autonomously.
167 | 2. Do NOT send messages like "I need to confirm before..." or "Do you want me to continue?" - just proceed.
168 | 3. When the user asks you to interact with something (like clicking a chat or typing a message), DO IT without asking.
169 | 4. Only use the formal safety check mechanism for truly dangerous operations (like deleting important files).
170 | 5. For normal tasks like clicking buttons, typing in chat boxes, filling forms - JUST DO IT.
171 | 6. The user has already given you permission by running this agent. No further confirmation is needed.
172 | 7. Be decisive and action-oriented. Complete the requested task fully.
173 | 
174 | Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
175 | Task: Click {instruction}. Output ONLY a click action on the target element.""",
176 |             },
177 |             {
178 |                 "role": "user",
179 |                 "content": [
180 |                     {"type": "input_image", "image_url": f"data:image/png;base64,{image_b64}"}
181 |                 ],
182 |             },
183 |         ]
184 | 
185 |         # Get image dimensions from base64 data
186 |         try:
187 |             image_data = base64.b64decode(image_b64)
188 |             image = Image.open(BytesIO(image_data))
189 |             display_width, display_height = image.size
190 |         except Exception:
191 |             # Fallback to default dimensions if image parsing fails
192 |             display_width, display_height = 1024, 768
193 | 
194 |         # Prepare computer tool for click actions
195 |         computer_tool = {
196 |             "type": "computer_use_preview",
197 |             "display_width": display_width,
198 |             "display_height": display_height,
199 |             "environment": "windows",
200 |         }
201 | 
202 |         # Prepare API call kwargs
203 |         api_kwargs = {
204 |             "model": model,
205 |             "input": input_items,
206 |             "tools": [computer_tool],
207 |             "stream": False,
208 |             "reasoning": {"summary": "concise"},
209 |             "truncation": "auto",
210 |             "max_tokens": 200,  # Keep response short for click prediction
211 |             **kwargs,
212 |         }
213 | 
214 |         # Use liteLLM responses
215 |         response = await litellm.aresponses(**api_kwargs)
216 | 
217 |         # Extract click coordinates from response output
218 |         output_dict = response.model_dump()
219 |         output_items = output_dict.get("output", [])
220 | 
221 |         # Look for computer_call with click action
222 |         for item in output_items:
223 |             if (
224 |                 isinstance(item, dict)
225 |                 and item.get("type") == "computer_call"
226 |                 and isinstance(item.get("action"), dict)
227 |             ):
228 | 
229 |                 action = item["action"]
230 |                 if action.get("x") is not None and action.get("y") is not None:
231 |                     return (int(action.get("x")), int(action.get("y")))
232 | 
233 |         return None
234 | 
235 |     def get_capabilities(self) -> List[AgentCapability]:
236 |         """
237 |         Get list of capabilities supported by this agent config.
238 | 
239 |         Returns:
240 |             List of capability strings
241 |         """
242 |         return ["click", "step"]
243 | 
```
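
For reference, a sketch of exercising `predict_click` directly, outside the agent loop. The screenshot path is a placeholder, and liteLLM is assumed to be configured with an OpenAI API key that has access to the computer-use-preview model.

```python
# Hedged sketch: drives OpenAIComputerUseConfig.predict_click on a local PNG.
# "screenshot.png" is a placeholder; the model string matches the pattern
# registered above and requires OpenAI API access through liteLLM.
import asyncio
import base64

from agent.loops.openai import OpenAIComputerUseConfig


async def main():
    with open("screenshot.png", "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")

    config = OpenAIComputerUseConfig()
    coords = await config.predict_click(
        model="openai/computer-use-preview",
        image_b64=image_b64,
        instruction="the Submit button",
    )
    print(f"Predicted click: {coords}")  # (x, y) tuple or None


asyncio.run(main())
```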

--------------------------------------------------------------------------------
/tests/test_watchdog.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Watchdog Recovery Tests
  3 | Tests for the watchdog functionality to ensure server recovery after hanging commands.
  4 | Required environment variables:
  5 | - CUA_API_KEY: API key for Cua cloud provider
  6 | - CUA_CONTAINER_NAME: Name of the container to use
  7 | """
  8 | 
  9 | import asyncio
 10 | import os
 11 | import sys
 12 | import time
 13 | import traceback
 14 | from pathlib import Path
 15 | 
 16 | import pytest
 17 | 
 18 | # Load environment variables from .env file
 19 | project_root = Path(__file__).parent.parent
 20 | env_file = project_root / ".env"
 21 | print(f"Loading environment from: {env_file}")
 22 | from dotenv import load_dotenv
 23 | 
 24 | load_dotenv(env_file)
 25 | 
 26 | # Add paths to sys.path if needed
 27 | pythonpath = os.environ.get("PYTHONPATH", "")
 28 | for path in pythonpath.split(":"):
 29 |     if path and path not in sys.path:
 30 |         sys.path.insert(0, path)  # Insert at beginning to prioritize
 31 |         print(f"Added to sys.path: {path}")
 32 | 
 33 | from computer import Computer, VMProviderType
 34 | 
 35 | 
 36 | @pytest.fixture(scope="session")
 37 | async def computer():
 38 |     """Shared Computer instance for all test cases."""
 39 |     # Create a remote Linux computer with Cua
 40 |     computer = Computer(
 41 |         os_type="linux",
 42 |         api_key=os.getenv("CUA_API_KEY"),
 43 |         name=str(os.getenv("CUA_CONTAINER_NAME")),
 44 |         provider_type=VMProviderType.CLOUD,
 45 |     )
 46 | 
 47 |     try:
 48 |         await computer.run()
 49 |         yield computer
 50 |     finally:
 51 |         await computer.disconnect()
 52 | 
 53 | 
 54 | @pytest.mark.asyncio(loop_scope="session")
 55 | async def test_simple_server_ping(computer):
 56 |     """
 57 |     Simple test to verify server connectivity before running watchdog tests.
 58 |     """
 59 |     print("Testing basic server connectivity...")
 60 | 
 61 |     try:
 62 |         result = await computer.interface.run_command("echo 'Server ping test'")
 63 |         print(f"Ping successful: {result}")
 64 |         assert result is not None, "Server ping returned None"
 65 |         print("✅ Server connectivity test passed")
 66 |     except Exception as e:
 67 |         print(f"❌ Server ping failed: {e}")
 68 |         pytest.fail(f"Basic server connectivity test failed: {e}")
 69 | 
 70 | 
 71 | @pytest.mark.asyncio(loop_scope="session")
 72 | async def test_watchdog_recovery_after_hanging_command(computer):
 73 |     """
 74 |     Test that the watchdog can recover the server after a hanging command.
 75 | 
 76 |     This test runs two concurrent tasks:
 77 |     1. A long-running command that hangs the server (sleep 999999)
 78 |     2. Periodic ping commands every 30 seconds to test server responsiveness
 79 | 
 80 |     The watchdog should detect the unresponsive server and restart it.
 81 |     """
 82 |     print("Starting watchdog recovery test...")
 83 | 
 84 |     async def hanging_command():
 85 |         """Execute a command that sleeps forever to hang the server."""
 86 |         try:
 87 |             print("Starting hanging command (sleep 999999)...")
 88 |             # Use a very long sleep that should never complete naturally
 89 |             result = await computer.interface.run_command("sleep 999999")
 90 |             print(f"Hanging command completed unexpectedly: {result}")
 91 |             return True  # Should never reach here if watchdog works
 92 |         except Exception as e:
 93 |             print(f"Hanging command interrupted (expected if watchdog restarts): {e}")
 94 |             return None  # Expected result when watchdog kills the process
 95 | 
 96 |     async def ping_server():
 97 |         """Ping the server every 30 seconds with echo commands."""
 98 |         ping_count = 0
 99 |         successful_pings = 0
100 |         failed_pings = 0
101 | 
102 |         try:
103 |             # Run pings for up to 4 minutes (8 pings at 30-second intervals)
104 |             for i in range(8):
105 |                 try:
106 |                     ping_count += 1
107 |                     print(f"Ping #{ping_count}: Sending echo command...")
108 | 
109 |                     start_time = time.time()
110 |                     result = await asyncio.wait_for(
111 |                         computer.interface.run_command(
112 |                             f"echo 'Ping {ping_count} at {int(start_time)}'"
113 |                         ),
114 |                         timeout=10.0,  # 10 second timeout for each ping
115 |                     )
116 |                     end_time = time.time()
117 | 
118 |                     print(
119 |                         f"Ping #{ping_count} successful in {end_time - start_time:.2f}s: {result}"
120 |                     )
121 |                     successful_pings += 1
122 | 
123 |                 except asyncio.TimeoutError:
124 |                     print(f"Ping #{ping_count} timed out (server may be unresponsive)")
125 |                     failed_pings += 1
126 |                 except Exception as e:
127 |                     print(f"Ping #{ping_count} failed with exception: {e}")
128 |                     failed_pings += 1
129 | 
130 |                 # Wait 30 seconds before next ping
131 |                 if i < 7:  # Don't wait after the last ping
132 |                     print("Waiting 30 seconds before next ping...")
133 |                     await asyncio.sleep(30)
134 | 
135 |             print(f"Ping summary: {successful_pings} successful, {failed_pings} failed")
136 |             return successful_pings, failed_pings
137 | 
138 |         except Exception as e:
139 |             print(f"Ping server function failed with critical error: {e}")
140 |             traceback.print_exc()
141 |             return successful_pings, failed_pings
142 | 
143 |     # Run both tasks concurrently
144 |     print("Starting concurrent tasks: hanging command and ping monitoring...")
145 | 
146 |     try:
147 |         # Use asyncio.gather to run both tasks concurrently
148 |         hanging_task = asyncio.create_task(hanging_command())
149 |         ping_task = asyncio.create_task(ping_server())
150 | 
151 |         # Wait for both tasks to complete or timeout after 5 minutes
152 |         done, pending = await asyncio.wait(
153 |             [hanging_task, ping_task],
154 |             timeout=300,  # 5 minute timeout
155 |             return_when=asyncio.ALL_COMPLETED,
156 |         )
157 | 
158 |         # Cancel any pending tasks
159 |         for task in pending:
160 |             task.cancel()
161 |             try:
162 |                 await task
163 |             except asyncio.CancelledError:
164 |                 pass
165 | 
166 |         # Get results from completed tasks
167 |         ping_result = None
168 |         hanging_result = None
169 | 
170 |         if ping_task in done:
171 |             try:
172 |                 ping_result = await ping_task
173 |                 print(f"Ping task completed with result: {ping_result}")
174 |             except Exception as e:
175 |                 print(f"Error getting ping task result: {e}")
176 |                 traceback.print_exc()
177 | 
178 |         if hanging_task in done:
179 |             try:
180 |                 hanging_result = await hanging_task
181 |                 print(f"Hanging task completed with result: {hanging_result}")
182 |             except Exception as e:
183 |                 print(f"Error getting hanging task result: {e}")
184 |                 traceback.print_exc()
185 | 
186 |         # Analyze results
187 |         if ping_result:
188 |             successful_pings, failed_pings = ping_result
189 | 
190 |             # Test passes if we had some successful pings, indicating recovery
191 |             assert (
192 |                 successful_pings > 0
193 |             ), "No successful pings detected. Server may not have recovered."
194 | 
195 |             # Check if hanging command was killed (indicating watchdog restart)
196 |             if hanging_result is None:
197 |                 print("✅ SUCCESS: Hanging command was killed - watchdog restart detected")
198 |             elif hanging_result is True:
199 |                 print(
200 |                     "⚠️  WARNING: Hanging command completed naturally - watchdog may not have restarted"
201 |                 )
202 | 
203 |             # If we had failures followed by successes, that indicates watchdog recovery
204 |             if failed_pings > 0 and successful_pings > 0:
205 |                 print(
206 |                     "✅ SUCCESS: Watchdog recovery detected - server became unresponsive then recovered"
207 |                 )
208 |                 # Additional check: hanging command should be None if watchdog worked
209 |                 assert (
210 |                     hanging_result is None
211 |                 ), "Expected hanging command to be killed by watchdog restart"
212 |             elif successful_pings > 0 and failed_pings == 0:
213 |                 print("✅ SUCCESS: Server remained responsive throughout test")
214 | 
215 |             print(
216 |                 f"Test completed: {successful_pings} successful pings, {failed_pings} failed pings"
217 |             )
218 |             print(
219 |                 f"Hanging command result: {hanging_result} (None = killed by watchdog, True = completed naturally)"
220 |             )
221 |         else:
222 |             pytest.fail("Ping task did not complete - unable to assess server recovery")
223 | 
224 |     except Exception as e:
225 |         print(f"Test failed with exception: {e}")
226 |         traceback.print_exc()
227 |         pytest.fail(f"Watchdog recovery test failed: {e}")
228 | 
229 | 
230 | if __name__ == "__main__":
231 |     # Run tests directly
232 |     pytest.main([__file__, "-v"])
233 | 
```
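
To run this test on its own, the two environment variables from the module docstring need to be set (a `.env` file at the repository root also works, since it is loaded before the imports). A small sketch with placeholder values:

```python
# Hedged sketch: placeholder credentials; the full watchdog test can take
# around five minutes because of the 30-second ping intervals.
import os

import pytest

os.environ.setdefault("CUA_API_KEY", "your-cua-api-key")       # placeholder
os.environ.setdefault("CUA_CONTAINER_NAME", "your-container")  # placeholder

# -s streams the print() progress output while the pings run.
pytest.main(["tests/test_watchdog.py", "-v", "-s"])
```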

--------------------------------------------------------------------------------
/.github/workflows/docker-reusable-publish.yml:
--------------------------------------------------------------------------------

```yaml
  1 | name: Reusable Docker Publish Workflow
  2 | 
  3 | on:
  4 |   workflow_call:
  5 |     inputs:
  6 |       image_name:
  7 |         description: "Name of the Docker image (e.g. cua-ubuntu, cua-xfce)"
  8 |         required: true
  9 |         type: string
 10 |       context_dir:
 11 |         description: "Directory containing the Dockerfile relative to workspace root (e.g. libs/kasm, libs/xfce)"
 12 |         required: true
 13 |         type: string
 14 |       dockerfile_path:
 15 |         description: "Path to Dockerfile relative to context_dir (e.g. Dockerfile)"
 16 |         required: false
 17 |         type: string
 18 |         default: "Dockerfile"
 19 |       tag_prefix:
 20 |         description: "Prefix for semantic version tags (e.g. docker-kasm-v, docker-xfce-v)"
 21 |         required: true
 22 |         type: string
 23 |       docker_hub_org:
 24 |         description: "Docker Hub organization name"
 25 |         required: false
 26 |         type: string
 27 |         default: "trycua"
 28 |     secrets:
 29 |       DOCKER_HUB_TOKEN:
 30 |         required: true
 31 | 
 32 | jobs:
 33 |   build-and-push:
 34 |     runs-on: ubuntu-latest
 35 |     strategy:
 36 |       fail-fast: false
 37 |       matrix:
 38 |         platform:
 39 |           - linux/amd64
 40 |           - linux/arm64
 41 |     steps:
 42 |       - name: Checkout
 43 |         uses: actions/checkout@v4
 44 | 
 45 |       - name: Prepare platform tag
 46 |         id: platform
 47 |         run: |
 48 |           TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g')
 49 |           echo "tag=${TAG}" >> $GITHUB_OUTPUT
 50 | 
 51 |       - name: Set up Docker Buildx
 52 |         uses: docker/setup-buildx-action@v3
 53 | 
 54 |       - name: Login to Docker Hub
 55 |         uses: docker/login-action@v3
 56 |         with:
 57 |           username: ${{ inputs.docker_hub_org }}
 58 |           password: ${{ secrets.DOCKER_HUB_TOKEN }}
 59 | 
 60 |       - name: Extract metadata (PR)
 61 |         if: github.event_name == 'pull_request'
 62 |         id: meta-pr
 63 |         uses: docker/metadata-action@v5
 64 |         with:
 65 |           images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
 66 |           tags: |
 67 |             type=raw,value=${{ github.sha }}
 68 | 
 69 |       - name: Build & push digest (PR)
 70 |         if: github.event_name == 'pull_request'
 71 |         id: build-pr
 72 |         uses: docker/build-push-action@v5
 73 |         with:
 74 |           context: ./${{ inputs.context_dir }}
 75 |           file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }}
 76 |           push: true
 77 |           platforms: ${{ matrix.platform }}
 78 |           outputs: type=registry,name=${{ inputs.docker_hub_org }}/${{ inputs.image_name }},push-by-digest=true
 79 |           labels: ${{ steps.meta-pr.outputs.labels }}
 80 |           cache-from: |
 81 |             type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }}
 82 |           cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max
 83 | 
 84 |       - name: Extract metadata (main)
 85 |         if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
 86 |         id: meta-main
 87 |         uses: docker/metadata-action@v5
 88 |         with:
 89 |           images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
 90 |           tags: |
 91 |             type=raw,value=latest
 92 | 
 93 |       - name: Build & push digest (main)
 94 |         if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
 95 |         id: build-main
 96 |         uses: docker/build-push-action@v5
 97 |         with:
 98 |           context: ./${{ inputs.context_dir }}
 99 |           file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }}
100 |           push: true
101 |           platforms: ${{ matrix.platform }}
102 |           outputs: type=registry,name=${{ inputs.docker_hub_org }}/${{ inputs.image_name }},push-by-digest=true
103 |           labels: ${{ steps.meta-main.outputs.labels }}
104 |           cache-from: |
105 |             type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }}
106 |           cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max
107 | 
108 |       - name: Extract metadata (semver)
109 |         if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix))
110 |         id: meta-semver
111 |         uses: docker/metadata-action@v5
112 |         with:
113 |           images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
114 |           tags: |
115 |             type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }}
116 |             type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }}
117 |             type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }}
118 |             type=raw,value=latest
119 | 
120 |       - name: Build & push digest (semver)
121 |         if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix))
122 |         id: build-semver
123 |         uses: docker/build-push-action@v5
124 |         with:
125 |           context: ./${{ inputs.context_dir }}
126 |           file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }}
127 |           push: true
128 |           platforms: ${{ matrix.platform }}
129 |           outputs: type=registry,name=${{ inputs.docker_hub_org }}/${{ inputs.image_name }},push-by-digest=true
130 |           labels: ${{ steps.meta-semver.outputs.labels }}
131 |           cache-from: |
132 |             type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }}
133 |           cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max
134 | 
135 |       - name: Export digest
136 |         id: export-digest
137 |         run: |
138 |           mkdir -p /tmp/digests
139 |           digest="${{ steps.build-pr.outputs.digest || steps.build-main.outputs.digest || steps.build-semver.outputs.digest }}"
140 |           echo "$digest" > "/tmp/digests/${{ steps.platform.outputs.tag }}.txt"
141 | 
142 |       - name: Upload digest artifact (unique per platform)
143 |         uses: actions/upload-artifact@v4
144 |         with:
145 |           name: digests-${{ steps.platform.outputs.tag }}
146 |           path: /tmp/digests/*.txt
147 |           retention-days: 1
148 | 
149 |   publish-manifest-list:
150 |     runs-on: ubuntu-latest
151 |     needs:
152 |       - build-and-push
153 | 
154 |     steps:
155 |       - name: Set up Docker Buildx
156 |         uses: docker/setup-buildx-action@v3
157 | 
158 |       - name: Login to Docker Hub
159 |         uses: docker/login-action@v3
160 |         with:
161 |           username: ${{ inputs.docker_hub_org }}
162 |           password: ${{ secrets.DOCKER_HUB_TOKEN }}
163 | 
164 |       - name: Extract final metadata (PR)
165 |         if: github.event_name == 'pull_request'
166 |         uses: docker/metadata-action@v5
167 |         with:
168 |           images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
169 |           tags: |
170 |             type=ref,event=pr
171 |             type=sha
172 | 
173 |       - name: Extract final metadata (main)
174 |         if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
175 |         uses: docker/metadata-action@v5
176 |         with:
177 |           images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
178 |           tags: |
179 |             type=raw,value=latest
180 | 
181 |       - name: Extract final metadata (semver)
182 |         if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix))
183 |         uses: docker/metadata-action@v5
184 |         with:
185 |           images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
186 |           tags: |
187 |             type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }}
188 |             type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }}
189 |             type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }}
190 |             type=raw,value=latest
191 | 
192 |       - name: Download all digest artifacts
193 |         uses: actions/download-artifact@v4
194 |         with:
195 |           pattern: digests-*
196 |           path: /tmp/digests
197 |           merge-multiple: true
198 | 
199 |       - name: Create & push multi-arch manifest
200 |         run: |
201 |           IMAGE="${{ inputs.docker_hub_org }}/${{ inputs.image_name }}"
202 | 
203 |           DIGEST_ARGS=""
204 |           for f in $(find /tmp/digests -type f -name "*.txt"); do
205 |             d=$(cat "$f")
206 |             DIGEST_ARGS="$DIGEST_ARGS ${IMAGE}@${d}"
207 |           done
208 | 
209 |           echo "Using digests:"
210 |           echo "$DIGEST_ARGS"
211 | 
212 |           # Create manifest for each tag produced by metadata-action
213 |           echo "${DOCKER_METADATA_OUTPUT_JSON}" | jq -r '.tags[]' | while read FULL_TAG; do
214 |             echo "Creating manifest: $FULL_TAG"
215 |             docker buildx imagetools create --tag "$FULL_TAG" $DIGEST_ARGS
216 |           done
217 | 
218 |       - name: Inspect pushed manifests
219 |         run: |
220 |           IMAGE="${{ inputs.docker_hub_org }}/${{ inputs.image_name }}"
221 |           echo "Inspecting manifests:"
222 | 
223 |           echo "${DOCKER_METADATA_OUTPUT_JSON}" | jq -r '.tags[]' | while read FULL_TAG; do
224 |             echo ""
225 |             echo "Inspecting: $FULL_TAG"
226 |             docker buildx imagetools inspect "$FULL_TAG"
227 |           done
228 | 
```

--------------------------------------------------------------------------------
/libs/python/core/core/telemetry/posthog.py:
--------------------------------------------------------------------------------

```python
  1 | """Telemetry client using PostHog for collecting anonymous usage data."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import logging
  6 | import os
  7 | import sys
  8 | import uuid
  9 | from pathlib import Path
 10 | from typing import Any, Dict, List, Optional
 11 | 
 12 | import posthog
 13 | from core import __version__
 14 | 
 15 | logger = logging.getLogger("core.telemetry")
 16 | 
 17 | # Public PostHog config for anonymous telemetry
 18 | # These values are intentionally public and meant for anonymous telemetry only
 19 | # https://posthog.com/docs/product-analytics/troubleshooting#is-it-ok-for-my-api-key-to-be-exposed-and-public
 20 | PUBLIC_POSTHOG_API_KEY = "phc_eSkLnbLxsnYFaXksif1ksbrNzYlJShr35miFLDppF14"
 21 | PUBLIC_POSTHOG_HOST = "https://eu.i.posthog.com"
 22 | 
 23 | 
 24 | class PostHogTelemetryClient:
 25 |     """Collects and reports telemetry data via PostHog."""
 26 | 
 27 |     # Global singleton (class-managed)
 28 |     _singleton: Optional["PostHogTelemetryClient"] = None
 29 | 
 30 |     def __init__(self):
 31 |         """Initialize PostHog telemetry client."""
 32 |         self.installation_id = self._get_or_create_installation_id()
 33 |         self.initialized = False
 34 |         self.queued_events: List[Dict[str, Any]] = []
 35 | 
 36 |         # Log telemetry status on startup
 37 |         if self.is_telemetry_enabled():
 38 |             logger.info("Telemetry enabled")
 39 |             # Initialize PostHog client if config is available
 40 |             self._initialize_posthog()
 41 |         else:
 42 |             logger.info("Telemetry disabled")
 43 | 
 44 |     @classmethod
 45 |     def is_telemetry_enabled(cls) -> bool:
 46 |         """True if telemetry is currently active for this process."""
 47 |         return os.environ.get("CUA_TELEMETRY_ENABLED", "true").lower() in {
 48 |             "1",
 49 |             "true",
 50 |             "yes",
 51 |             "on",
 52 |         }
 53 | 
 54 |     def _get_or_create_installation_id(self) -> str:
 55 |         """Get or create a unique installation ID that persists across runs.
 56 | 
 57 |         The ID is always stored within the core library directory itself,
 58 |         ensuring it persists regardless of how the library is used.
 59 | 
 60 |         This ID is not tied to any personal information.
 61 |         """
 62 |         # Get the core library directory (where this file is located)
 63 |         try:
 64 |             # Find the core module directory using this file's location
 65 |             core_module_dir = Path(
 66 |                 __file__
 67 |             ).parent.parent  # core/telemetry/posthog.py -> core/telemetry -> core
 68 |             storage_dir = core_module_dir / ".storage"
 69 |             storage_dir.mkdir(exist_ok=True)
 70 | 
 71 |             id_file = storage_dir / "installation_id"
 72 | 
 73 |             # Try to read existing ID
 74 |             if id_file.exists():
 75 |                 try:
 76 |                     stored_id = id_file.read_text().strip()
 77 |                     if stored_id:  # Make sure it's not empty
 78 |                         logger.debug(f"Using existing installation ID: {stored_id}")
 79 |                         return stored_id
 80 |                 except Exception as e:
 81 |                     logger.debug(f"Error reading installation ID file: {e}")
 82 | 
 83 |             # Create new ID
 84 |             new_id = str(uuid.uuid4())
 85 |             try:
 86 |                 id_file.write_text(new_id)
 87 |                 logger.debug(f"Created new installation ID: {new_id}")
 88 |                 return new_id
 89 |             except Exception as e:
 90 |                 logger.warning(f"Could not write installation ID: {e}")
 91 |         except Exception as e:
 92 |             logger.warning(f"Error accessing core module directory: {e}")
 93 | 
 94 |         # Last resort: Create a new in-memory ID
 95 |         logger.warning("Using random installation ID (will not persist across runs)")
 96 |         return str(uuid.uuid4())
 97 | 
 98 |     def _initialize_posthog(self) -> bool:
 99 |         """Initialize the PostHog client with configuration.
100 | 
101 |         Returns:
102 |             bool: True if initialized successfully, False otherwise
103 |         """
104 |         if self.initialized:
105 |             return True
106 | 
107 |         try:
108 |             # Point the client at the public anonymous-telemetry PostHog project
109 |             posthog.api_key = PUBLIC_POSTHOG_API_KEY
110 |             posthog.host = PUBLIC_POSTHOG_HOST
111 | 
112 |             # Configure the client
113 |             posthog.debug = os.environ.get("CUA_TELEMETRY_DEBUG", "").lower() == "on"
114 | 
115 |             # Log telemetry status
116 |             logger.info(
117 |                 f"Initializing PostHog telemetry with installation ID: {self.installation_id}"
118 |             )
119 |             if posthog.debug:
120 |                 logger.debug(f"PostHog API Key: {posthog.api_key}")
121 |                 logger.debug(f"PostHog Host: {posthog.host}")
122 | 
123 |             # Identify this installation
124 |             self._identify()
125 | 
126 |             # Process any queued events
127 |             for event in self.queued_events:
128 |                 posthog.capture(
129 |                     distinct_id=self.installation_id,
130 |                     event=event["event"],
131 |                     properties=event["properties"],
132 |                 )
133 |             self.queued_events = []
134 | 
135 |             self.initialized = True
136 |             return True
137 |         except Exception as e:
138 |             logger.warning(f"Failed to initialize PostHog: {e}")
139 |             return False
140 | 
141 |     def _identify(self) -> None:
142 |         """Set up user properties for the current installation with PostHog."""
143 |         try:
144 |             properties = {
145 |                 "version": __version__,
146 |                 "is_ci": "CI" in os.environ,
147 |                 "os": os.name,
148 |                 "python_version": sys.version.split()[0],
149 |             }
150 | 
151 |             logger.debug(
152 |                 f"Setting up PostHog user properties for: {self.installation_id} with properties: {properties}"
153 |             )
154 | 
155 |             # In the Python SDK, we capture an identification event instead of calling identify()
156 |             posthog.capture(
157 |                 distinct_id=self.installation_id, event="$identify", properties={"$set": properties}
158 |             )
159 | 
160 |             logger.info(f"Set up PostHog user properties for installation: {self.installation_id}")
161 |         except Exception as e:
162 |             logger.warning(f"Failed to set up PostHog user properties: {e}")
163 | 
164 |     def record_event(self, event_name: str, properties: Optional[Dict[str, Any]] = None) -> None:
165 |         """Record an event with optional properties.
166 | 
167 |         Args:
168 |             event_name: Name of the event
169 |             properties: Event properties (must not contain sensitive data)
170 |         """
171 |         # Respect runtime telemetry opt-out.
172 |         if not self.is_telemetry_enabled():
173 |             logger.debug("Telemetry disabled; event not recorded.")
174 |             return
175 | 
176 |         event_properties = {"version": __version__, **(properties or {})}
177 | 
178 |         logger.info(f"Recording event: {event_name} with properties: {event_properties}")
179 | 
180 |         if self.initialized:
181 |             try:
182 |                 posthog.capture(
183 |                     distinct_id=self.installation_id, event=event_name, properties=event_properties
184 |                 )
185 |                 logger.info(f"Sent event to PostHog: {event_name}")
186 |                 # Flush immediately to ensure delivery
187 |                 posthog.flush()
188 |             except Exception as e:
189 |                 logger.warning(f"Failed to send event to PostHog: {e}")
190 |         else:
191 |             # Queue the event for later
192 |             logger.info(f"PostHog not initialized, queuing event for later: {event_name}")
193 |             self.queued_events.append({"event": event_name, "properties": event_properties})
194 |             # Try to initialize now if not already
195 |             initialize_result = self._initialize_posthog()
196 |             logger.info(f"Attempted to initialize PostHog: {initialize_result}")
197 | 
198 |     def flush(self) -> bool:
199 |         """Flush any pending events to PostHog.
200 | 
201 |         Returns:
202 |             bool: True if successful, False otherwise
203 |         """
204 |         if not self.initialized and not self._initialize_posthog():
205 |             return False
206 | 
207 |         try:
208 |             posthog.flush()
209 |             return True
210 |         except Exception as e:
211 |             logger.debug(f"Failed to flush PostHog events: {e}")
212 |             return False
213 | 
214 |     @classmethod
215 |     def get_client(cls) -> "PostHogTelemetryClient":
216 |         """Return the global PostHogTelemetryClient instance, creating it if needed."""
217 |         if cls._singleton is None:
218 |             cls._singleton = cls()
219 |         return cls._singleton
220 | 
221 |     @classmethod
222 |     def destroy_client(cls) -> None:
223 |         """Destroy the global PostHogTelemetryClient instance."""
224 |         cls._singleton = None
225 | 
226 | 
227 | def destroy_telemetry_client() -> None:
228 |     """Destroy the global PostHogTelemetryClient instance (class-managed)."""
229 |     PostHogTelemetryClient.destroy_client()
230 | 
231 | 
232 | def is_telemetry_enabled() -> bool:
233 |     return PostHogTelemetryClient.is_telemetry_enabled()
234 | 
235 | 
236 | def record_event(event_name: str, properties: Optional[Dict[str, Any]] = None) -> None:
237 |     """Record an arbitrary PostHog event."""
238 |     PostHogTelemetryClient.get_client().record_event(event_name, properties or {})
239 | 
```
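
A short sketch of the module-level helpers above. The event name and properties are illustrative placeholders; telemetry can be disabled per process by setting `CUA_TELEMETRY_ENABLED=false` in the environment before anything is recorded.

```python
# Hedged sketch: event name and properties are placeholders; properties must
# never contain personal or otherwise sensitive data.
from core.telemetry.posthog import (
    PostHogTelemetryClient,
    is_telemetry_enabled,
    record_event,
)

if is_telemetry_enabled():
    record_event("example_feature_used", {"feature": "demo"})
    PostHogTelemetryClient.get_client().flush()  # force delivery before exit
```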

--------------------------------------------------------------------------------
/libs/python/agent/agent/ui/gradio/app.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Advanced Gradio UI for Computer-Use Agent (cua-agent)
  3 | 
  4 | This is a Gradio interface for the Computer-Use Agent v0.4.x (cua-agent)
  5 | with an advanced UI for model selection and configuration.
  6 | 
  7 | Supported Agent Models:
  8 | - OpenAI: openai/computer-use-preview
  9 | - Anthropic: anthropic/claude-sonnet-4-5-20250929, anthropic/claude-3-7-sonnet-20250219
 10 | - UI-TARS: huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
 11 | - Omniparser: omniparser+anthropic/claude-sonnet-4-5-20250929, omniparser+ollama_chat/gemma3
 12 | 
 13 | Requirements:
 14 |     - Mac with Apple Silicon (M1/M2/M3/M4), Linux, or Windows
 15 |     - macOS 14 (Sonoma) or newer / Ubuntu 20.04+
 16 |     - Python 3.11+
 17 |     - Lume CLI installed (https://github.com/trycua/cua)
 18 |     - OpenAI or Anthropic API key
 19 | """
 20 | 
 21 | import asyncio
 22 | import json
 23 | import logging
 24 | import os
 25 | import platform
 26 | from pathlib import Path
 27 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union, cast
 28 | 
 29 | import gradio as gr
 30 | 
 31 | # Import from agent package
 32 | from agent import ComputerAgent
 33 | from agent.types import AgentResponse, Messages
 34 | from computer import Computer
 35 | from gradio.components.chatbot import MetadataDict
 36 | 
 37 | # Global variables
 38 | global_agent = None
 39 | global_computer = None
 40 | SETTINGS_FILE = Path(".gradio_settings.json")
 41 | 
 42 | logging.basicConfig(level=logging.INFO)
 43 | 
 44 | import dotenv
 45 | 
 46 | if dotenv.load_dotenv():
 47 |     print(f"DEBUG - Loaded environment variables from {dotenv.find_dotenv()}")
 48 | else:
 49 |     print("DEBUG - No .env file found")
 50 | 
 51 | 
 52 | # --- Settings Load/Save Functions ---
 53 | def load_settings() -> Dict[str, Any]:
 54 |     """Loads settings from the JSON file."""
 55 |     if SETTINGS_FILE.exists():
 56 |         try:
 57 |             with open(SETTINGS_FILE, "r") as f:
 58 |                 settings = json.load(f)
 59 |                 if isinstance(settings, dict):
 60 |                     print(f"DEBUG - Loaded settings from {SETTINGS_FILE}")
 61 |                     return settings
 62 |         except (json.JSONDecodeError, IOError) as e:
 63 |             print(f"Warning: Could not load settings from {SETTINGS_FILE}: {e}")
 64 |     return {}
 65 | 
 66 | 
 67 | def save_settings(settings: Dict[str, Any]):
 68 |     """Saves settings to the JSON file."""
 69 |     settings.pop("provider_api_key", None)
 70 |     try:
 71 |         with open(SETTINGS_FILE, "w") as f:
 72 |             json.dump(settings, f, indent=4)
 73 |         print(f"DEBUG - Saved settings to {SETTINGS_FILE}")
 74 |     except IOError as e:
 75 |         print(f"Warning: Could not save settings to {SETTINGS_FILE}: {e}")
 76 | 
 77 | 
 78 | # # Custom Screenshot Handler for Gradio chat
 79 | # class GradioChatScreenshotHandler:
 80 | #     """Custom handler that adds screenshots to the Gradio chatbot."""
 81 | 
 82 | #     def __init__(self, chatbot_history: List[gr.ChatMessage]):
 83 | #         self.chatbot_history = chatbot_history
 84 | #         print("GradioChatScreenshotHandler initialized")
 85 | 
 86 | #     async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
 87 | #         """Add screenshot to chatbot when a screenshot is taken."""
 88 | #         image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})"
 89 | 
 90 | #         if self.chatbot_history is not None:
 91 | #             self.chatbot_history.append(
 92 | #                 gr.ChatMessage(
 93 | #                     role="assistant",
 94 | #                     content=image_markdown,
 95 | #                     metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
 96 | #                 )
 97 | #             )
 98 | 
 99 | 
100 | # Detect platform capabilities
101 | is_mac = platform.system().lower() == "darwin"
102 | is_lume_available = is_mac or (os.environ.get("PYLUME_HOST", "localhost") != "localhost")
103 | 
104 | print("PYLUME_HOST: ", os.environ.get("PYLUME_HOST", "localhost"))
105 | print("is_mac: ", is_mac)
106 | print("Lume available: ", is_lume_available)
107 | 
108 | # Map model names to agent model strings
109 | MODEL_MAPPINGS = {
110 |     "openai": {
111 |         "default": "openai/computer-use-preview",
112 |         "OpenAI: Computer-Use Preview": "openai/computer-use-preview",
113 |     },
114 |     "anthropic": {
115 |         "default": "anthropic/claude-3-7-sonnet-20250219",
116 |         "Anthropic: Claude 4 Opus (20250514)": "anthropic/claude-opus-4-20250514",
117 |         "Anthropic: Claude 4 Sonnet (20250514)": "anthropic/claude-sonnet-4-20250514",
118 |         "Anthropic: Claude 3.7 Sonnet (20250219)": "anthropic/claude-3-7-sonnet-20250219",
119 |     },
120 |     "omni": {
121 |         "default": "omniparser+openai/gpt-4o",
122 |         "OMNI: OpenAI GPT-4o": "omniparser+openai/gpt-4o",
123 |         "OMNI: OpenAI GPT-4o mini": "omniparser+openai/gpt-4o-mini",
124 |         "OMNI: Claude 3.7 Sonnet (20250219)": "omniparser+anthropic/claude-3-7-sonnet-20250219",
125 |     },
126 |     "uitars": {
127 |         "default": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" if is_mac else "ui-tars",
128 |         "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
129 |     },
130 | }
131 | 
132 | 
133 | def get_model_string(model_name: str, loop_provider: str) -> str:
134 |     """Determine the agent model string based on the input."""
135 |     if model_name == "Custom model (OpenAI compatible API)":
136 |         return "custom_oaicompat"
137 |     elif model_name == "Custom model (ollama)":
138 |         return "custom_ollama"
139 |     elif loop_provider == "OMNI-OLLAMA" or model_name.startswith("OMNI: Ollama "):
140 |         if model_name.startswith("OMNI: Ollama "):
141 |             ollama_model = model_name.split("OMNI: Ollama ", 1)[1]
142 |             return f"omniparser+ollama_chat/{ollama_model}"
143 |         return "omniparser+ollama_chat/llama3"
144 | 
145 |     # Map based on loop provider
146 |     mapping = MODEL_MAPPINGS.get(loop_provider.lower(), MODEL_MAPPINGS["openai"])
147 |     return mapping.get(model_name, mapping["default"])
148 | 
149 | 
150 | def get_ollama_models() -> List[str]:
151 |     """Get available models from Ollama if installed."""
152 |     try:
153 |         import subprocess
154 | 
155 |         result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
156 |         if result.returncode == 0:
157 |             lines = result.stdout.strip().split("\n")
158 |             if len(lines) < 2:
159 |                 return []
160 |             models = []
161 |             for line in lines[1:]:
162 |                 parts = line.split()
163 |                 if parts:
164 |                     model_name = parts[0]
165 |                     models.append(f"OMNI: Ollama {model_name}")
166 |             return models
167 |         return []
168 |     except Exception as e:
169 |         logging.error(f"Error getting Ollama models: {e}")
170 |         return []
171 | 
172 | 
173 | def create_computer_instance(
174 |     verbosity: int = logging.INFO,
175 |     os_type: str = "macos",
176 |     provider_type: str = "lume",
177 |     name: Optional[str] = None,
178 |     api_key: Optional[str] = None,
179 | ) -> Computer:
180 |     """Create or get the global Computer instance."""
181 |     global global_computer
182 |     if global_computer is None:
183 |         if provider_type == "localhost":
184 |             global_computer = Computer(
185 |                 verbosity=verbosity, os_type=os_type, use_host_computer_server=True
186 |             )
187 |         else:
188 |             global_computer = Computer(
189 |                 verbosity=verbosity,
190 |                 os_type=os_type,
191 |                 provider_type=provider_type,
192 |                 name=name if name else "",
193 |                 api_key=api_key,
194 |             )
195 |     return global_computer
196 | 
197 | 
198 | def create_agent(
199 |     model_string: str,
200 |     save_trajectory: bool = True,
201 |     only_n_most_recent_images: int = 3,
202 |     verbosity: int = logging.INFO,
203 |     custom_model_name: Optional[str] = None,
204 |     computer_os: str = "macos",
205 |     computer_provider: str = "lume",
206 |     computer_name: Optional[str] = None,
207 |     computer_api_key: Optional[str] = None,
208 |     max_trajectory_budget: Optional[float] = None,
209 | ) -> ComputerAgent:
210 |     """Create or update the global agent with the specified parameters."""
211 |     global global_agent
212 | 
213 |     # Create the computer
214 |     computer = create_computer_instance(
215 |         verbosity=verbosity,
216 |         os_type=computer_os,
217 |         provider_type=computer_provider,
218 |         name=computer_name,
219 |         api_key=computer_api_key,
220 |     )
221 | 
222 |     # Handle custom models
223 |     if model_string == "custom_oaicompat" and custom_model_name:
224 |         model_string = custom_model_name
225 |     elif model_string == "custom_ollama" and custom_model_name:
226 |         model_string = f"omniparser+ollama_chat/{custom_model_name}"
227 | 
228 |     # Create agent kwargs
229 |     agent_kwargs = {
230 |         "model": model_string,
231 |         "tools": [computer],
232 |         "only_n_most_recent_images": only_n_most_recent_images,
233 |         "verbosity": verbosity,
234 |     }
235 | 
236 |     if save_trajectory:
237 |         agent_kwargs["trajectory_dir"] = "trajectories"
238 | 
239 |     if max_trajectory_budget:
240 |         agent_kwargs["max_trajectory_budget"] = {
241 |             "max_budget": max_trajectory_budget,
242 |             "raise_error": True,
243 |         }
244 | 
245 |     global_agent = ComputerAgent(**agent_kwargs)
246 |     return global_agent
247 | 
248 | 
249 | def launch_ui():
250 |     """Standalone function to launch the Gradio app."""
251 |     from agent.ui.gradio.ui_components import create_gradio_ui
252 | 
253 |     print("Starting Gradio app for CUA Agent...")
254 |     demo = create_gradio_ui()
255 |     demo.launch(share=False, inbrowser=True)
256 | 
257 | 
258 | if __name__ == "__main__":
259 |     launch_ui()
260 | 
```
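
Besides running the module directly, the helpers above can be reused programmatically. A sketch with the model label taken from `MODEL_MAPPINGS` and all other settings left at their defaults; API keys are read from the `.env` file loaded at import time.

```python
# Hedged sketch: builds a ComputerAgent through the module's helper functions.
# Defaults (macOS, lume provider) apply to the underlying Computer instance.
from agent.ui.gradio.app import create_agent, get_model_string

model = get_model_string("OpenAI: Computer-Use Preview", "OPENAI")
agent = create_agent(model_string=model, save_trajectory=True)
print(f"Created agent for model: {model}")

# Or launch the full browser UI instead:
# from agent.ui.gradio.app import launch_ui
# launch_ui()
```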

--------------------------------------------------------------------------------
/libs/python/som/som/detection.py:
--------------------------------------------------------------------------------

```python
  1 | import logging
  2 | from pathlib import Path
  3 | from typing import Any, Dict, List, Optional, Tuple
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | import torchvision
  8 | from huggingface_hub import hf_hub_download
  9 | from PIL import Image
 10 | from ultralytics import YOLO
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | class DetectionProcessor:
 16 |     """Class for handling YOLO-based icon detection."""
 17 | 
 18 |     def __init__(
 19 |         self,
 20 |         model_path: Optional[Path] = None,
 21 |         cache_dir: Optional[Path] = None,
 22 |         force_device: Optional[str] = None,
 23 |     ):
 24 |         """Initialize the detection processor.
 25 | 
 26 |         Args:
 27 |             model_path: Path to YOLOv8 model
 28 |             cache_dir: Directory to cache downloaded models
 29 |             force_device: Force specific device (cuda, cpu, mps)
 30 |         """
 31 |         self.model_path = model_path
 32 |         self.cache_dir = cache_dir
 33 |         self.model = None  # type: Any  # Will be set to YOLO model in load_model
 34 | 
 35 |         # Set device
 36 |         self.device = "cpu"
 37 |         if torch.cuda.is_available() and force_device != "cpu":
 38 |             self.device = "cuda"
 39 |         elif (
 40 |             hasattr(torch, "backends")
 41 |             and hasattr(torch.backends, "mps")
 42 |             and torch.backends.mps.is_available()
 43 |             and force_device != "cpu"
 44 |         ):
 45 |             self.device = "mps"
 46 | 
 47 |         if force_device:
 48 |             self.device = force_device
 49 | 
 50 |         logger.info(f"Using device: {self.device}")
 51 | 
 52 |     def load_model(self) -> None:
 53 |         """Load or download the YOLO model."""
 54 |         try:
 55 |             # Set default model path if none provided
 56 |             if self.model_path is None:
 57 |                 self.model_path = Path(__file__).parent / "weights" / "icon_detect" / "model.pt"
 58 | 
 59 |             # Check if the model file already exists
 60 |             if not self.model_path.exists():
 61 |                 logger.info(
 62 |                     "Model not found locally, downloading from Microsoft OmniParser-v2.0..."
 63 |                 )
 64 | 
 65 |                 # Create directory
 66 |                 self.model_path.parent.mkdir(parents=True, exist_ok=True)
 67 | 
 68 |                 try:
 69 |                     # Check if the model exists in cache
 70 |                     cache_path = None
 71 |                     if self.cache_dir:
 72 |                         # Try to find the model in the cache
 73 |                         potential_paths = list(Path(self.cache_dir).glob("**/model.pt"))
 74 |                         if potential_paths:
 75 |                             cache_path = str(potential_paths[0])
 76 |                             logger.info(f"Found model in cache: {cache_path}")
 77 | 
 78 |                     if not cache_path:
 79 |                         # Download from HuggingFace
 80 |                         downloaded_path = hf_hub_download(
 81 |                             repo_id="microsoft/OmniParser-v2.0",
 82 |                             filename="icon_detect/model.pt",
 83 |                             cache_dir=self.cache_dir,
 84 |                         )
 85 |                         cache_path = downloaded_path
 86 |                         logger.info(f"Model downloaded to cache: {cache_path}")
 87 | 
 88 |                     # Copy to package directory
 89 |                     import shutil
 90 | 
 91 |                     shutil.copy2(cache_path, self.model_path)
 92 |                     logger.info(f"Model copied to: {self.model_path}")
 93 |                 except Exception as e:
 94 |                     raise FileNotFoundError(
 95 |                         f"Failed to download model: {str(e)}\n"
 96 |                         "Please ensure you have an internet connection and that huggingface-hub is installed."
 97 |                     ) from e
 98 | 
 99 |             # Make sure the model path exists before loading
100 |             if not self.model_path.exists():
101 |                 raise FileNotFoundError(f"Model file not found at: {self.model_path}")
102 | 
103 |             # If model is already loaded, skip reloading
104 |             if self.model is not None:
105 |                 logger.info("Model already loaded, skipping reload")
106 |                 return
107 | 
108 |             logger.info(f"Loading YOLOv8 model from {self.model_path}")
109 |             from ultralytics import YOLO
110 | 
111 |             self.model = YOLO(str(self.model_path))  # Convert Path to string for compatibility
112 | 
113 |             # Verify model loaded successfully
114 |             if self.model is None:
115 |                 raise ValueError("Model failed to initialize but didn't raise an exception")
116 | 
117 |             if self.device in ["cuda", "mps"]:
118 |                 self.model.to(self.device)
119 | 
120 |             logger.info(f"Model loaded successfully with device: {self.device}")
121 |         except Exception as e:
122 |             logger.error(f"Failed to load model: {str(e)}")
123 |             # Re-raise with more informative message but preserve the model as None
124 |             self.model = None
125 |             raise RuntimeError(f"Failed to initialize detection model: {str(e)}") from e
126 | 
127 |     def detect_icons(
128 |         self,
129 |         image: Image.Image,
130 |         box_threshold: float = 0.05,
131 |         iou_threshold: float = 0.1,
132 |         multi_scale: bool = True,
133 |     ) -> List[Dict[str, Any]]:
134 |         """Detect icons in an image using YOLO.
135 | 
136 |         Args:
137 |             image: PIL Image to process
138 |             box_threshold: Confidence threshold for detection
139 |             iou_threshold: IOU threshold for NMS
140 |             multi_scale: Whether to use multi-scale detection
141 | 
142 |         Returns:
143 |             List of icon detection dictionaries
144 |         """
145 |         # Load model if not already loaded
146 |         if self.model is None:
147 |             self.load_model()
148 | 
149 |         # Double-check the model was successfully loaded
150 |         if self.model is None:
151 |             logger.error("Model failed to load and is still None")
152 |             return []  # Return empty list instead of crashing
153 | 
154 |         img_width, img_height = image.size
155 |         all_detections = []
156 | 
157 |         # Define detection scales
158 |         scales = (
159 |             [{"size": 1280, "conf": box_threshold}]  # Single scale for CPU
160 |             if self.device == "cpu"
161 |             else [
162 |                 {"size": 640, "conf": box_threshold},  # Base scale
163 |                 {"size": 1280, "conf": box_threshold},  # Medium scale
164 |                 {"size": 1920, "conf": box_threshold},  # Large scale
165 |             ]
166 |         )
167 | 
168 |         if not multi_scale:
169 |             scales = [scales[0]]
170 | 
171 |         # Run detection at each scale
172 |         for scale in scales:
173 |             try:
174 |                 if self.model is None:
175 |                     logger.error("Model is None, skipping detection")
176 |                     continue
177 | 
178 |                 results = self.model.predict(
179 |                     source=image,
180 |                     conf=scale["conf"],
181 |                     iou=iou_threshold,
182 |                     max_det=1000,
183 |                     verbose=False,
184 |                     augment=self.device != "cpu",
185 |                     agnostic_nms=True,
186 |                     imgsz=scale["size"],
187 |                     device=self.device,
188 |                 )
189 | 
190 |                 # Process results
191 |                 for r in results:
192 |                     boxes = r.boxes
193 |                     if not hasattr(boxes, "conf") or not hasattr(boxes, "xyxy"):
194 |                         logger.warning("Boxes object missing expected attributes")
195 |                         continue
196 | 
197 |                     confidences = boxes.conf
198 |                     coords = boxes.xyxy
199 | 
200 |                     # Handle different types of tensors (PyTorch, NumPy, etc.)
201 |                     if hasattr(confidences, "cpu"):
202 |                         confidences = confidences.cpu()
203 |                     if hasattr(coords, "cpu"):
204 |                         coords = coords.cpu()
205 | 
206 |                     for conf, bbox in zip(confidences, coords):
207 |                         # Normalize coordinates
208 |                         x1, y1, x2, y2 = bbox.tolist()
209 |                         norm_bbox = [
210 |                             x1 / img_width,
211 |                             y1 / img_height,
212 |                             x2 / img_width,
213 |                             y2 / img_height,
214 |                         ]
215 | 
216 |                         all_detections.append(
217 |                             {
218 |                                 "type": "icon",
219 |                                 "confidence": conf.item(),
220 |                                 "bbox": norm_bbox,
221 |                                 "scale": scale["size"],
222 |                                 "interactivity": True,
223 |                             }
224 |                         )
225 | 
226 |             except Exception as e:
227 |                 logger.warning(f"Detection failed at scale {scale['size']}: {str(e)}")
228 |                 continue
229 | 
230 |         # Merge detections using NMS
231 |         if len(all_detections) > 0:
232 |             boxes = torch.tensor([d["bbox"] for d in all_detections])
233 |             scores = torch.tensor([d["confidence"] for d in all_detections])
234 | 
235 |             keep_indices = torchvision.ops.nms(boxes, scores, iou_threshold)
236 | 
237 |             merged_detections = [all_detections[i] for i in keep_indices]
238 |         else:
239 |             merged_detections = []
240 | 
241 |         return merged_detections
242 | 
```
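A short usage sketch for `DetectionProcessor`; the screenshot path and thresholds are illustrative, and the import path is inferred from the file header (`detection.py` inside the `som` package).

```python
from PIL import Image

from som.detection import DetectionProcessor  # import path inferred from the file header

# Force CPU for deterministic behaviour; omit force_device to auto-select cuda/mps.
processor = DetectionProcessor(force_device="cpu")
processor.load_model()  # downloads icon_detect/model.pt from the HuggingFace Hub on first use

screenshot = Image.open("screenshot.png")  # illustrative path
detections = processor.detect_icons(screenshot, box_threshold=0.05, multi_scale=False)

for det in detections:
    # bbox is [x1, y1, x2, y2], normalized to the image width/height
    print(f"{det['confidence']:.2f} {det['bbox']}")
```

Note that `detect_icons` lazy-loads the model on its first call, so the explicit `load_model()` is optional; calling it up front simply surfaces download or device errors earlier.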
Page 10/28