This is page 14 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ ├── py.code-workspace │ └── settings.json ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── windows-sandbox.md ├── CONTRIBUTING.md ├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── cloud-vm-management.mdx │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── 
Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ │ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── cloud_api_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ 
│ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift │ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── .bumpversion.cfg │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── gemini.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ 
├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── types.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── .bumpversion.cfg │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── CONCURRENT_SESSIONS.md │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── server.py │ │ │ │ └── session_manager.py │ │ │ ├── pdm.lock │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── .bumpversion.cfg │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── .bumpversion.cfg │ │ ├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py 
│ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ │ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── xfce4-session.xml ├── LICENSE.md ├── Makefile ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_mcp_server_session_management.py ├── test_mcp_server_streaming.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/handlers/linux.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Linux implementation of automation and accessibility handlers. 
3 | 4 | This implementation attempts to use pyautogui for GUI automation when available. 5 | If running in a headless environment without X11, it will fall back to simulated responses. 6 | To use GUI automation in a headless environment: 7 | 1. Install Xvfb: sudo apt-get install xvfb 8 | 2. Run with virtual display: xvfb-run python -m computer_server 9 | """ 10 | from typing import Dict, Any, List, Tuple, Optional 11 | import logging 12 | import subprocess 13 | import asyncio 14 | import base64 15 | import os 16 | import json 17 | from io import BytesIO 18 | 19 | # Configure logger 20 | logger = logging.getLogger(__name__) 21 | 22 | # Try to import pyautogui, but don't fail if it's not available 23 | # This allows the server to run in headless environments 24 | try: 25 | import pyautogui 26 | pyautogui.FAILSAFE = False 27 | 28 | logger.info("pyautogui successfully imported, GUI automation available") 29 | except Exception as e: 30 | logger.warning(f"pyautogui import failed: {str(e)}. GUI operations will be simulated.") 31 | 32 | from pynput.mouse import Button, Controller as MouseController 33 | from pynput.keyboard import Key, Controller as KeyboardController 34 | 35 | from .base import BaseAccessibilityHandler, BaseAutomationHandler 36 | 37 | class LinuxAccessibilityHandler(BaseAccessibilityHandler): 38 | """Linux implementation of accessibility handler.""" 39 | 40 | async def get_accessibility_tree(self) -> Dict[str, Any]: 41 | """Get the accessibility tree of the current window. 42 | 43 | Returns: 44 | Dict[str, Any]: A dictionary containing success status and a simulated tree structure 45 | since Linux doesn't have equivalent accessibility API like macOS. 46 | """ 47 | # Linux doesn't have equivalent accessibility API like macOS 48 | # Return a minimal dummy tree 49 | logger.info("Getting accessibility tree (simulated, no accessibility API available on Linux)") 50 | return { 51 | "success": True, 52 | "tree": { 53 | "role": "Window", 54 | "title": "Linux Window", 55 | "position": {"x": 0, "y": 0}, 56 | "size": {"width": 1920, "height": 1080}, 57 | "children": [] 58 | } 59 | } 60 | 61 | async def find_element(self, role: Optional[str] = None, 62 | title: Optional[str] = None, 63 | value: Optional[str] = None) -> Dict[str, Any]: 64 | """Find an element in the accessibility tree by criteria. 65 | 66 | Args: 67 | role: The role of the element to find. 68 | title: The title of the element to find. 69 | value: The value of the element to find. 70 | 71 | Returns: 72 | Dict[str, Any]: A dictionary indicating that element search is not supported on Linux. 73 | """ 74 | logger.info(f"Finding element with role={role}, title={title}, value={value} (not supported on Linux)") 75 | return { 76 | "success": False, 77 | "message": "Element search not supported on Linux" 78 | } 79 | 80 | def get_cursor_position(self) -> Tuple[int, int]: 81 | """Get the current cursor position. 82 | 83 | Returns: 84 | Tuple[int, int]: The x and y coordinates of the cursor position. 85 | Returns (0, 0) if pyautogui is not available. 86 | """ 87 | try: 88 | pos = pyautogui.position() 89 | return pos.x, pos.y 90 | except Exception as e: 91 | logger.warning(f"Failed to get cursor position with pyautogui: {e}") 92 | 93 | logger.info("Getting cursor position (simulated)") 94 | return 0, 0 95 | 96 | def get_screen_size(self) -> Tuple[int, int]: 97 | """Get the screen size. 98 | 99 | Returns: 100 | Tuple[int, int]: The width and height of the screen in pixels. 101 | Returns (1920, 1080) if pyautogui is not available. 
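        Example (added illustration, not from the original docstring; shows the
        documented fallback behaviour of this handler):
            handler = LinuxAccessibilityHandler()
            width, height = handler.get_screen_size()
            # Falls back to (1920, 1080) when pyautogui cannot reach a display.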
102 | """ 103 | try: 104 | size = pyautogui.size() 105 | return size.width, size.height 106 | except Exception as e: 107 | logger.warning(f"Failed to get screen size with pyautogui: {e}") 108 | 109 | logger.info("Getting screen size (simulated)") 110 | return 1920, 1080 111 | 112 | class LinuxAutomationHandler(BaseAutomationHandler): 113 | """Linux implementation of automation handler using pyautogui.""" 114 | keyboard = KeyboardController() 115 | mouse = MouseController() 116 | 117 | # Mouse Actions 118 | async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: 119 | """Press and hold a mouse button at the specified coordinates. 120 | 121 | Args: 122 | x: The x coordinate to move to before pressing. If None, uses current position. 123 | y: The y coordinate to move to before pressing. If None, uses current position. 124 | button: The mouse button to press ("left", "right", or "middle"). 125 | 126 | Returns: 127 | Dict[str, Any]: A dictionary with success status and error message if failed. 128 | """ 129 | try: 130 | if x is not None and y is not None: 131 | pyautogui.moveTo(x, y) 132 | pyautogui.mouseDown(button=button) 133 | return {"success": True} 134 | except Exception as e: 135 | return {"success": False, "error": str(e)} 136 | 137 | async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: 138 | """Release a mouse button at the specified coordinates. 139 | 140 | Args: 141 | x: The x coordinate to move to before releasing. If None, uses current position. 142 | y: The y coordinate to move to before releasing. If None, uses current position. 143 | button: The mouse button to release ("left", "right", or "middle"). 144 | 145 | Returns: 146 | Dict[str, Any]: A dictionary with success status and error message if failed. 147 | """ 148 | try: 149 | if x is not None and y is not None: 150 | pyautogui.moveTo(x, y) 151 | pyautogui.mouseUp(button=button) 152 | return {"success": True} 153 | except Exception as e: 154 | return {"success": False, "error": str(e)} 155 | 156 | async def move_cursor(self, x: int, y: int) -> Dict[str, Any]: 157 | """Move the cursor to the specified coordinates. 158 | 159 | Args: 160 | x: The x coordinate to move to. 161 | y: The y coordinate to move to. 162 | 163 | Returns: 164 | Dict[str, Any]: A dictionary with success status and error message if failed. 165 | """ 166 | try: 167 | pyautogui.moveTo(x, y) 168 | return {"success": True} 169 | except Exception as e: 170 | return {"success": False, "error": str(e)} 171 | 172 | async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: 173 | """Perform a left mouse click at the specified coordinates. 174 | 175 | Args: 176 | x: The x coordinate to click at. If None, clicks at current position. 177 | y: The y coordinate to click at. If None, clicks at current position. 178 | 179 | Returns: 180 | Dict[str, Any]: A dictionary with success status and error message if failed. 181 | """ 182 | try: 183 | if x is not None and y is not None: 184 | pyautogui.moveTo(x, y) 185 | pyautogui.click() 186 | return {"success": True} 187 | except Exception as e: 188 | return {"success": False, "error": str(e)} 189 | 190 | async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: 191 | """Perform a right mouse click at the specified coordinates. 192 | 193 | Args: 194 | x: The x coordinate to click at. If None, clicks at current position. 
195 | y: The y coordinate to click at. If None, clicks at current position. 196 | 197 | Returns: 198 | Dict[str, Any]: A dictionary with success status and error message if failed. 199 | """ 200 | try: 201 | if x is not None and y is not None: 202 | pyautogui.moveTo(x, y) 203 | pyautogui.rightClick() 204 | return {"success": True} 205 | except Exception as e: 206 | return {"success": False, "error": str(e)} 207 | 208 | async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: 209 | """Perform a double click at the specified coordinates. 210 | 211 | Args: 212 | x: The x coordinate to double click at. If None, clicks at current position. 213 | y: The y coordinate to double click at. If None, clicks at current position. 214 | 215 | Returns: 216 | Dict[str, Any]: A dictionary with success status and error message if failed. 217 | """ 218 | try: 219 | if x is not None and y is not None: 220 | pyautogui.moveTo(x, y) 221 | pyautogui.doubleClick(interval=0.1) 222 | return {"success": True} 223 | except Exception as e: 224 | return {"success": False, "error": str(e)} 225 | 226 | async def click(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: 227 | """Perform a mouse click with the specified button at the given coordinates. 228 | 229 | Args: 230 | x: The x coordinate to click at. If None, clicks at current position. 231 | y: The y coordinate to click at. If None, clicks at current position. 232 | button: The mouse button to click ("left", "right", or "middle"). 233 | 234 | Returns: 235 | Dict[str, Any]: A dictionary with success status and error message if failed. 236 | """ 237 | try: 238 | if x is not None and y is not None: 239 | pyautogui.moveTo(x, y) 240 | pyautogui.click(button=button) 241 | return {"success": True} 242 | except Exception as e: 243 | return {"success": False, "error": str(e)} 244 | 245 | async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> Dict[str, Any]: 246 | """Drag from the current position to the specified coordinates. 247 | 248 | Args: 249 | x: The x coordinate to drag to. 250 | y: The y coordinate to drag to. 251 | button: The mouse button to use for dragging. 252 | duration: The time in seconds to take for the drag operation. 253 | 254 | Returns: 255 | Dict[str, Any]: A dictionary with success status and error message if failed. 256 | """ 257 | try: 258 | pyautogui.dragTo(x, y, duration=duration, button=button) 259 | return {"success": True} 260 | except Exception as e: 261 | return {"success": False, "error": str(e)} 262 | 263 | async def drag(self, start_x: int, start_y: int, end_x: int, end_y: int, button: str = "left") -> Dict[str, Any]: 264 | """Drag from start coordinates to end coordinates. 265 | 266 | Args: 267 | start_x: The starting x coordinate. 268 | start_y: The starting y coordinate. 269 | end_x: The ending x coordinate. 270 | end_y: The ending y coordinate. 271 | button: The mouse button to use for dragging. 272 | 273 | Returns: 274 | Dict[str, Any]: A dictionary with success status and error message if failed. 275 | """ 276 | try: 277 | pyautogui.moveTo(start_x, start_y) 278 | pyautogui.dragTo(end_x, end_y, duration=0.5, button=button) 279 | return {"success": True} 280 | except Exception as e: 281 | return {"success": False, "error": str(e)} 282 | 283 | async def drag_path(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]: 284 | """Drag along a path defined by a list of coordinates. 
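        Example (added for illustration on a LinuxAutomationHandler instance;
        not part of the original docstring). Each element of path is an (x, y)
        pixel coordinate visited in order:

            await handler.drag_path([(100, 100), (200, 150), (300, 300)])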
285 | 286 | Args: 287 | path: A list of (x, y) coordinate tuples defining the drag path. 288 | button: The mouse button to use for dragging. 289 | duration: The time in seconds to take for each segment of the drag. 290 | 291 | Returns: 292 | Dict[str, Any]: A dictionary with success status and error message if failed. 293 | """ 294 | try: 295 | if not path: 296 | return {"success": False, "error": "Path is empty"} 297 | pyautogui.moveTo(*path[0]) 298 | for x, y in path[1:]: 299 | pyautogui.dragTo(x, y, duration=duration, button=button) 300 | return {"success": True} 301 | except Exception as e: 302 | return {"success": False, "error": str(e)} 303 | 304 | # Keyboard Actions 305 | async def key_down(self, key: str) -> Dict[str, Any]: 306 | """Press and hold a key. 307 | 308 | Args: 309 | key: The key to press down. 310 | 311 | Returns: 312 | Dict[str, Any]: A dictionary with success status and error message if failed. 313 | """ 314 | try: 315 | pyautogui.keyDown(key) 316 | return {"success": True} 317 | except Exception as e: 318 | return {"success": False, "error": str(e)} 319 | 320 | async def key_up(self, key: str) -> Dict[str, Any]: 321 | """Release a key. 322 | 323 | Args: 324 | key: The key to release. 325 | 326 | Returns: 327 | Dict[str, Any]: A dictionary with success status and error message if failed. 328 | """ 329 | try: 330 | pyautogui.keyUp(key) 331 | return {"success": True} 332 | except Exception as e: 333 | return {"success": False, "error": str(e)} 334 | 335 | async def type_text(self, text: str) -> Dict[str, Any]: 336 | """Type the specified text using the keyboard. 337 | 338 | Args: 339 | text: The text to type. 340 | 341 | Returns: 342 | Dict[str, Any]: A dictionary with success status and error message if failed. 343 | """ 344 | try: 345 | # use pynput for Unicode support 346 | self.keyboard.type(text) 347 | return {"success": True} 348 | except Exception as e: 349 | return {"success": False, "error": str(e)} 350 | 351 | async def press_key(self, key: str) -> Dict[str, Any]: 352 | """Press and release a key. 353 | 354 | Args: 355 | key: The key to press. 356 | 357 | Returns: 358 | Dict[str, Any]: A dictionary with success status and error message if failed. 359 | """ 360 | try: 361 | pyautogui.press(key) 362 | return {"success": True} 363 | except Exception as e: 364 | return {"success": False, "error": str(e)} 365 | 366 | async def hotkey(self, keys: List[str]) -> Dict[str, Any]: 367 | """Press a combination of keys simultaneously. 368 | 369 | Args: 370 | keys: A list of keys to press together as a hotkey combination. 371 | 372 | Returns: 373 | Dict[str, Any]: A dictionary with success status and error message if failed. 374 | """ 375 | try: 376 | pyautogui.hotkey(*keys) 377 | return {"success": True} 378 | except Exception as e: 379 | return {"success": False, "error": str(e)} 380 | 381 | # Scrolling Actions 382 | async def scroll(self, x: int, y: int) -> Dict[str, Any]: 383 | """Scroll the mouse wheel. 384 | 385 | Args: 386 | x: The horizontal scroll amount. 387 | y: The vertical scroll amount. 388 | 389 | Returns: 390 | Dict[str, Any]: A dictionary with success status and error message if failed. 391 | """ 392 | try: 393 | self.mouse.scroll(x, y) 394 | return {"success": True} 395 | except Exception as e: 396 | return {"success": False, "error": str(e)} 397 | 398 | async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: 399 | """Scroll down by the specified number of clicks. 400 | 401 | Args: 402 | clicks: The number of scroll clicks to perform downward. 
403 | 404 | Returns: 405 | Dict[str, Any]: A dictionary with success status and error message if failed. 406 | """ 407 | try: 408 | pyautogui.scroll(-clicks) 409 | return {"success": True} 410 | except Exception as e: 411 | return {"success": False, "error": str(e)} 412 | 413 | async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]: 414 | """Scroll up by the specified number of clicks. 415 | 416 | Args: 417 | clicks: The number of scroll clicks to perform upward. 418 | 419 | Returns: 420 | Dict[str, Any]: A dictionary with success status and error message if failed. 421 | """ 422 | try: 423 | pyautogui.scroll(clicks) 424 | return {"success": True} 425 | except Exception as e: 426 | return {"success": False, "error": str(e)} 427 | 428 | # Screen Actions 429 | async def screenshot(self) -> Dict[str, Any]: 430 | """Take a screenshot of the current screen. 431 | 432 | Returns: 433 | Dict[str, Any]: A dictionary containing success status and base64-encoded image data, 434 | or error message if failed. 435 | """ 436 | try: 437 | from PIL import Image 438 | screenshot = pyautogui.screenshot() 439 | if not isinstance(screenshot, Image.Image): 440 | return {"success": False, "error": "Failed to capture screenshot"} 441 | buffered = BytesIO() 442 | screenshot.save(buffered, format="PNG", optimize=True) 443 | buffered.seek(0) 444 | image_data = base64.b64encode(buffered.getvalue()).decode() 445 | return {"success": True, "image_data": image_data} 446 | except Exception as e: 447 | return {"success": False, "error": f"Screenshot error: {str(e)}"} 448 | 449 | async def get_screen_size(self) -> Dict[str, Any]: 450 | """Get the size of the screen. 451 | 452 | Returns: 453 | Dict[str, Any]: A dictionary containing success status and screen dimensions, 454 | or error message if failed. 455 | """ 456 | try: 457 | size = pyautogui.size() 458 | return {"success": True, "size": {"width": size.width, "height": size.height}} 459 | except Exception as e: 460 | return {"success": False, "error": str(e)} 461 | 462 | async def get_cursor_position(self) -> Dict[str, Any]: 463 | """Get the current position of the cursor. 464 | 465 | Returns: 466 | Dict[str, Any]: A dictionary containing success status and cursor coordinates, 467 | or error message if failed. 468 | """ 469 | try: 470 | pos = pyautogui.position() 471 | return {"success": True, "position": {"x": pos.x, "y": pos.y}} 472 | except Exception as e: 473 | return {"success": False, "error": str(e)} 474 | 475 | # Clipboard Actions 476 | async def copy_to_clipboard(self) -> Dict[str, Any]: 477 | """Get the current content of the clipboard. 478 | 479 | Returns: 480 | Dict[str, Any]: A dictionary containing success status and clipboard content, 481 | or error message if failed. 482 | """ 483 | try: 484 | import pyperclip 485 | content = pyperclip.paste() 486 | return {"success": True, "content": content} 487 | except Exception as e: 488 | return {"success": False, "error": str(e)} 489 | 490 | async def set_clipboard(self, text: str) -> Dict[str, Any]: 491 | """Set the clipboard content to the specified text. 492 | 493 | Args: 494 | text: The text to copy to the clipboard. 495 | 496 | Returns: 497 | Dict[str, Any]: A dictionary with success status and error message if failed. 
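        Example (added illustration; assumes pyperclip is installed, as the
        implementation below requires, and a LinuxAutomationHandler instance):
            await handler.set_clipboard("hello")
            result = await handler.copy_to_clipboard()
            # result -> {"success": True, "content": "hello"}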
498 | """ 499 | try: 500 | import pyperclip 501 | pyperclip.copy(text) 502 | return {"success": True} 503 | except Exception as e: 504 | return {"success": False, "error": str(e)} 505 | 506 | # Command Execution 507 | async def run_command(self, command: str) -> Dict[str, Any]: 508 | """Execute a shell command asynchronously. 509 | 510 | Args: 511 | command: The shell command to execute. 512 | 513 | Returns: 514 | Dict[str, Any]: A dictionary containing success status, stdout, stderr, 515 | and return code, or error message if failed. 516 | """ 517 | try: 518 | # Create subprocess 519 | process = await asyncio.create_subprocess_shell( 520 | command, 521 | stdout=asyncio.subprocess.PIPE, 522 | stderr=asyncio.subprocess.PIPE 523 | ) 524 | # Wait for the subprocess to finish 525 | stdout, stderr = await process.communicate() 526 | # Return decoded output 527 | return { 528 | "success": True, 529 | "stdout": stdout.decode() if stdout else "", 530 | "stderr": stderr.decode() if stderr else "", 531 | "return_code": process.returncode 532 | } 533 | except Exception as e: 534 | return {"success": False, "error": str(e)} 535 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/winsandbox/provider.py: -------------------------------------------------------------------------------- ```python 1 | """Windows Sandbox VM provider implementation using pywinsandbox.""" 2 | 3 | import os 4 | import asyncio 5 | import logging 6 | import time 7 | from typing import Dict, Any, Optional, List 8 | from pathlib import Path 9 | 10 | from ..base import BaseVMProvider, VMProviderType 11 | 12 | # Setup logging 13 | logger = logging.getLogger(__name__) 14 | 15 | try: 16 | import winsandbox 17 | HAS_WINSANDBOX = True 18 | except ImportError: 19 | HAS_WINSANDBOX = False 20 | 21 | 22 | class WinSandboxProvider(BaseVMProvider): 23 | """Windows Sandbox VM provider implementation using pywinsandbox. 24 | 25 | This provider uses Windows Sandbox to create isolated Windows environments. 26 | Storage is always ephemeral with Windows Sandbox. 27 | """ 28 | 29 | def __init__( 30 | self, 31 | port: int = 7777, 32 | host: str = "localhost", 33 | storage: Optional[str] = None, 34 | verbose: bool = False, 35 | ephemeral: bool = True, # Windows Sandbox is always ephemeral 36 | memory_mb: int = 4096, 37 | networking: bool = True, 38 | **kwargs 39 | ): 40 | """Initialize the Windows Sandbox provider. 41 | 42 | Args: 43 | port: Port for the computer server (default: 7777) 44 | host: Host to use for connections (default: localhost) 45 | storage: Storage path (ignored - Windows Sandbox is always ephemeral) 46 | verbose: Enable verbose logging 47 | ephemeral: Always True for Windows Sandbox 48 | memory_mb: Memory allocation in MB (default: 4096) 49 | networking: Enable networking in sandbox (default: True) 50 | """ 51 | if not HAS_WINSANDBOX: 52 | raise ImportError( 53 | "pywinsandbox is required for WinSandboxProvider. " 54 | "Please install it with 'pip install pywinsandbox'" 55 | ) 56 | 57 | self.host = host 58 | self.port = port 59 | self.verbose = verbose 60 | self.memory_mb = memory_mb 61 | self.networking = networking 62 | 63 | # Windows Sandbox is always ephemeral 64 | if not ephemeral: 65 | logger.warning("Windows Sandbox storage is always ephemeral. 
Ignoring ephemeral=False.") 66 | self.ephemeral = True 67 | 68 | # Storage is always ephemeral for Windows Sandbox 69 | if storage and storage != "ephemeral": 70 | logger.warning("Windows Sandbox does not support persistent storage. Using ephemeral storage.") 71 | self.storage = "ephemeral" 72 | 73 | self.logger = logging.getLogger(__name__) 74 | 75 | # Track active sandboxes 76 | self._active_sandboxes: Dict[str, Any] = {} 77 | 78 | @property 79 | def provider_type(self) -> VMProviderType: 80 | """Get the provider type.""" 81 | return VMProviderType.WINSANDBOX 82 | 83 | async def __aenter__(self): 84 | """Enter async context manager.""" 85 | # Verify Windows Sandbox is available 86 | if not HAS_WINSANDBOX: 87 | raise ImportError("pywinsandbox is not available") 88 | 89 | return self 90 | 91 | async def __aexit__(self, exc_type, exc_val, exc_tb): 92 | """Exit async context manager.""" 93 | # Clean up any active sandboxes 94 | for name, sandbox in self._active_sandboxes.items(): 95 | try: 96 | sandbox.shutdown() 97 | self.logger.info(f"Terminated sandbox: {name}") 98 | except Exception as e: 99 | self.logger.error(f"Error terminating sandbox {name}: {e}") 100 | 101 | self._active_sandboxes.clear() 102 | 103 | async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 104 | """Get VM information by name. 105 | 106 | Args: 107 | name: Name of the VM to get information for 108 | storage: Ignored for Windows Sandbox (always ephemeral) 109 | 110 | Returns: 111 | Dictionary with VM information including status, IP address, etc. 112 | """ 113 | if name not in self._active_sandboxes: 114 | return { 115 | "name": name, 116 | "status": "stopped", 117 | "ip_address": None, 118 | "storage": "ephemeral" 119 | } 120 | 121 | sandbox = self._active_sandboxes[name] 122 | 123 | # Check if sandbox is still running 124 | try: 125 | # Try to ping the sandbox to see if it's responsive 126 | try: 127 | sandbox.rpyc.modules.os.getcwd() 128 | sandbox_responsive = True 129 | except Exception: 130 | sandbox_responsive = False 131 | 132 | if not sandbox_responsive: 133 | return { 134 | "name": name, 135 | "status": "starting", 136 | "ip_address": None, 137 | "storage": "ephemeral", 138 | "memory_mb": self.memory_mb, 139 | "networking": self.networking 140 | } 141 | 142 | # Check for computer server address file 143 | server_address_file = r"C:\Users\WDAGUtilityAccount\Desktop\shared_windows_sandbox_dir\server_address" 144 | 145 | try: 146 | # Check if the server address file exists 147 | file_exists = sandbox.rpyc.modules.os.path.exists(server_address_file) 148 | 149 | if file_exists: 150 | # Read the server address file 151 | with sandbox.rpyc.builtin.open(server_address_file, 'r') as f: 152 | server_address = f.read().strip() 153 | 154 | if server_address and ':' in server_address: 155 | # Parse IP:port from the file 156 | ip_address, port = server_address.split(':', 1) 157 | 158 | # Verify the server is actually responding 159 | try: 160 | import socket 161 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 162 | sock.settimeout(3) 163 | result = sock.connect_ex((ip_address, int(port))) 164 | sock.close() 165 | 166 | if result == 0: 167 | # Server is responding 168 | status = "running" 169 | self.logger.debug(f"Computer server found at {ip_address}:{port}") 170 | else: 171 | # Server file exists but not responding 172 | status = "starting" 173 | ip_address = None 174 | except Exception as e: 175 | self.logger.debug(f"Error checking server connectivity: {e}") 176 | status = 
"starting" 177 | ip_address = None 178 | else: 179 | # File exists but doesn't contain valid address 180 | status = "starting" 181 | ip_address = None 182 | else: 183 | # Server address file doesn't exist yet 184 | status = "starting" 185 | ip_address = None 186 | 187 | except Exception as e: 188 | self.logger.debug(f"Error checking server address file: {e}") 189 | status = "starting" 190 | ip_address = None 191 | 192 | except Exception as e: 193 | self.logger.error(f"Error checking sandbox status: {e}") 194 | status = "error" 195 | ip_address = None 196 | 197 | return { 198 | "name": name, 199 | "status": status, 200 | "ip_address": ip_address, 201 | "storage": "ephemeral", 202 | "memory_mb": self.memory_mb, 203 | "networking": self.networking 204 | } 205 | 206 | async def list_vms(self) -> List[Dict[str, Any]]: 207 | """List all available VMs.""" 208 | vms = [] 209 | for name in self._active_sandboxes.keys(): 210 | vm_info = await self.get_vm(name) 211 | vms.append(vm_info) 212 | return vms 213 | 214 | async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: 215 | """Run a VM with the given options. 216 | 217 | Args: 218 | image: Image name (ignored for Windows Sandbox - always uses host Windows) 219 | name: Name of the VM to run 220 | run_opts: Dictionary of run options (memory, cpu, etc.) 221 | storage: Ignored for Windows Sandbox (always ephemeral) 222 | 223 | Returns: 224 | Dictionary with VM run status and information 225 | """ 226 | if name in self._active_sandboxes: 227 | return { 228 | "success": False, 229 | "error": f"Sandbox {name} is already running" 230 | } 231 | 232 | try: 233 | # Extract options from run_opts 234 | memory_mb = run_opts.get("memory_mb", self.memory_mb) 235 | if isinstance(memory_mb, str): 236 | # Convert memory string like "4GB" to MB 237 | if memory_mb.upper().endswith("GB"): 238 | memory_mb = int(float(memory_mb[:-2]) * 1024) 239 | elif memory_mb.upper().endswith("MB"): 240 | memory_mb = int(memory_mb[:-2]) 241 | else: 242 | memory_mb = self.memory_mb 243 | 244 | networking = run_opts.get("networking", self.networking) 245 | 246 | # Create folder mappers; always map a persistent venv directory on host for caching packages 247 | folder_mappers = [] 248 | # Ensure host side persistent venv directory exists (Path.home()/wsb_venv) 249 | host_wsb_env = Path.home() / ".cua" / "wsb_cache" 250 | try: 251 | host_wsb_env.mkdir(parents=True, exist_ok=True) 252 | except Exception: 253 | # If cannot create, continue without persistent mapping 254 | host_wsb_env = None 255 | shared_directories = run_opts.get("shared_directories", []) 256 | for shared_dir in shared_directories: 257 | if isinstance(shared_dir, dict): 258 | host_path = shared_dir.get("hostPath", "") 259 | elif isinstance(shared_dir, str): 260 | host_path = shared_dir 261 | else: 262 | continue 263 | 264 | if host_path and os.path.exists(host_path): 265 | folder_mappers.append(winsandbox.FolderMapper(host_path)) 266 | 267 | # Add mapping for the persistent venv directory (read/write) so it appears in Sandbox Desktop 268 | if host_wsb_env is not None and host_wsb_env.exists(): 269 | try: 270 | folder_mappers.append( 271 | winsandbox.FolderMapper(str(host_wsb_env), read_only=False) 272 | ) 273 | except Exception as e: 274 | self.logger.warning(f"Failed to map host winsandbox_venv: {e}") 275 | 276 | self.logger.info(f"Creating Windows Sandbox: {name}") 277 | self.logger.info(f"Memory: {memory_mb}MB, Networking: {networking}") 278 | if 
folder_mappers: 279 | self.logger.info(f"Shared directories: {len(folder_mappers)}") 280 | 281 | # Create the sandbox without logon script 282 | try: 283 | # Try with memory_mb parameter (newer pywinsandbox version) 284 | sandbox = winsandbox.new_sandbox( 285 | memory_mb=str(memory_mb), 286 | networking=networking, 287 | folder_mappers=folder_mappers 288 | ) 289 | except TypeError as e: 290 | if "memory_mb" in str(e): 291 | # Fallback for older pywinsandbox version that doesn't support memory_mb 292 | self.logger.warning( 293 | f"Your pywinsandbox version doesn't support memory_mb parameter. " 294 | f"Using default memory settings. To use custom memory settings, " 295 | f"please update pywinsandbox: pip install -U git+https://github.com/karkason/pywinsandbox.git" 296 | ) 297 | sandbox = winsandbox.new_sandbox( 298 | networking=networking, 299 | folder_mappers=folder_mappers 300 | ) 301 | else: 302 | # Re-raise if it's a different TypeError 303 | raise 304 | 305 | # Store the sandbox 306 | self._active_sandboxes[name] = sandbox 307 | 308 | self.logger.info(f"Windows Sandbox {name} created successfully") 309 | 310 | venv_exists = (host_wsb_env / "venv" / "Lib" / "site-packages" / "computer_server").exists() if host_wsb_env else False 311 | 312 | # Setup the computer server in the sandbox 313 | await self._setup_computer_server(sandbox, name, wait_for_venv=(not venv_exists)) 314 | 315 | return { 316 | "success": True, 317 | "name": name, 318 | "status": "starting", 319 | "memory_mb": memory_mb, 320 | "networking": networking, 321 | "storage": "ephemeral" 322 | } 323 | 324 | except Exception as e: 325 | self.logger.error(f"Failed to create Windows Sandbox {name}: {e}") 326 | # stack trace 327 | import traceback 328 | self.logger.error(f"Stack trace: {traceback.format_exc()}") 329 | return { 330 | "success": False, 331 | "error": f"Failed to create sandbox: {str(e)}" 332 | } 333 | 334 | async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 335 | """Stop a running VM. 336 | 337 | Args: 338 | name: Name of the VM to stop 339 | storage: Ignored for Windows Sandbox 340 | 341 | Returns: 342 | Dictionary with stop status and information 343 | """ 344 | if name not in self._active_sandboxes: 345 | return { 346 | "success": False, 347 | "error": f"Sandbox {name} is not running" 348 | } 349 | 350 | try: 351 | sandbox = self._active_sandboxes[name] 352 | 353 | # Terminate the sandbox 354 | sandbox.shutdown() 355 | 356 | # Remove from active sandboxes 357 | del self._active_sandboxes[name] 358 | 359 | self.logger.info(f"Windows Sandbox {name} stopped successfully") 360 | 361 | return { 362 | "success": True, 363 | "name": name, 364 | "status": "stopped" 365 | } 366 | 367 | except Exception as e: 368 | self.logger.error(f"Failed to stop Windows Sandbox {name}: {e}") 369 | return { 370 | "success": False, 371 | "error": f"Failed to stop sandbox: {str(e)}" 372 | } 373 | 374 | async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: 375 | """Update VM configuration. 376 | 377 | Note: Windows Sandbox does not support runtime configuration updates. 378 | The sandbox must be stopped and restarted with new configuration. 
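        (Added sketch of the stop-and-recreate workaround on a provider
        instance; the option values are placeholders, not recommendations, and
        the image argument is ignored by this provider:)

            await provider.stop_vm("my-sandbox")
            await provider.run_vm(image="", name="my-sandbox",
                                  run_opts={"memory_mb": 8192, "networking": True})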
379 | 380 | Args: 381 | name: Name of the VM to update 382 | update_opts: Dictionary of update options 383 | storage: Ignored for Windows Sandbox 384 | 385 | Returns: 386 | Dictionary with update status and information 387 | """ 388 | return { 389 | "success": False, 390 | "error": "Windows Sandbox does not support runtime configuration updates. " 391 | "Please stop and restart the sandbox with new configuration." 392 | } 393 | 394 | async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 395 | raise NotImplementedError("WinSandboxProvider does not support restarting VMs.") 396 | 397 | async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: 398 | """Get the IP address of a VM, waiting indefinitely until it's available. 399 | 400 | Args: 401 | name: Name of the VM to get the IP for 402 | storage: Ignored for Windows Sandbox 403 | retry_delay: Delay between retries in seconds (default: 2) 404 | 405 | Returns: 406 | IP address of the VM when it becomes available 407 | """ 408 | total_attempts = 0 409 | 410 | # Loop indefinitely until we get a valid IP 411 | while True: 412 | total_attempts += 1 413 | 414 | # Log retry message but not on first attempt 415 | if total_attempts > 1: 416 | self.logger.info(f"Waiting for Windows Sandbox {name} IP address (attempt {total_attempts})...") 417 | 418 | try: 419 | # Get VM information 420 | vm_info = await self.get_vm(name, storage=storage) 421 | 422 | # Check if we got a valid IP 423 | ip = vm_info.get("ip_address", None) 424 | if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): 425 | self.logger.info(f"Got valid Windows Sandbox IP address: {ip}") 426 | return ip 427 | 428 | # Check the VM status 429 | status = vm_info.get("status", "unknown") 430 | 431 | # If VM is not running yet, log and wait 432 | if status != "running": 433 | self.logger.info(f"Windows Sandbox is not running yet (status: {status}). Waiting...") 434 | # If VM is running but no IP yet, wait and retry 435 | else: 436 | self.logger.info("Windows Sandbox is running but no valid IP address yet. Waiting...") 437 | 438 | except Exception as e: 439 | self.logger.warning(f"Error getting Windows Sandbox {name} IP: {e}, continuing to wait...") 440 | 441 | # Wait before next retry 442 | await asyncio.sleep(retry_delay) 443 | 444 | # Add progress log every 10 attempts 445 | if total_attempts % 10 == 0: 446 | self.logger.info(f"Still waiting for Windows Sandbox {name} IP after {total_attempts} attempts...") 447 | 448 | async def _setup_computer_server(self, sandbox, name: str, visible: bool = False, wait_for_venv: bool = True): 449 | """Setup the computer server in the Windows Sandbox using RPyC. 
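        (Added summary of the implementation below: the bundled setup_script.ps1
        is written into the sandbox over RPyC, launched in the background with
        "powershell.exe -ExecutionPolicy Bypass -NoExit -File ...", and on a
        first run the provider sleeps to give the shared venv cache time to
        build before polling for the server address file.)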
450 | 451 | Args: 452 | sandbox: The Windows Sandbox instance 453 | name: Name of the sandbox 454 | visible: Whether the opened process should be visible (default: False) 455 | """ 456 | try: 457 | self.logger.info(f"Setting up computer server in sandbox {name}...") 458 | 459 | # Read the PowerShell setup script 460 | script_path = os.path.join(os.path.dirname(__file__), "setup_script.ps1") 461 | with open(script_path, 'r', encoding='utf-8') as f: 462 | setup_script_content = f.read() 463 | 464 | # Write the setup script to the sandbox using RPyC 465 | script_dest_path = r"C:\Users\WDAGUtilityAccount\setup_cua.ps1" 466 | 467 | self.logger.info(f"Writing setup script to {script_dest_path}") 468 | with sandbox.rpyc.builtin.open(script_dest_path, 'w') as f: 469 | f.write(setup_script_content) 470 | 471 | # Execute the PowerShell script in the background 472 | self.logger.info("Executing setup script in sandbox...") 473 | 474 | # Use subprocess to run PowerShell script 475 | import subprocess 476 | powershell_cmd = [ 477 | "powershell.exe", 478 | "-ExecutionPolicy", "Bypass", 479 | "-NoExit", # Keep window open after script completes 480 | "-File", script_dest_path 481 | ] 482 | 483 | # Set creation flags based on visibility preference 484 | if visible: 485 | # CREATE_NEW_CONSOLE - creates a new console window (visible) 486 | creation_flags = 0x00000010 487 | else: 488 | creation_flags = 0x08000000 # CREATE_NO_WINDOW 489 | 490 | # Start the process using RPyC 491 | process = sandbox.rpyc.modules.subprocess.Popen( 492 | powershell_cmd, 493 | creationflags=creation_flags, 494 | shell=False 495 | ) 496 | 497 | if wait_for_venv: 498 | print("Waiting for venv to be created for the first time setup of Windows Sandbox...") 499 | print("This may take a minute...") 500 | await asyncio.sleep(120) 501 | 502 | ip = await self.get_ip(name) 503 | self.logger.info(f"Sandbox IP: {ip}") 504 | self.logger.info(f"Setup script started in background in sandbox {name} with PID: {process.pid}") 505 | 506 | except Exception as e: 507 | self.logger.error(f"Failed to setup computer server in sandbox {name}: {e}") 508 | import traceback 509 | self.logger.error(f"Stack trace: {traceback.format_exc()}") 510 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/lume/provider.py: -------------------------------------------------------------------------------- ```python 1 | """Lume VM provider implementation using curl commands. 2 | 3 | This provider uses direct curl commands to interact with the Lume API, 4 | removing the dependency on the pylume Python package. 5 | """ 6 | 7 | import os 8 | import re 9 | import asyncio 10 | import json 11 | import logging 12 | import subprocess 13 | import urllib.parse 14 | from typing import Dict, Any, Optional, List, Tuple 15 | 16 | from ..base import BaseVMProvider, VMProviderType 17 | from ...logger import Logger, LogLevel 18 | from ..lume_api import ( 19 | lume_api_get, 20 | lume_api_run, 21 | lume_api_stop, 22 | lume_api_update, 23 | lume_api_pull, 24 | HAS_CURL, 25 | parse_memory 26 | ) 27 | 28 | # Setup logging 29 | logger = logging.getLogger(__name__) 30 | 31 | class LumeProvider(BaseVMProvider): 32 | """Lume VM provider implementation using direct curl commands. 33 | 34 | This provider uses curl to interact with the Lume API server, 35 | removing the dependency on the pylume Python package. 
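    Example (added illustration; the constructor arguments shown are the
    documented defaults and "my-vm" is a placeholder name):

        async with LumeProvider(port=7777, host="localhost") as provider:
            vms = await provider.list_vms()
            info = await provider.get_vm("my-vm")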
36 | """ 37 | 38 | def __init__( 39 | self, 40 | port: int = 7777, 41 | host: str = "localhost", 42 | storage: Optional[str] = None, 43 | verbose: bool = False, 44 | ephemeral: bool = False, 45 | ): 46 | """Initialize the Lume provider. 47 | 48 | Args: 49 | port: Port for the Lume API server (default: 7777) 50 | host: Host to use for API connections (default: localhost) 51 | storage: Path to store VM data 52 | verbose: Enable verbose logging 53 | """ 54 | if not HAS_CURL: 55 | raise ImportError( 56 | "curl is required for LumeProvider. " 57 | "Please ensure it is installed and in your PATH." 58 | ) 59 | 60 | self.host = host 61 | self.port = port # Default port for Lume API 62 | self.storage = storage 63 | self.verbose = verbose 64 | self.ephemeral = ephemeral # If True, VMs will be deleted after stopping 65 | 66 | # Base API URL for Lume API calls 67 | self.api_base_url = f"http://{self.host}:{self.port}" 68 | 69 | self.logger = logging.getLogger(__name__) 70 | 71 | @property 72 | def provider_type(self) -> VMProviderType: 73 | """Get the provider type.""" 74 | return VMProviderType.LUME 75 | 76 | async def __aenter__(self): 77 | """Enter async context manager.""" 78 | # No initialization needed, just return self 79 | return self 80 | 81 | async def __aexit__(self, exc_type, exc_val, exc_tb): 82 | """Exit async context manager.""" 83 | # No cleanup needed 84 | pass 85 | 86 | def _lume_api_get(self, vm_name: str = "", storage: Optional[str] = None, debug: bool = False) -> Dict[str, Any]: 87 | """Get VM information using shared lume_api function. 88 | 89 | Args: 90 | vm_name: Optional name of the VM to get info for. 91 | If empty, lists all VMs. 92 | storage: Optional storage path override. If provided, this will be used instead of self.storage 93 | debug: Whether to show debug output 94 | 95 | Returns: 96 | Dictionary with VM status information parsed from JSON response 97 | """ 98 | # Use the shared implementation from lume_api module 99 | return lume_api_get( 100 | vm_name=vm_name, 101 | host=self.host, 102 | port=self.port, 103 | storage=storage if storage is not None else self.storage, 104 | debug=debug, 105 | verbose=self.verbose 106 | ) 107 | 108 | def _lume_api_run(self, vm_name: str, run_opts: Dict[str, Any], debug: bool = False) -> Dict[str, Any]: 109 | """Run a VM using shared lume_api function. 110 | 111 | Args: 112 | vm_name: Name of the VM to run 113 | run_opts: Dictionary of run options 114 | debug: Whether to show debug output 115 | 116 | Returns: 117 | Dictionary with API response or error information 118 | """ 119 | # Use the shared implementation from lume_api module 120 | return lume_api_run( 121 | vm_name=vm_name, 122 | host=self.host, 123 | port=self.port, 124 | run_opts=run_opts, 125 | storage=self.storage, 126 | debug=debug, 127 | verbose=self.verbose 128 | ) 129 | 130 | def _lume_api_stop(self, vm_name: str, debug: bool = False) -> Dict[str, Any]: 131 | """Stop a VM using shared lume_api function. 
132 | 133 | Args: 134 | vm_name: Name of the VM to stop 135 | debug: Whether to show debug output 136 | 137 | Returns: 138 | Dictionary with API response or error information 139 | """ 140 | # Use the shared implementation from lume_api module 141 | return lume_api_stop( 142 | vm_name=vm_name, 143 | host=self.host, 144 | port=self.port, 145 | storage=self.storage, 146 | debug=debug, 147 | verbose=self.verbose 148 | ) 149 | 150 | def _lume_api_update(self, vm_name: str, update_opts: Dict[str, Any], debug: bool = False) -> Dict[str, Any]: 151 | """Update VM configuration using shared lume_api function. 152 | 153 | Args: 154 | vm_name: Name of the VM to update 155 | update_opts: Dictionary of update options 156 | debug: Whether to show debug output 157 | 158 | Returns: 159 | Dictionary with API response or error information 160 | """ 161 | # Use the shared implementation from lume_api module 162 | return lume_api_update( 163 | vm_name=vm_name, 164 | host=self.host, 165 | port=self.port, 166 | update_opts=update_opts, 167 | storage=self.storage, 168 | debug=debug, 169 | verbose=self.verbose 170 | ) 171 | 172 | async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 173 | """Get VM information by name. 174 | 175 | Args: 176 | name: Name of the VM to get information for 177 | storage: Optional storage path override. If provided, this will be used 178 | instead of the provider's default storage path. 179 | 180 | Returns: 181 | Dictionary with VM information including status, IP address, etc. 182 | 183 | Note: 184 | If storage is not provided, the provider's default storage path will be used. 185 | The storage parameter allows overriding the storage location for this specific call. 186 | """ 187 | if not HAS_CURL: 188 | logger.error("curl is not available. 
Cannot get VM status.") 189 | return { 190 | "name": name, 191 | "status": "unavailable", 192 | "error": "curl is not available" 193 | } 194 | 195 | # First try to get detailed VM info from the API 196 | try: 197 | # Query the Lume API for VM status using the provider's storage_path 198 | vm_info = self._lume_api_get( 199 | vm_name=name, 200 | storage=storage if storage is not None else self.storage, 201 | debug=self.verbose 202 | ) 203 | 204 | # Check for API errors 205 | if "error" in vm_info: 206 | logger.debug(f"API request error: {vm_info['error']}") 207 | # If we got an error from the API, report the VM as not ready yet 208 | return { 209 | "name": name, 210 | "status": "starting", # VM is still starting - do not attempt to connect yet 211 | "api_status": "error", 212 | "error": vm_info["error"] 213 | } 214 | 215 | # Process the VM status information 216 | vm_status = vm_info.get("status", "unknown") 217 | 218 | # Check if VM is stopped or not running - don't wait for IP in this case 219 | if vm_status == "stopped": 220 | logger.info(f"VM {name} is in '{vm_status}' state - not waiting for IP address") 221 | # Return the status as-is without waiting for an IP 222 | result = { 223 | "name": name, 224 | "status": vm_status, 225 | **vm_info # Include all original fields from the API response 226 | } 227 | return result 228 | 229 | # Handle field name differences between APIs 230 | # Some APIs use camelCase, others use snake_case 231 | if "vncUrl" in vm_info: 232 | vnc_url = vm_info["vncUrl"] 233 | elif "vnc_url" in vm_info: 234 | vnc_url = vm_info["vnc_url"] 235 | else: 236 | vnc_url = "" 237 | 238 | if "ipAddress" in vm_info: 239 | ip_address = vm_info["ipAddress"] 240 | elif "ip_address" in vm_info: 241 | ip_address = vm_info["ip_address"] 242 | else: 243 | # If no IP address is provided and VM is supposed to be running, 244 | # report it as still starting 245 | ip_address = None 246 | logger.info(f"VM {name} is in '{vm_status}' state but no IP address found - reporting as still starting") 247 | 248 | logger.info(f"VM {name} status: {vm_status}") 249 | 250 | # Return the complete status information 251 | result = { 252 | "name": name, 253 | "status": vm_status if vm_status else "running", 254 | "ip_address": ip_address, 255 | "vnc_url": vnc_url, 256 | "api_status": "ok" 257 | } 258 | 259 | # Include all original fields from the API response 260 | if isinstance(vm_info, dict): 261 | for key, value in vm_info.items(): 262 | if key not in result: # Don't override our carefully processed fields 263 | result[key] = value 264 | 265 | return result 266 | 267 | except Exception as e: 268 | logger.error(f"Failed to get VM status: {e}") 269 | # Return a fallback status that indicates the VM is not ready yet 270 | return { 271 | "name": name, 272 | "status": "initializing", # VM is still initializing 273 | "error": f"Failed to get VM status: {str(e)}" 274 | } 275 | 276 | async def list_vms(self) -> List[Dict[str, Any]]: 277 | """List all available VMs.""" 278 | result = self._lume_api_get(debug=self.verbose) 279 | 280 | # Extract the VMs list from the response 281 | if "vms" in result and isinstance(result["vms"], list): 282 | return result["vms"] 283 | elif "error" in result: 284 | logger.error(f"Error listing VMs: {result['error']}") 285 | return [] 286 | else: 287 | return [] 288 | 289 | async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: 290 | """Run a VM with the given options. 
291 | 292 | If the VM does not exist in the storage location, this will attempt to pull it 293 | from the Lume registry first. 294 | 295 | Args: 296 | image: Image name to use when pulling the VM if it doesn't exist 297 | name: Name of the VM to run 298 | run_opts: Dictionary of run options (memory, cpu, etc.) 299 | storage: Optional storage path override. If provided, this will be used 300 | instead of the provider's default storage path. 301 | 302 | Returns: 303 | Dictionary with VM run status and information 304 | """ 305 | # First check if VM exists by trying to get its info 306 | vm_info = await self.get_vm(name, storage=storage) 307 | 308 | if "error" in vm_info: 309 | # VM doesn't exist, try to pull it 310 | self.logger.info(f"VM {name} not found, attempting to pull image {image} from registry...") 311 | 312 | # Call pull_vm with the image parameter 313 | pull_result = await self.pull_vm( 314 | name=name, 315 | image=image, 316 | storage=storage 317 | ) 318 | 319 | # Check if pull was successful 320 | if "error" in pull_result: 321 | self.logger.error(f"Failed to pull VM image: {pull_result['error']}") 322 | return pull_result # Return the error from pull 323 | 324 | self.logger.info(f"Successfully pulled VM image {image} as {name}") 325 | 326 | # Now run the VM with the given options 327 | self.logger.info(f"Running VM {name} with options: {run_opts}") 328 | 329 | from ..lume_api import lume_api_run 330 | return lume_api_run( 331 | vm_name=name, 332 | host=self.host, 333 | port=self.port, 334 | run_opts=run_opts, 335 | storage=storage if storage is not None else self.storage, 336 | debug=self.verbose, 337 | verbose=self.verbose 338 | ) 339 | 340 | async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 341 | """Stop a running VM. 342 | 343 | If this provider was initialized with ephemeral=True, the VM will also 344 | be deleted after it is stopped. 345 | 346 | Args: 347 | name: Name of the VM to stop 348 | storage: Optional storage path override 349 | 350 | Returns: 351 | Dictionary with stop status and information 352 | """ 353 | # Stop the VM first 354 | stop_result = self._lume_api_stop(name, debug=self.verbose) 355 | 356 | # Log ephemeral status for debugging 357 | self.logger.info(f"Ephemeral mode status: {self.ephemeral}") 358 | 359 | # If ephemeral mode is enabled, delete the VM after stopping 360 | if self.ephemeral and (stop_result.get("success", False) or "error" not in stop_result): 361 | self.logger.info(f"Ephemeral mode enabled - deleting VM {name} after stopping") 362 | try: 363 | delete_result = await self.delete_vm(name, storage=storage) 364 | 365 | # Return combined result 366 | return { 367 | **stop_result, # Include all stop result info 368 | "deleted": True, 369 | "delete_result": delete_result 370 | } 371 | except Exception as e: 372 | self.logger.error(f"Failed to delete ephemeral VM {name}: {e}") 373 | # Include the error but still return stop result 374 | return { 375 | **stop_result, 376 | "deleted": False, 377 | "delete_error": str(e) 378 | } 379 | 380 | # Just return the stop result if not ephemeral 381 | return stop_result 382 | 383 | async def pull_vm( 384 | self, 385 | name: str, 386 | image: str, 387 | storage: Optional[str] = None, 388 | registry: str = "ghcr.io", 389 | organization: str = "trycua", 390 | pull_opts: Optional[Dict[str, Any]] = None, 391 | ) -> Dict[str, Any]: 392 | """Pull a VM image from the registry. 393 | 394 | Args: 395 | name: Name for the VM after pulling 396 | image: The image name to pull (e.g. 
'macos-sequoia-cua:latest') 397 | storage: Optional storage path to use 398 | registry: Registry to pull from (default: ghcr.io) 399 | organization: Organization in registry (default: trycua) 400 | pull_opts: Additional options for pulling the VM (optional) 401 | 402 | Returns: 403 | Dictionary with information about the pulled VM 404 | 405 | Raises: 406 | RuntimeError: If pull operation fails or image is not provided 407 | """ 408 | # Validate image parameter 409 | if not image: 410 | raise ValueError("Image parameter is required for pull_vm") 411 | 412 | self.logger.info(f"Pulling VM image '{image}' as '{name}'") 413 | self.logger.info("You can check the pull progress using: lume logs -f") 414 | 415 | # Set default pull_opts if not provided 416 | if pull_opts is None: 417 | pull_opts = {} 418 | 419 | # Log information about the operation 420 | self.logger.debug(f"Pull storage location: {storage or 'default'}") 421 | 422 | try: 423 | # Call the lume_api_pull function from lume_api.py 424 | from ..lume_api import lume_api_pull 425 | 426 | result = lume_api_pull( 427 | image=image, 428 | name=name, 429 | host=self.host, 430 | port=self.port, 431 | storage=storage if storage is not None else self.storage, 432 | registry=registry, 433 | organization=organization, 434 | debug=self.verbose, 435 | verbose=self.verbose 436 | ) 437 | 438 | # Check for errors in the result 439 | if "error" in result: 440 | self.logger.error(f"Failed to pull VM image: {result['error']}") 441 | return result 442 | 443 | self.logger.info(f"Successfully pulled VM image '{image}' as '{name}'") 444 | return result 445 | except Exception as e: 446 | self.logger.error(f"Failed to pull VM image '{image}': {e}") 447 | return {"error": f"Failed to pull VM: {str(e)}"} 448 | 449 | async def delete_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 450 | """Delete a VM permanently. 451 | 452 | Args: 453 | name: Name of the VM to delete 454 | storage: Optional storage path override 455 | 456 | Returns: 457 | Dictionary with delete status and information 458 | """ 459 | self.logger.info(f"Deleting VM {name}...") 460 | 461 | try: 462 | # Call the lume_api_delete function we created 463 | from ..lume_api import lume_api_delete 464 | 465 | result = lume_api_delete( 466 | vm_name=name, 467 | host=self.host, 468 | port=self.port, 469 | storage=storage if storage is not None else self.storage, 470 | debug=self.verbose, 471 | verbose=self.verbose 472 | ) 473 | 474 | # Check for errors in the result 475 | if "error" in result: 476 | self.logger.error(f"Failed to delete VM: {result['error']}") 477 | return result 478 | 479 | self.logger.info(f"Successfully deleted VM '{name}'") 480 | return result 481 | except Exception as e: 482 | self.logger.error(f"Failed to delete VM '{name}': {e}") 483 | return {"error": f"Failed to delete VM: {str(e)}"} 484 | 485 | async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: 486 | """Update VM configuration.""" 487 | return self._lume_api_update(name, update_opts, debug=self.verbose) 488 | 489 | async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 490 | raise NotImplementedError("LumeProvider does not support restarting VMs.") 491 | 492 | async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: 493 | """Get the IP address of a VM, waiting indefinitely until it's available. 
494 | 495 | Args: 496 | name: Name of the VM to get the IP for 497 | storage: Optional storage path override 498 | retry_delay: Delay between retries in seconds (default: 2) 499 | 500 | Returns: 501 | IP address of the VM when it becomes available 502 | """ 503 | # Track total attempts for logging purposes 504 | total_attempts = 0 505 | 506 | # Loop indefinitely until we get a valid IP 507 | while True: 508 | total_attempts += 1 509 | 510 | # Log retry message but not on first attempt 511 | if total_attempts > 1: 512 | self.logger.info(f"Waiting for VM {name} IP address (attempt {total_attempts})...") 513 | 514 | try: 515 | # Get VM information 516 | vm_info = await self.get_vm(name, storage=storage) 517 | 518 | # Check if we got a valid IP 519 | ip = vm_info.get("ip_address", None) 520 | if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): 521 | self.logger.info(f"Got valid VM IP address: {ip}") 522 | return ip 523 | 524 | # Check the VM status 525 | status = vm_info.get("status", "unknown") 526 | 527 | # If VM is not running yet, log and wait 528 | if status != "running": 529 | self.logger.info(f"VM is not running yet (status: {status}). Waiting...") 530 | # If VM is running but no IP yet, wait and retry 531 | else: 532 | self.logger.info("VM is running but no valid IP address yet. Waiting...") 533 | 534 | except Exception as e: 535 | self.logger.warning(f"Error getting VM {name} IP: {e}, continuing to wait...") 536 | 537 | # Wait before next retry 538 | await asyncio.sleep(retry_delay) 539 | 540 | # Add progress log every 10 attempts 541 | if total_attempts % 10 == 0: 542 | self.logger.info(f"Still waiting for VM {name} IP after {total_attempts} attempts...") 543 | 544 | 545 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/docker/provider.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Docker VM provider implementation. 3 | 4 | This provider uses Docker containers running the CUA Ubuntu image to create 5 | Linux VMs with computer-server. It handles VM lifecycle operations through Docker 6 | commands and container management. 7 | """ 8 | 9 | import logging 10 | import json 11 | import asyncio 12 | from typing import Dict, List, Optional, Any 13 | import subprocess 14 | import time 15 | import re 16 | 17 | from ..base import BaseVMProvider, VMProviderType 18 | 19 | # Setup logging 20 | logger = logging.getLogger(__name__) 21 | 22 | # Check if Docker is available 23 | try: 24 | subprocess.run(["docker", "--version"], capture_output=True, check=True) 25 | HAS_DOCKER = True 26 | except (subprocess.SubprocessError, FileNotFoundError): 27 | HAS_DOCKER = False 28 | 29 | 30 | class DockerProvider(BaseVMProvider): 31 | """ 32 | Docker VM Provider implementation using Docker containers. 33 | 34 | This provider uses Docker to run containers with the CUA Ubuntu image 35 | that includes computer-server for remote computer use. 36 | """ 37 | 38 | def __init__( 39 | self, 40 | port: Optional[int] = 8000, 41 | host: str = "localhost", 42 | storage: Optional[str] = None, 43 | shared_path: Optional[str] = None, 44 | image: str = "trycua/cua-ubuntu:latest", 45 | verbose: bool = False, 46 | ephemeral: bool = False, 47 | vnc_port: Optional[int] = 6901, 48 | ): 49 | """Initialize the Docker VM Provider. 
50 | 51 | Args: 52 | port: Currently unused (VM provider port) 53 | host: Hostname for the API server (default: localhost) 54 | storage: Path for persistent VM storage 55 | shared_path: Path for shared folder between host and container 56 | image: Docker image to use (default: "trycua/cua-ubuntu:latest") 57 | Supported images: 58 | - "trycua/cua-ubuntu:latest" (Kasm-based) 59 | - "trycua/cua-docker-xfce:latest" (vanilla XFCE) 60 | verbose: Enable verbose logging 61 | ephemeral: Use ephemeral (temporary) storage 62 | vnc_port: Port for VNC interface (default: 6901) 63 | """ 64 | self.host = host 65 | self.api_port = 8000 66 | self.vnc_port = vnc_port 67 | self.ephemeral = ephemeral 68 | 69 | # Handle ephemeral storage (temporary directory) 70 | if ephemeral: 71 | self.storage = "ephemeral" 72 | else: 73 | self.storage = storage 74 | 75 | self.shared_path = shared_path 76 | self.image = image 77 | self.verbose = verbose 78 | self._container_id = None 79 | self._running_containers = {} # Track running containers by name 80 | 81 | # Detect image type and configure user directory accordingly 82 | self._detect_image_config() 83 | 84 | def _detect_image_config(self): 85 | """Detect image type and configure paths accordingly.""" 86 | # Detect if this is a docker-xfce image or Kasm image 87 | if "docker-xfce" in self.image.lower() or "xfce" in self.image.lower(): 88 | self._home_dir = "/home/cua" 89 | self._image_type = "docker-xfce" 90 | logger.info(f"Detected docker-xfce image: using {self._home_dir}") 91 | else: 92 | # Default to Kasm configuration 93 | self._home_dir = "/home/kasm-user" 94 | self._image_type = "kasm" 95 | logger.info(f"Detected Kasm image: using {self._home_dir}") 96 | 97 | @property 98 | def provider_type(self) -> VMProviderType: 99 | """Return the provider type.""" 100 | return VMProviderType.DOCKER 101 | 102 | def _parse_memory(self, memory_str: str) -> str: 103 | """Parse memory string to Docker format. 104 | 105 | Examples: 106 | "8GB" -> "8g" 107 | "1024MB" -> "1024m" 108 | "512" -> "512m" 109 | """ 110 | if isinstance(memory_str, int): 111 | return f"{memory_str}m" 112 | 113 | if isinstance(memory_str, str): 114 | # Extract number and unit 115 | match = re.match(r"(\d+)([A-Za-z]*)", memory_str) 116 | if match: 117 | value, unit = match.groups() 118 | unit = unit.upper() 119 | 120 | if unit == "GB" or unit == "G": 121 | return f"{value}g" 122 | elif unit == "MB" or unit == "M" or unit == "": 123 | return f"{value}m" 124 | 125 | # Default fallback 126 | logger.warning(f"Could not parse memory string '{memory_str}', using 4g default") 127 | return "4g" # Default to 4GB 128 | 129 | async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 130 | """Get VM information by name. 131 | 132 | Args: 133 | name: Name of the VM to get information for 134 | storage: Optional storage path override. If provided, this will be used 135 | instead of the provider's default storage path. 136 | 137 | Returns: 138 | Dictionary with VM information including status, IP address, etc. 
139 | """ 140 | try: 141 | # Check if container exists and get its status 142 | cmd = ["docker", "inspect", name] 143 | result = subprocess.run(cmd, capture_output=True, text=True) 144 | 145 | if result.returncode != 0: 146 | # Container doesn't exist 147 | return { 148 | "name": name, 149 | "status": "not_found", 150 | "ip_address": None, 151 | "ports": {}, 152 | "image": self.image, 153 | "provider": "docker" 154 | } 155 | 156 | # Parse container info 157 | container_info = json.loads(result.stdout)[0] 158 | state = container_info["State"] 159 | network_settings = container_info["NetworkSettings"] 160 | 161 | # Determine status 162 | if state["Running"]: 163 | status = "running" 164 | elif state["Paused"]: 165 | status = "paused" 166 | else: 167 | status = "stopped" 168 | 169 | # Get IP address 170 | ip_address = network_settings.get("IPAddress", "") 171 | if not ip_address and "Networks" in network_settings: 172 | # Try to get IP from bridge network 173 | for network_name, network_info in network_settings["Networks"].items(): 174 | if network_info.get("IPAddress"): 175 | ip_address = network_info["IPAddress"] 176 | break 177 | 178 | # Get port mappings 179 | ports = {} 180 | if "Ports" in network_settings and network_settings["Ports"]: 181 | # network_settings["Ports"] is a dict like: 182 | # {'6901/tcp': [{'HostIp': '0.0.0.0', 'HostPort': '6901'}, ...], ...} 183 | for container_port, port_mappings in network_settings["Ports"].items(): 184 | if port_mappings: # Check if there are any port mappings 185 | # Take the first mapping (usually the IPv4 one) 186 | for mapping in port_mappings: 187 | if mapping.get("HostPort"): 188 | ports[container_port] = mapping["HostPort"] 189 | break # Use the first valid mapping 190 | 191 | return { 192 | "name": name, 193 | "status": status, 194 | "ip_address": ip_address or "127.0.0.1", # Use localhost if no IP 195 | "ports": ports, 196 | "image": container_info["Config"]["Image"], 197 | "provider": "docker", 198 | "container_id": container_info["Id"][:12], # Short ID 199 | "created": container_info["Created"], 200 | "started": state.get("StartedAt", ""), 201 | } 202 | 203 | except Exception as e: 204 | logger.error(f"Error getting VM info for {name}: {e}") 205 | import traceback 206 | traceback.print_exc() 207 | return { 208 | "name": name, 209 | "status": "error", 210 | "error": str(e), 211 | "provider": "docker" 212 | } 213 | 214 | async def list_vms(self) -> List[Dict[str, Any]]: 215 | """List all Docker containers managed by this provider.""" 216 | try: 217 | # List all containers (running and stopped) with the CUA image 218 | cmd = ["docker", "ps", "-a", "--filter", f"ancestor={self.image}", "--format", "json"] 219 | result = subprocess.run(cmd, capture_output=True, text=True, check=True) 220 | 221 | containers = [] 222 | if result.stdout.strip(): 223 | for line in result.stdout.strip().split('\n'): 224 | if line.strip(): 225 | container_data = json.loads(line) 226 | vm_info = await self.get_vm(container_data["Names"]) 227 | containers.append(vm_info) 228 | 229 | return containers 230 | 231 | except subprocess.CalledProcessError as e: 232 | logger.error(f"Error listing containers: {e.stderr}") 233 | return [] 234 | except Exception as e: 235 | logger.error(f"Error listing VMs: {e}") 236 | import traceback 237 | traceback.print_exc() 238 | return [] 239 | 240 | async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: 241 | """Run a VM with the given options. 
242 | 243 | Args: 244 | image: Name/tag of the Docker image to use 245 | name: Name of the container to run 246 | run_opts: Options for running the VM, including: 247 | - memory: Memory limit (e.g., "4GB", "2048MB") 248 | - cpu: CPU limit (e.g., 2 for 2 cores) 249 | - vnc_port: Specific port for VNC interface 250 | - api_port: Specific port for computer-server API 251 | 252 | Returns: 253 | Dictionary with VM status information 254 | """ 255 | try: 256 | # Check if container already exists 257 | existing_vm = await self.get_vm(name, storage) 258 | if existing_vm["status"] == "running": 259 | logger.info(f"Container {name} is already running") 260 | return existing_vm 261 | elif existing_vm["status"] in ["stopped", "paused"]: 262 | # Start existing container 263 | logger.info(f"Starting existing container {name}") 264 | start_cmd = ["docker", "start", name] 265 | result = subprocess.run(start_cmd, capture_output=True, text=True, check=True) 266 | 267 | # Wait for container to be ready 268 | await self._wait_for_container_ready(name) 269 | return await self.get_vm(name, storage) 270 | 271 | # Use provided image or default 272 | docker_image = image if image != "default" else self.image 273 | 274 | # Build docker run command 275 | cmd = ["docker", "run", "-d", "--name", name] 276 | 277 | # Add memory limit if specified 278 | if "memory" in run_opts: 279 | memory_limit = self._parse_memory(run_opts["memory"]) 280 | cmd.extend(["--memory", memory_limit]) 281 | 282 | # Add CPU limit if specified 283 | if "cpu" in run_opts: 284 | cpu_count = str(run_opts["cpu"]) 285 | cmd.extend(["--cpus", cpu_count]) 286 | 287 | # Add port mappings 288 | vnc_port = run_opts.get("vnc_port", self.vnc_port) 289 | api_port = run_opts.get("api_port", self.api_port) 290 | 291 | if vnc_port: 292 | cmd.extend(["-p", f"{vnc_port}:6901"]) # VNC port 293 | if api_port: 294 | cmd.extend(["-p", f"{api_port}:8000"]) # computer-server API port 295 | 296 | # Add volume mounts if storage is specified 297 | storage_path = storage or self.storage 298 | if storage_path and storage_path != "ephemeral": 299 | # Mount storage directory using detected home directory 300 | cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"]) 301 | 302 | # Add shared path if specified 303 | if self.shared_path: 304 | # Mount shared directory using detected home directory 305 | cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"]) 306 | 307 | # Add environment variables 308 | cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password 309 | cmd.extend(["-e", "VNCOPTIONS=-disableBasicAuth"]) # Disable VNC basic auth 310 | 311 | # Add the image 312 | cmd.append(docker_image) 313 | 314 | logger.info(f"Running Docker container with command: {' '.join(cmd)}") 315 | 316 | # Run the container 317 | result = subprocess.run(cmd, capture_output=True, text=True, check=True) 318 | container_id = result.stdout.strip() 319 | 320 | logger.info(f"Container {name} started with ID: {container_id[:12]}") 321 | 322 | # Store container info 323 | self._container_id = container_id 324 | self._running_containers[name] = container_id 325 | 326 | # Wait for container to be ready 327 | await self._wait_for_container_ready(name) 328 | 329 | # Return VM info 330 | vm_info = await self.get_vm(name, storage) 331 | vm_info["container_id"] = container_id[:12] 332 | 333 | return vm_info 334 | 335 | except subprocess.CalledProcessError as e: 336 | error_msg = f"Failed to run container {name}: {e.stderr}" 337 | logger.error(error_msg) 338 | return { 339 | "name": name, 
340 | "status": "error", 341 | "error": error_msg, 342 | "provider": "docker" 343 | } 344 | except Exception as e: 345 | error_msg = f"Error running VM {name}: {e}" 346 | logger.error(error_msg) 347 | return { 348 | "name": name, 349 | "status": "error", 350 | "error": error_msg, 351 | "provider": "docker" 352 | } 353 | 354 | async def _wait_for_container_ready(self, container_name: str, timeout: int = 60) -> bool: 355 | """Wait for the Docker container to be fully ready. 356 | 357 | Args: 358 | container_name: Name of the Docker container to check 359 | timeout: Maximum time to wait in seconds (default: 60 seconds) 360 | 361 | Returns: 362 | True if the container is running and ready 363 | """ 364 | logger.info(f"Waiting for container {container_name} to be ready...") 365 | 366 | start_time = time.time() 367 | while time.time() - start_time < timeout: 368 | try: 369 | # Check if container is running 370 | vm_info = await self.get_vm(container_name) 371 | if vm_info["status"] == "running": 372 | logger.info(f"Container {container_name} is running") 373 | 374 | # Additional check: try to connect to computer-server API 375 | # This is optional - we'll just wait a bit more for services to start 376 | await asyncio.sleep(5) 377 | return True 378 | 379 | except Exception as e: 380 | logger.debug(f"Container {container_name} not ready yet: {e}") 381 | 382 | await asyncio.sleep(2) 383 | 384 | logger.warning(f"Container {container_name} did not become ready within {timeout} seconds") 385 | return False 386 | 387 | async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 388 | """Stop a running VM by stopping the Docker container.""" 389 | try: 390 | logger.info(f"Stopping container {name}") 391 | 392 | # Stop the container 393 | cmd = ["docker", "stop", name] 394 | result = subprocess.run(cmd, capture_output=True, text=True, check=True) 395 | 396 | # Remove from running containers tracking 397 | if name in self._running_containers: 398 | del self._running_containers[name] 399 | 400 | logger.info(f"Container {name} stopped successfully") 401 | 402 | return { 403 | "name": name, 404 | "status": "stopped", 405 | "message": "Container stopped successfully", 406 | "provider": "docker" 407 | } 408 | 409 | except subprocess.CalledProcessError as e: 410 | error_msg = f"Failed to stop container {name}: {e.stderr}" 411 | logger.error(error_msg) 412 | return { 413 | "name": name, 414 | "status": "error", 415 | "error": error_msg, 416 | "provider": "docker" 417 | } 418 | except Exception as e: 419 | error_msg = f"Error stopping VM {name}: {e}" 420 | logger.error(error_msg) 421 | return { 422 | "name": name, 423 | "status": "error", 424 | "error": error_msg, 425 | "provider": "docker" 426 | } 427 | 428 | async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: 429 | raise NotImplementedError("DockerProvider does not support restarting VMs.") 430 | 431 | async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: 432 | """Update VM configuration. 433 | 434 | Note: Docker containers cannot be updated while running. 435 | This method will return an error suggesting to recreate the container. 436 | """ 437 | return { 438 | "name": name, 439 | "status": "error", 440 | "error": "Docker containers cannot be updated while running. 
Please stop and recreate the container with new options.", 441 | "provider": "docker" 442 | } 443 | 444 | async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: 445 | """Get the IP address of a VM, waiting indefinitely until it's available. 446 | 447 | Args: 448 | name: Name of the VM to get the IP for 449 | storage: Optional storage path override 450 | retry_delay: Delay between retries in seconds (default: 2) 451 | 452 | Returns: 453 | IP address of the VM when it becomes available 454 | """ 455 | logger.info(f"Getting IP address for container {name}") 456 | 457 | total_attempts = 0 458 | while True: 459 | total_attempts += 1 460 | 461 | try: 462 | vm_info = await self.get_vm(name, storage) 463 | 464 | if vm_info["status"] == "error": 465 | raise Exception(f"VM is in error state: {vm_info.get('error', 'Unknown error')}") 466 | 467 | # TODO: for now, return localhost 468 | # it seems the docker container is not accessible from the host 469 | # on WSL2, unless you port forward? not sure 470 | if True: 471 | logger.warning("Overriding container IP with localhost") 472 | return "localhost" 473 | 474 | # Check if we got a valid IP 475 | ip = vm_info.get("ip_address", None) 476 | if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): 477 | logger.info(f"Got valid container IP address: {ip}") 478 | return ip 479 | 480 | # For Docker containers, we can also use localhost if ports are mapped 481 | if vm_info["status"] == "running" and vm_info.get("ports"): 482 | logger.info(f"Container is running with port mappings, using localhost") 483 | return "127.0.0.1" 484 | 485 | # Check the container status 486 | status = vm_info.get("status", "unknown") 487 | 488 | if status == "stopped": 489 | logger.info(f"Container status is {status}, but still waiting for it to start") 490 | elif status != "running": 491 | logger.info(f"Container is not running yet (status: {status}). Waiting...") 492 | else: 493 | logger.info("Container is running but no valid IP address yet. Waiting...") 494 | 495 | except Exception as e: 496 | logger.warning(f"Error getting container {name} IP: {e}, continuing to wait...") 497 | 498 | # Wait before next retry 499 | await asyncio.sleep(retry_delay) 500 | 501 | # Add progress log every 10 attempts 502 | if total_attempts % 10 == 0: 503 | logger.info(f"Still waiting for container {name} IP after {total_attempts} attempts...") 504 | 505 | async def __aenter__(self): 506 | """Async context manager entry.""" 507 | logger.debug("Entering DockerProvider context") 508 | return self 509 | 510 | async def __aexit__(self, exc_type, exc_val, exc_tb): 511 | """Async context manager exit. 512 | 513 | This method handles cleanup of running containers if needed. 
514 | """ 515 | logger.debug(f"Exiting DockerProvider context, handling exceptions: {exc_type}") 516 | try: 517 | # Optionally stop running containers on context exit 518 | # For now, we'll leave containers running as they might be needed 519 | # Users can manually stop them if needed 520 | pass 521 | except Exception as e: 522 | logger.error(f"Error during DockerProvider cleanup: {e}") 523 | if exc_type is None: 524 | raise 525 | return False 526 | ``` -------------------------------------------------------------------------------- /blog/build-your-own-operator-on-macos-1.md: -------------------------------------------------------------------------------- ```markdown 1 | # Build Your Own Operator on macOS - Part 1 2 | 3 | *Published on March 31, 2025 by Francesco Bonacci* 4 | 5 | In this first blogpost, we'll learn how to build our own Computer-Use Operator using OpenAI's `computer-use-preview` model. But first, let's understand what some common terms mean: 6 | 7 | - A **Virtual Machine (VM)** is like a computer within your computer - a safe, isolated environment where the AI can work without affecting your main system. 8 | - **computer-use-preview** is OpenAI's specialized language model trained to understand and interact with computer interfaces through screenshots. 9 | - A **Computer-Use Agent** is an AI agent that can control a computer just like a human would - clicking buttons, typing text, and interacting with applications. 10 | 11 | Our Operator will run in an isolated macOS VM, by making use of our [cua-computer](https://github.com/trycua/cua/tree/main/libs/computer) package and [lume virtualization CLI](https://github.com/trycua/cua/tree/main/libs/lume). 12 | 13 | Check out what it looks like to use your own Operator from a Gradio app: 14 | 15 | <div align="center"> 16 | <video src="https://github.com/user-attachments/assets/a2cf69ad-2ab2-4eb9-8e1a-45606dd7eec6" width="600" controls></video> 17 | </div> 18 | 19 | ## What You'll Learn 20 | 21 | By the end of this tutorial, you'll be able to: 22 | - Set up a macOS virtual machine for AI automation 23 | - Connect OpenAI's computer-use model to your VM 24 | - Create a basic loop for the AI to interact with your VM 25 | - Handle different types of computer actions (clicking, typing, etc.) 26 | - Implement safety checks and error handling 27 | 28 | **Prerequisites:** 29 | - macOS Sonoma (14.0) or later 30 | - 8GB RAM minimum (16GB recommended) 31 | - OpenAI API access (Tier 3+) 32 | - Basic Python knowledge 33 | - Familiarity with terminal commands 34 | 35 | **Estimated Time:** 45-60 minutes 36 | 37 | ## Introduction to Computer-Use Agents 38 | 39 | Last March OpenAI released a fine-tuned version of GPT-4o, namely [CUA](https://openai.com/index/computer-using-agent/), introducing pixel-level vision capabilities with advanced reasoning through reinforcement learning. This fine-tuning enables the computer-use model to interpret screenshots and interact with graphical user interfaces on a pixel-level such as buttons, menus, and text fields - mimicking human interactions on a computer screen. It scores a remarkable 38.1% success rate on [OSWorld](https://os-world.github.io) - a benchmark for Computer-Use agents on Linux and Windows. This is the 2nd available model after Anthropic's [Claude 3.5 Sonnet](https://www.anthropic.com/news/3-5-models-and-computer-use) to support computer-use capabilities natively with no external models (e.g. accessory [SoM (Set-of-Mark)](https://arxiv.org/abs/2310.11441) and OCR runs). 
40 | 41 | Professor Ethan Mollick provides an excellent explanation of computer-use agents in this article: [When you give a Claude a mouse](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse). 42 | 43 | ### ChatGPT Operator 44 | OpenAI's computer-use model powers [ChatGPT Operator](https://openai.com/index/introducing-operator), a Chromium-based interface exclusively available to ChatGPT Pro subscribers. Users leverage this functionality to automate web-based tasks such as online shopping, expense report submission, and booking reservations by interacting with websites in a human-like manner. 45 | 46 | ## Benefits of Custom Operators 47 | 48 | ### Why Build Your Own? 49 | While OpenAI's Operator uses a controlled Chromium VM instance, there are scenarios where you may want to use your own VM with full desktop capabilities. Here are some examples: 50 | 51 | - Automating native macOS apps like Finder, Xcode 52 | - Managing files, changing settings, and running terminal commands 53 | - Testing desktop software and applications 54 | - Creating workflows that combine web and desktop tasks 55 | - Automating media editing in apps like Final Cut Pro and Blender 56 | 57 | This gives you more control and flexibility to automate tasks beyond just web browsing, with full access to interact with native applications and system-level operations. Additionally, running your own VM locally provides better privacy for sensitive user files and delivers superior performance by leveraging your own hardware instead of renting expensive Cloud VMs. 58 | 59 | ## Access Requirements 60 | 61 | ### Model Availability 62 | As we speak, the **computer-use-preview** model has limited availability: 63 | - Only accessible to OpenAI tier 3+ users 64 | - Additional application process may be required even for eligible users 65 | - Cannot be used in the OpenAI Playground 66 | - Outside of ChatGPT Operator, usage is restricted to the new **Responses API** 67 | 68 | ## Understanding the OpenAI API 69 | 70 | ### Responses API Overview 71 | Let's start with the basics. In our case, we'll use OpenAI's Responses API to communicate with their computer-use model. 72 | 73 | Think of it like this: 74 | 1. We send the model a screenshot of our VM and tell it what we want it to do 75 | 2. The model looks at the screenshot and decides what actions to take 76 | 3. It sends back instructions (like "click here" or "type this") 77 | 4. We execute those instructions in our VM 78 | 79 | The [Responses API](https://platform.openai.com/docs/guides/responses) is OpenAI's newest way to interact with their AI models. It comes with several built-in tools: 80 | - **Web search**: Let the AI search the internet 81 | - **File search**: Help the AI find documents 82 | - **Computer use**: Allow the AI to control a computer (what we'll be using) 83 | 84 | As we speak, the computer-use model is only available through the Responses API. 85 | 86 | ### Responses API Examples 87 | Let's look at some simple examples. We'll start with the traditional way of using OpenAI's API with Chat Completions, then show the new Responses API primitive. 
88 | 89 | Chat Completions: 90 | ```python 91 | # The old way required managing conversation history manually 92 | messages = [{"role": "user", "content": "Hello"}] 93 | response = client.chat.completions.create( 94 | model="gpt-4", 95 | messages=messages # We had to track all messages ourselves 96 | ) 97 | messages.append(response.choices[0].message) # Manual message tracking 98 | ``` 99 | 100 | Responses API: 101 | ```python 102 | # Example 1: Simple web search 103 | # The API handles all the complexity for us 104 | response = client.responses.create( 105 | model="gpt-4", 106 | input=[{ 107 | "role": "user", 108 | "content": "What's the latest news about AI?" 109 | }], 110 | tools=[{ 111 | "type": "web_search", # Tell the API to use web search 112 | "search_query": "latest AI news" 113 | }] 114 | ) 115 | 116 | # Example 2: File search 117 | # Looking for specific documents becomes easy 118 | response = client.responses.create( 119 | model="gpt-4", 120 | input=[{ 121 | "role": "user", 122 | "content": "Find documents about project X" 123 | }], 124 | tools=[{ 125 | "type": "file_search", 126 | "query": "project X", 127 | "file_types": ["pdf", "docx"] # Specify which file types to look for 128 | }] 129 | ) 130 | ``` 131 | 132 | ### Computer-Use Model Setup 133 | For our operator, we'll use the computer-use model. Here's how we set it up: 134 | 135 | ```python 136 | # Set up the computer-use model to control our VM 137 | response = client.responses.create( 138 | model="computer-use-preview", # Special model for computer control 139 | tools=[{ 140 | "type": "computer_use_preview", 141 | "display_width": 1024, # Size of our VM screen 142 | "display_height": 768, 143 | "environment": "mac" # Tell it we're using macOS. 144 | }], 145 | input=[ 146 | { 147 | "role": "user", 148 | "content": [ 149 | # What we want the AI to do 150 | {"type": "input_text", "text": "Open Safari and go to google.com"}, 151 | # Current screenshot of our VM 152 | {"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_base64}"} 153 | ] 154 | } 155 | ], 156 | truncation="auto" # Let OpenAI handle message length 157 | ) 158 | ``` 159 | 160 | ### Understanding the Response 161 | When we send a request, the API sends back a response that looks like this: 162 | 163 | ```json 164 | "output": [ 165 | { 166 | "type": "reasoning", # The AI explains what it's thinking 167 | "id": "rs_67cc...", 168 | "summary": [ 169 | { 170 | "type": "summary_text", 171 | "text": "Clicking on the browser address bar." 172 | } 173 | ] 174 | }, 175 | { 176 | "type": "computer_call", # The actual action to perform 177 | "id": "cu_67cc...", 178 | "call_id": "call_zw3...", 179 | "action": { 180 | "type": "click", # What kind of action (click, type, etc.) 181 | "button": "left", # Which mouse button to use 182 | "x": 156, # Where to click (coordinates) 183 | "y": 50 184 | }, 185 | "pending_safety_checks": [], # Any safety warnings to consider 186 | "status": "completed" # Whether the action was successful 187 | } 188 | ] 189 | ``` 190 | 191 | Each response contains: 192 | 1. **Reasoning**: The AI's explanation of what it's doing 193 | 2. **Action**: The specific computer action to perform 194 | 3. **Safety Checks**: Any potential risks to review 195 | 4. **Status**: Whether everything worked as planned 196 | 197 | ## CUA-Computer Interface 198 | 199 | ### Architecture Overview 200 | Let's break down the main components of our system and how they work together: 201 | 202 | 1. 
**The Virtual Machine (VM)** 203 | - Think of this as a safe playground for our AI 204 | - It's a complete macOS system running inside your computer 205 | - Anything the AI does stays inside this VM, keeping your main system safe 206 | - We use `lume` to create and manage this VM 207 | 208 | 2. **The Computer Interface (CUI)** 209 | - This is how we control the VM 210 | - It can move the mouse, type text, and take screenshots 211 | - Works like a remote control for the VM 212 | - Built using our `cua-computer` package 213 | 214 | 3. **The OpenAI Model** 215 | - This is the brain of our operator 216 | - It looks at screenshots of the VM 217 | - Decides what actions to take 218 | - Sends back instructions like "click here" or "type this" 219 | 220 | Here's how they all work together: 221 | 222 | ```mermaid 223 | sequenceDiagram 224 | participant User as You 225 | participant CUI as Computer Interface 226 | participant VM as Virtual Machine 227 | participant AI as OpenAI API 228 | 229 | Note over User,AI: The Main Loop 230 | User->>CUI: Start the operator 231 | CUI->>VM: Create macOS sandbox 232 | activate VM 233 | VM-->>CUI: VM is ready 234 | 235 | loop Action Loop 236 | Note over CUI,AI: Each iteration 237 | CUI->>VM: Take a screenshot 238 | VM-->>CUI: Return current screen 239 | CUI->>AI: Send screenshot + instructions 240 | AI-->>CUI: Return next action 241 | 242 | Note over CUI,VM: Execute the action 243 | alt Mouse Click 244 | CUI->>VM: Move and click mouse 245 | else Type Text 246 | CUI->>VM: Type characters 247 | else Scroll Screen 248 | CUI->>VM: Scroll window 249 | else Press Keys 250 | CUI->>VM: Press keyboard keys 251 | else Wait 252 | CUI->>VM: Pause for a moment 253 | end 254 | end 255 | 256 | VM-->>CUI: Task finished 257 | deactivate VM 258 | CUI-->>User: All done! 259 | ``` 260 | 261 | The diagram above shows how information flows through our system: 262 | 1. You start the operator 263 | 2. The Computer Interface creates a virtual macOS 264 | 3. Then it enters a loop: 265 | - Take a picture of the VM screen 266 | - Send it to OpenAI with instructions 267 | - Get back an action to perform 268 | - Execute that action in the VM 269 | - Repeat until the task is done 270 | 271 | This design keeps everything organized and safe. The AI can only interact with the VM through our controlled interface, and the VM keeps the AI's actions isolated from your main system. 272 | 273 | --- 274 | 275 | ## Implementation Guide 276 | 277 | ### Prerequisites 278 | 279 | 1. **Lume CLI Setup** 280 | For installing the standalone lume binary, run the following command from a terminal, or download the [latest pkg](https://github.com/trycua/cua/releases/latest/download/lume.pkg.tar.gz). 
281 | 282 | ```bash 283 | sudo /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" 284 | ``` 285 | 286 | **Important Storage Notes:** 287 | - Initial download requires 80GB of free space 288 | - After first run, space usage reduces to ~30GB due to macOS's sparse file system 289 | - VMs are stored in `~/.lume` 290 | - Cached images are stored in `~/.lume/cache` 291 | 292 | You can check your downloaded VM images anytime: 293 | ```bash 294 | lume ls 295 | ``` 296 | 297 | Example output: 298 | 299 | | name | os | cpu | memory | disk | display | status | ip | vnc | 300 | |--------------------------|---------|-------|---------|----------------|-----------|-----------|----------------|---------------------------------------------------| 301 | | macos-sequoia-cua:latest | macOS | 12 | 16.00G | 64.5GB/80.0GB | 1024x768 | running | 192.168.64.78 | vnc://:[email protected]:56085 | 302 | 303 | After checking your available images, you can run the VM to ensure everything is working correctly: 304 | ```bash 305 | lume run macos-sequoia-cua:latest 306 | ``` 307 | 308 | 2. **Python Environment Setup** 309 | **Note**: The `cua-computer` package requires Python 3.10 or later. We recommend creating a dedicated Python environment: 310 | 311 | **Using venv:** 312 | ```bash 313 | python -m venv cua-env 314 | source cua-env/bin/activate 315 | ``` 316 | 317 | **Using conda:** 318 | ```bash 319 | conda create -n cua-env python=3.10 320 | conda activate cua-env 321 | ``` 322 | 323 | Then install the required packages: 324 | 325 | ```bash 326 | pip install openai 327 | pip install cua-computer 328 | ``` 329 | 330 | Ensure you have an OpenAI API key (set as an environment variable or in your OpenAI configuration). 331 | 332 | ### Building the Operator 333 | 334 | #### Importing Required Modules 335 | With the prerequisites installed and configured, we're ready to build our first operator. 336 | The following example uses asynchronous Python (async/await). You can run it either in a VS Code Notebook or as a standalone Python script. 337 | 338 | ```python 339 | import asyncio 340 | import base64 341 | import openai 342 | 343 | from computer import Computer 344 | ``` 345 | 346 | #### Mapping API Actions to CUA Methods 347 | The following helper function converts a `computer_call` action from the OpenAI Responses API into corresponding commands on the CUI interface. For example, if the API instructs a `click` action, we move the cursor and perform a left click on the lume VM Sandbox. We will use the computer interface to execute the actions. 
348 | 349 | ```python 350 | async def execute_action(computer, action): 351 | action_type = action.type 352 | 353 | if action_type == "click": 354 | x = action.x 355 | y = action.y 356 | button = action.button 357 | print(f"Executing click at ({x}, {y}) with button '{button}'") 358 | await computer.interface.move_cursor(x, y) 359 | if button == "right": 360 | await computer.interface.right_click() 361 | else: 362 | await computer.interface.left_click() 363 | 364 | elif action_type == "type": 365 | text = action.text 366 | print(f"Typing text: {text}") 367 | await computer.interface.type_text(text) 368 | 369 | elif action_type == "scroll": 370 | x = action.x 371 | y = action.y 372 | scroll_x = action.scroll_x 373 | scroll_y = action.scroll_y 374 | print(f"Scrolling at ({x}, {y}) with offsets (scroll_x={scroll_x}, scroll_y={scroll_y})") 375 | await computer.interface.move_cursor(x, y) 376 | await computer.interface.scroll(scroll_y) # Using vertical scroll only 377 | 378 | elif action_type == "keypress": 379 | keys = action.keys 380 | for key in keys: 381 | print(f"Pressing key: {key}") 382 | # Map common key names to CUA equivalents 383 | if key.lower() == "enter": 384 | await computer.interface.press_key("return") 385 | elif key.lower() == "space": 386 | await computer.interface.press_key("space") 387 | else: 388 | await computer.interface.press_key(key) 389 | 390 | elif action_type == "wait": 391 | wait_time = action.time 392 | print(f"Waiting for {wait_time} seconds") 393 | await asyncio.sleep(wait_time) 394 | 395 | elif action_type == "screenshot": 396 | print("Taking screenshot") 397 | # This is handled automatically in the main loop, but we can take an extra one if requested 398 | screenshot = await computer.interface.screenshot() 399 | return screenshot 400 | 401 | else: 402 | print(f"Unrecognized action: {action_type}") 403 | ``` 404 | 405 | #### Implementing the Computer-Use Loop 406 | This section defines a loop that: 407 | 408 | 1. Initializes the cua-computer instance (connecting to a macOS sandbox). 409 | 2. Captures a screenshot of the current state. 410 | 3. Sends the screenshot (with a user prompt) to the OpenAI Responses API using the `computer-use-preview` model. 411 | 4. Processes the returned `computer_call` action and executes it using our helper function. 412 | 5. Captures an updated screenshot after the action (this example runs one iteration, but you can wrap it in a loop). 413 | 414 | For a full loop, you would repeat these steps until no further actions are returned. 
415 | 416 | ```python 417 | async def cua_openai_loop(): 418 | # Initialize the lume computer instance (macOS sandbox) 419 | async with Computer( 420 | display="1024x768", 421 | memory="4GB", 422 | cpu="2", 423 | os_type="macos" 424 | ) as computer: 425 | await computer.run() # Start the lume VM 426 | 427 | # Capture the initial screenshot 428 | screenshot = await computer.interface.screenshot() 429 | screenshot_base64 = base64.b64encode(screenshot).decode('utf-8') 430 | 431 | # Initial request to start the loop 432 | response = openai.responses.create( 433 | model="computer-use-preview", 434 | tools=[{ 435 | "type": "computer_use_preview", 436 | "display_width": 1024, 437 | "display_height": 768, 438 | "environment": "mac" 439 | }], 440 | input=[ 441 | { 442 | "role": "user", 443 | "content": [ 444 | {"type": "input_text", "text": "Open Safari, download and install Cursor."}, 445 | {"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_base64}"} 446 | ] 447 | } 448 | ], 449 | truncation="auto" 450 | ) 451 | 452 | # Continue the loop until no more computer_call actions 453 | while True: 454 | # Check for computer_call actions 455 | computer_calls = [item for item in response.output if item and item.type == "computer_call"] 456 | if not computer_calls: 457 | print("No more computer calls. Loop complete.") 458 | break 459 | 460 | # Get the first computer call 461 | call = computer_calls[0] 462 | last_call_id = call.call_id 463 | action = call.action 464 | print("Received action from OpenAI Responses API:", action) 465 | 466 | # Handle any pending safety checks 467 | if call.pending_safety_checks: 468 | print("Safety checks pending:", call.pending_safety_checks) 469 | # In a real implementation, you would want to get user confirmation here 470 | acknowledged_checks = call.pending_safety_checks 471 | else: 472 | acknowledged_checks = [] 473 | 474 | # Execute the action 475 | await execute_action(computer, action) 476 | await asyncio.sleep(1) # Allow time for changes to take effect 477 | 478 | # Capture new screenshot after action 479 | new_screenshot = await computer.interface.screenshot() 480 | new_screenshot_base64 = base64.b64encode(new_screenshot).decode('utf-8') 481 | 482 | # Send the screenshot back as computer_call_output 483 | response = openai.responses.create( 484 | model="computer-use-preview", 485 | tools=[{ 486 | "type": "computer_use_preview", 487 | "display_width": 1024, 488 | "display_height": 768, 489 | "environment": "mac" 490 | }], 491 | input=[{ 492 | "type": "computer_call_output", 493 | "call_id": last_call_id, 494 | "acknowledged_safety_checks": acknowledged_checks, 495 | "output": { 496 | "type": "input_image", 497 | "image_url": f"data:image/png;base64,{new_screenshot_base64}" 498 | } 499 | }], 500 | truncation="auto" 501 | ) 502 | 503 | # End the session 504 | await computer.stop() 505 | 506 | # Run the loop 507 | if __name__ == "__main__": 508 | asyncio.run(cua_openai_loop()) 509 | ``` 510 | 511 | You can find the full code in our [notebook](https://github.com/trycua/cua/blob/main/notebooks/blog/build-your-own-operator-on-macos-1.ipynb). 512 | 513 | #### Request Handling Differences 514 | The first request to the OpenAI Responses API is special in that it includes the initial screenshot and prompt. Subsequent requests are handled differently, using the `computer_call_output` type to provide feedback on the executed action. 
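In schematic form, here is a minimal side-by-side sketch of the two `input` payload shapes already used in the loop above (placeholder values only; the screenshot strings and `call_id` come from your own session):

```python
# Schematic recap of the two input payloads used above (placeholder values only).

# Initial request: a user message carrying the prompt and the first screenshot.
initial_input = [
    {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Open Safari, download and install Cursor."},
            {"type": "input_image", "image_url": "data:image/png;base64,<screenshot_base64>"},
        ],
    }
]

# Follow-up request: a computer_call_output tied to the previous action via call_id.
followup_input = [
    {
        "type": "computer_call_output",
        "call_id": "<call_id from the previous computer_call>",
        "acknowledged_safety_checks": [],  # include any checks the user approved
        "output": {
            "type": "input_image",
            "image_url": "data:image/png;base64,<new_screenshot_base64>",
        },
    }
]
```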
515 | 516 | ##### Initial Request Format 517 | - We use `role: "user"` with `content` that contains both `input_text` (the prompt) and `input_image` (the screenshot) 518 | 519 | ##### Subsequent Request Format 520 | - We use `type: "computer_call_output"` instead of the user role 521 | - We include the `call_id` to link the output to the specific previous action that was executed 522 | - We provide any `acknowledged_safety_checks` that were approved 523 | - We include the new screenshot in the `output` field 524 | 525 | This structured approach allows the API to maintain context and continuity throughout the interaction session. 526 | 527 | **Note**: For multi-turn conversations, you should include the `previous_response_id` in your initial requests when starting a new conversation with prior context. However, when using `computer_call_output` for action feedback, you don't need to explicitly manage the conversation history - OpenAI's API automatically tracks the context using the `call_id`. The `previous_response_id` is primarily important when the user provides additional instructions or when starting a new request that should continue from a previous session. 528 | 529 | ## Conclusion 530 | 531 | ### Summary 532 | This blogpost demonstrates a single iteration of a OpenAI Computer-Use loop where: 533 | 534 | - A macOS sandbox is controlled using the CUA interface. 535 | - A screenshot and prompt are sent to the OpenAI Responses API. 536 | - The returned action (e.g. a click or type command) is executed via the CUI interface. 537 | 538 | In a production setting, you would wrap the action-response cycle in a loop, handling multiple actions and safety checks as needed. 539 | 540 | ### Next Steps 541 | In the next blogpost, we'll introduce our Agent framework which abstracts away all these tedious implementation steps. This framework provides a higher-level API that handles the interaction loop between OpenAI's computer-use model and the macOS sandbox, allowing you to focus on building sophisticated applications rather than managing the low-level details we've explored here. Can't wait? Check out the [cua-agent](https://github.com/trycua/cua/tree/main/libs/agent) package! 542 | 543 | ### Resources 544 | - [OpenAI Computer-Use docs](https://platform.openai.com/docs/guides/tools-computer-use) 545 | - [cua-computer](https://github.com/trycua/cua/tree/main/libs/computer) 546 | - [lume](https://github.com/trycua/cua/tree/main/libs/lume) 547 | ``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/diorama/diorama.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """Diorama: A virtual desktop manager for macOS""" 3 | 4 | import os 5 | import asyncio 6 | import logging 7 | import sys 8 | import io 9 | from typing import Union 10 | from PIL import Image, ImageDraw 11 | 12 | from computer_server.diorama.draw import capture_all_apps, AppActivationContext, get_frontmost_and_active_app, get_all_windows, get_running_apps 13 | 14 | from computer_server.diorama.diorama_computer import DioramaComputer 15 | from computer_server.handlers.macos import * 16 | 17 | # simple, nicely formatted logging 18 | logger = logging.getLogger(__name__) 19 | 20 | automation_handler = MacOSAutomationHandler() 21 | 22 | class Diorama: 23 | """Virtual desktop manager that provides automation capabilities for macOS applications. 
24 | 25 | Manages application windows and provides an interface for taking screenshots, 26 | mouse interactions, keyboard input, and coordinate transformations between 27 | screenshot space and screen space. 28 | """ 29 | _scheduler_queue = None 30 | _scheduler_task = None 31 | _loop = None 32 | _scheduler_started = False 33 | 34 | @classmethod 35 | def create_from_apps(cls, *args) -> DioramaComputer: 36 | """Create a DioramaComputer instance from a list of application names. 37 | 38 | Args: 39 | *args: Variable number of application names to include in the desktop 40 | 41 | Returns: 42 | DioramaComputer: A computer interface for the specified applications 43 | """ 44 | cls._ensure_scheduler() 45 | return cls(args).computer 46 | 47 | # Dictionary to store cursor positions for each unique app_list hash 48 | _cursor_positions = {} 49 | 50 | def __init__(self, app_list): 51 | """Initialize a Diorama instance for the specified applications. 52 | 53 | Args: 54 | app_list: List of application names to manage 55 | """ 56 | self.app_list = app_list 57 | self.interface = self.Interface(self) 58 | self.computer = DioramaComputer(self) 59 | self.focus_context = None 60 | 61 | # Create a hash for this app_list to use as a key 62 | self.app_list_hash = hash(tuple(sorted(app_list))) 63 | 64 | # Initialize cursor position for this app_list if it doesn't exist 65 | if self.app_list_hash not in Diorama._cursor_positions: 66 | Diorama._cursor_positions[self.app_list_hash] = (0, 0) 67 | 68 | @classmethod 69 | def _ensure_scheduler(cls): 70 | """Ensure the async scheduler loop is running. 71 | 72 | Creates and starts the scheduler task if it hasn't been started yet. 73 | """ 74 | if not cls._scheduler_started: 75 | logger.info("Starting Diorama scheduler loop…") 76 | cls._scheduler_queue = asyncio.Queue() 77 | cls._loop = asyncio.get_event_loop() 78 | cls._scheduler_task = cls._loop.create_task(cls._scheduler_loop()) 79 | cls._scheduler_started = True 80 | 81 | @classmethod 82 | async def _scheduler_loop(cls): 83 | """Main scheduler loop that processes automation commands. 84 | 85 | Continuously processes commands from the scheduler queue, handling 86 | screenshots, mouse actions, keyboard input, and scrolling operations. 
87 | """ 88 | while True: 89 | cmd = await cls._scheduler_queue.get() 90 | action = cmd.get("action") 91 | args = cmd.get("arguments", {}) 92 | future = cmd.get("future") 93 | logger.info(f"Processing command: {action} | args={args}") 94 | 95 | app_whitelist = args.get("app_list", []) 96 | 97 | all_windows = get_all_windows() 98 | running_apps = get_running_apps() 99 | frontmost_app, active_app_to_use, active_app_pid = get_frontmost_and_active_app(all_windows, running_apps, app_whitelist) 100 | focus_context = AppActivationContext(active_app_pid, active_app_to_use, logger) 101 | 102 | with focus_context: 103 | try: 104 | if action == "screenshot": 105 | logger.info(f"Taking screenshot for apps: {app_whitelist}") 106 | result, img = capture_all_apps( 107 | app_whitelist=app_whitelist, 108 | save_to_disk=False, 109 | take_focus=False 110 | ) 111 | logger.info("Screenshot complete.") 112 | if future: 113 | future.set_result((result, img)) 114 | # Mouse actions 115 | elif action in ["left_click", "right_click", "double_click", "move_cursor", "drag_to"]: 116 | x = args.get("x") 117 | y = args.get("y") 118 | 119 | duration = args.get("duration", 0.5) 120 | if action == "left_click": 121 | await automation_handler.left_click(x, y) 122 | elif action == "right_click": 123 | await automation_handler.right_click(x, y) 124 | elif action == "double_click": 125 | await automation_handler.double_click(x, y) 126 | elif action == "move_cursor": 127 | await automation_handler.move_cursor(x, y) 128 | elif action == "drag_to": 129 | await automation_handler.drag_to(x, y, duration=duration) 130 | if future: 131 | future.set_result(None) 132 | elif action in ["scroll_up", "scroll_down"]: 133 | x = args.get("x") 134 | y = args.get("y") 135 | if x is not None and y is not None: 136 | await automation_handler.move_cursor(x, y) 137 | 138 | clicks = args.get("clicks", 1) 139 | if action == "scroll_up": 140 | await automation_handler.scroll_up(clicks) 141 | else: 142 | await automation_handler.scroll_down(clicks) 143 | if future: 144 | future.set_result(None) 145 | # Keyboard actions 146 | elif action == "type_text": 147 | text = args.get("text") 148 | await automation_handler.type_text(text) 149 | if future: 150 | future.set_result(None) 151 | elif action == "press_key": 152 | key = args.get("key") 153 | await automation_handler.press_key(key) 154 | if future: 155 | future.set_result(None) 156 | elif action == "hotkey": 157 | keys = args.get("keys", []) 158 | await automation_handler.hotkey(keys) 159 | if future: 160 | future.set_result(None) 161 | elif action == "get_cursor_position": 162 | pos = await automation_handler.get_cursor_position() 163 | if future: 164 | future.set_result(pos) 165 | else: 166 | logger.warning(f"Unknown action: {action}") 167 | if future: 168 | future.set_exception(ValueError(f"Unknown action: {action}")) 169 | except Exception as e: 170 | logger.error(f"Exception during {action}: {e}", exc_info=True) 171 | if future: 172 | future.set_exception(e) 173 | 174 | class Interface(): 175 | """Interface for interacting with the virtual desktop. 176 | 177 | Provides methods for taking screenshots, mouse interactions, keyboard input, 178 | and coordinate transformations between screenshot and screen coordinates. 179 | """ 180 | 181 | def __init__(self, diorama): 182 | """Initialize the interface with a reference to the parent Diorama instance. 
183 | 184 | Args: 185 | diorama: The parent Diorama instance 186 | """ 187 | self._diorama = diorama 188 | 189 | self._scene_hitboxes = [] 190 | self._scene_size = None 191 | 192 | async def _send_cmd(self, action, arguments=None): 193 | """Send a command to the scheduler queue. 194 | 195 | Args: 196 | action (str): The action to perform 197 | arguments (dict, optional): Arguments for the action 198 | 199 | Returns: 200 | The result of the command execution 201 | """ 202 | Diorama._ensure_scheduler() 203 | loop = asyncio.get_event_loop() 204 | future = loop.create_future() 205 | logger.info(f"Enqueuing {action} command for apps: {self._diorama.app_list}") 206 | await Diorama._scheduler_queue.put({ 207 | "action": action, 208 | "arguments": {"app_list": self._diorama.app_list, **(arguments or {})}, 209 | "future": future 210 | }) 211 | try: 212 | return await future 213 | except asyncio.CancelledError: 214 | logger.warning(f"Command was cancelled: {action}") 215 | return None 216 | 217 | async def screenshot(self, as_bytes: bool = True) -> Union[str, Image.Image]: 218 | """Take a screenshot of the managed applications. 219 | 220 | Args: 221 | as_bytes (bool): If True, return base64-encoded bytes; if False, return PIL Image 222 | 223 | Returns: 224 | Union[str, Image.Image]: Base64-encoded PNG bytes or PIL Image object 225 | """ 226 | import base64 227 | result, img = await self._send_cmd("screenshot") 228 | self._scene_hitboxes = result.get("hitboxes", []) 229 | self._scene_size = img.size 230 | 231 | if as_bytes: 232 | # PIL Image to bytes, then base64 encode for JSON 233 | import io 234 | img_byte_arr = io.BytesIO() 235 | img.save(img_byte_arr, format="PNG") 236 | img_bytes = img_byte_arr.getvalue() 237 | img_b64 = base64.b64encode(img_bytes).decode("ascii") 238 | return img_b64 239 | else: 240 | return img 241 | 242 | async def left_click(self, x, y): 243 | """Perform a left mouse click at the specified coordinates. 244 | 245 | Args: 246 | x (int): X coordinate in screenshot space (or None to use last position) 247 | y (int): Y coordinate in screenshot space (or None to use last position) 248 | """ 249 | # Get last cursor position for this app_list hash 250 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 251 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 252 | x, y = x or last_pos[0], y or last_pos[1] 253 | # Update cursor position for this app_list hash 254 | Diorama._cursor_positions[app_list_hash] = (x, y) 255 | 256 | sx, sy = await self.to_screen_coordinates(x, y) 257 | await self._send_cmd("left_click", {"x": sx, "y": sy}) 258 | 259 | async def right_click(self, x, y): 260 | """Perform a right mouse click at the specified coordinates. 261 | 262 | Args: 263 | x (int): X coordinate in screenshot space (or None to use last position) 264 | y (int): Y coordinate in screenshot space (or None to use last position) 265 | """ 266 | # Get last cursor position for this app_list hash 267 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 268 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 269 | x, y = x or last_pos[0], y or last_pos[1] 270 | # Update cursor position for this app_list hash 271 | Diorama._cursor_positions[app_list_hash] = (x, y) 272 | 273 | sx, sy = await self.to_screen_coordinates(x, y) 274 | await self._send_cmd("right_click", {"x": sx, "y": sy}) 275 | 276 | async def double_click(self, x, y): 277 | """Perform a double mouse click at the specified coordinates. 
278 | 279 | Args: 280 | x (int): X coordinate in screenshot space (or None to use last position) 281 | y (int): Y coordinate in screenshot space (or None to use last position) 282 | """ 283 | # Get last cursor position for this app_list hash 284 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 285 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 286 | x, y = x or last_pos[0], y or last_pos[1] 287 | # Update cursor position for this app_list hash 288 | Diorama._cursor_positions[app_list_hash] = (x, y) 289 | 290 | sx, sy = await self.to_screen_coordinates(x, y) 291 | await self._send_cmd("double_click", {"x": sx, "y": sy}) 292 | 293 | async def move_cursor(self, x, y): 294 | """Move the mouse cursor to the specified coordinates. 295 | 296 | Args: 297 | x (int): X coordinate in screenshot space (or None to use last position) 298 | y (int): Y coordinate in screenshot space (or None to use last position) 299 | """ 300 | # Get last cursor position for this app_list hash 301 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 302 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 303 | x, y = x or last_pos[0], y or last_pos[1] 304 | # Update cursor position for this app_list hash 305 | Diorama._cursor_positions[app_list_hash] = (x, y) 306 | 307 | sx, sy = await self.to_screen_coordinates(x, y) 308 | await self._send_cmd("move_cursor", {"x": sx, "y": sy}) 309 | 310 | async def drag_to(self, x, y, duration=0.5): 311 | """Drag the mouse from current position to the specified coordinates. 312 | 313 | Args: 314 | x (int): X coordinate in screenshot space (or None to use last position) 315 | y (int): Y coordinate in screenshot space (or None to use last position) 316 | duration (float): Duration of the drag operation in seconds 317 | """ 318 | # Get last cursor position for this app_list hash 319 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 320 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 321 | x, y = x or last_pos[0], y or last_pos[1] 322 | # Update cursor position for this app_list hash 323 | Diorama._cursor_positions[app_list_hash] = (x, y) 324 | 325 | sx, sy = await self.to_screen_coordinates(x, y) 326 | await self._send_cmd("drag_to", {"x": sx, "y": sy, "duration": duration}) 327 | 328 | async def get_cursor_position(self): 329 | """Get the current cursor position in screen coordinates. 330 | 331 | Returns: 332 | tuple: (x, y) coordinates of the cursor in screen space 333 | """ 334 | return await self._send_cmd("get_cursor_position") 335 | 336 | async def type_text(self, text): 337 | """Type the specified text using the keyboard. 338 | 339 | Args: 340 | text (str): The text to type 341 | """ 342 | await self._send_cmd("type_text", {"text": text}) 343 | 344 | async def press_key(self, key): 345 | """Press a single key on the keyboard. 346 | 347 | Args: 348 | key (str): The key to press 349 | """ 350 | await self._send_cmd("press_key", {"key": key}) 351 | 352 | async def hotkey(self, keys): 353 | """Press a combination of keys simultaneously. 354 | 355 | Args: 356 | keys (list): List of keys to press together 357 | """ 358 | await self._send_cmd("hotkey", {"keys": list(keys)}) 359 | 360 | async def scroll_up(self, clicks: int = 1): 361 | """Scroll up at the current cursor position. 
362 | 363 | Args: 364 | clicks (int): Number of scroll clicks to perform 365 | """ 366 | # Get last cursor position for this app_list hash 367 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 368 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 369 | x, y = last_pos[0], last_pos[1] 370 | 371 | await self._send_cmd("scroll_up", {"clicks": clicks, "x": x, "y": y}) 372 | 373 | async def scroll_down(self, clicks: int = 1): 374 | """Scroll down at the current cursor position. 375 | 376 | Args: 377 | clicks (int): Number of scroll clicks to perform 378 | """ 379 | # Get last cursor position for this app_list hash 380 | app_list_hash = hash(tuple(sorted(self._diorama.app_list))) 381 | last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) 382 | x, y = last_pos[0], last_pos[1] 383 | 384 | await self._send_cmd("scroll_down", {"clicks": clicks, "x": x, "y": y}) 385 | 386 | async def get_screen_size(self) -> dict[str, int]: 387 | """Get the size of the screenshot area. 388 | 389 | Returns: 390 | dict[str, int]: Dictionary with 'width' and 'height' keys 391 | """ 392 | if not self._scene_size: 393 | await self.screenshot() 394 | return { "width": self._scene_size[0], "height": self._scene_size[1] } 395 | 396 | async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]: 397 | """Convert screenshot coordinates to screen coordinates. 398 | 399 | Args: 400 | x: X absolute coordinate in screenshot space 401 | y: Y absolute coordinate in screenshot space 402 | 403 | Returns: 404 | tuple[float, float]: (x, y) absolute coordinates in screen space 405 | """ 406 | if not self._scene_hitboxes: 407 | await self.screenshot() # get hitboxes 408 | # Try all hitboxes 409 | for h in self._scene_hitboxes[::-1]: 410 | rect_from = h.get("hitbox") 411 | rect_to = h.get("target") 412 | if not rect_from or len(rect_from) != 4: 413 | continue 414 | 415 | # check if (x, y) is inside rect_from 416 | x0, y0, x1, y1 = rect_from 417 | if x0 <= x <= x1 and y0 <= y <= y1: 418 | logger.info(f"Found hitbox: {h}") 419 | # remap (x, y) to rect_to 420 | tx0, ty0, tx1, ty1 = rect_to 421 | 422 | # calculate offset from x0, y0 423 | offset_x = x - x0 424 | offset_y = y - y0 425 | 426 | # remap offset to rect_to 427 | tx = tx0 + offset_x 428 | ty = ty0 + offset_y 429 | 430 | return tx, ty 431 | return x, y 432 | 433 | async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]: 434 | """Convert screen coordinates to screenshot coordinates. 
435 | 436 | Args: 437 | x: X absolute coordinate in screen space 438 | y: Y absolute coordinate in screen space 439 | 440 | Returns: 441 | tuple[float, float]: (x, y) absolute coordinates in screenshot space 442 | """ 443 | if not self._scene_hitboxes: 444 | await self.screenshot() # get hitboxes 445 | # Try all hitboxes 446 | for h in self._scene_hitboxes[::-1]: 447 | rect_from = h.get("target") 448 | rect_to = h.get("hitbox") 449 | if not rect_from or len(rect_from) != 4: 450 | continue 451 | 452 | # check if (x, y) is inside rect_from 453 | x0, y0, x1, y1 = rect_from 454 | if x0 <= x <= x1 and y0 <= y <= y1: 455 | # remap (x, y) to rect_to 456 | tx0, ty0, tx1, ty1 = rect_to 457 | 458 | # calculate offset from x0, y0 459 | offset_x = x - x0 460 | offset_y = y - y0 461 | 462 | # remap offset to rect_to 463 | tx = tx0 + offset_x 464 | ty = ty0 + offset_y 465 | 466 | return tx, ty 467 | return x, y 468 | 469 | import pyautogui 470 | import time 471 | 472 | async def main(): 473 | """Main function demonstrating Diorama usage with multiple desktops and mouse tracking.""" 474 | desktop1 = Diorama.create_from_apps(["Discord", "Notes"]) 475 | desktop2 = Diorama.create_from_apps(["Terminal"]) 476 | 477 | img1 = await desktop1.interface.screenshot(as_bytes=False) 478 | img2 = await desktop2.interface.screenshot(as_bytes=False) 479 | 480 | img1.save("app_screenshots/desktop1.png") 481 | img2.save("app_screenshots/desktop2.png") 482 | # Initialize Diorama desktop 483 | desktop3 = Diorama.create_from_apps("Safari") 484 | screen_size = await desktop3.interface.get_screen_size() 485 | print(screen_size) 486 | 487 | # Take initial screenshot 488 | img = await desktop3.interface.screenshot(as_bytes=False) 489 | img.save("app_screenshots/desktop3.png") 490 | 491 | # Prepare hitboxes and draw on the single screenshot 492 | hitboxes = desktop3.interface._scene_hitboxes[::-1] 493 | base_img = img.copy() 494 | draw = ImageDraw.Draw(base_img) 495 | for h in hitboxes: 496 | rect = h.get("hitbox") 497 | if not rect or len(rect) != 4: 498 | continue 499 | draw.rectangle(rect, outline="red", width=2) 500 | 501 | # Track and draw mouse position in real time (single screenshot size) 502 | last_mouse_pos = None 503 | print("Tracking mouse... 
Press Ctrl+C to stop.") 504 | try: 505 | while True: 506 | mouse_x, mouse_y = pyautogui.position() 507 | if last_mouse_pos != (mouse_x, mouse_y): 508 | last_mouse_pos = (mouse_x, mouse_y) 509 | # Map to screenshot coordinates 510 | sx, sy = await desktop3.interface.to_screenshot_coordinates(mouse_x, mouse_y) 511 | # Draw on a copy of the screenshot 512 | frame = base_img.copy() 513 | frame_draw = ImageDraw.Draw(frame) 514 | frame_draw.ellipse((sx-5, sy-5, sx+5, sy+5), fill="blue", outline="blue") 515 | # Save the frame 516 | frame.save("app_screenshots/desktop3_mouse.png") 517 | print(f"Mouse at screen ({mouse_x}, {mouse_y}) -> screenshot ({sx:.1f}, {sy:.1f})") 518 | time.sleep(0.05) # Throttle updates to ~20 FPS 519 | except KeyboardInterrupt: 520 | print("Stopped tracking.") 521 | 522 | draw.text((rect[0], rect[1]), str(idx), fill="red") 523 | 524 | canvas.save("app_screenshots/desktop3_hitboxes.png") 525 | 526 | 527 | 528 | # move mouse in a square spiral around the screen 529 | import math 530 | import random 531 | 532 | step = 20 # pixels per move 533 | dot_radius = 10 534 | width = screen_size["width"] 535 | height = screen_size["height"] 536 | x, y = 0, 10 537 | 538 | while x < width and y < height: 539 | await desktop3.interface.move_cursor(x, y) 540 | img = await desktop3.interface.screenshot(as_bytes=False) 541 | draw = ImageDraw.Draw(img) 542 | draw.ellipse((x-dot_radius, y-dot_radius, x+dot_radius, y+dot_radius), fill="red") 543 | img.save("current.png") 544 | await asyncio.sleep(0.03) 545 | x += step 546 | y = math.sin(x / width * math.pi * 2) * 50 + 25 547 | 548 | if __name__ == "__main__": 549 | asyncio.run(main()) 550 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Server/Server.swift: -------------------------------------------------------------------------------- ```swift 1 | import Darwin 2 | import Foundation 3 | import Network 4 | 5 | // MARK: - Error Types 6 | enum PortError: Error, LocalizedError { 7 | case alreadyInUse(port: UInt16) 8 | 9 | var errorDescription: String? 
{ 10 | switch self { 11 | case .alreadyInUse(let port): 12 | return "Port \(port) is already in use by another process" 13 | } 14 | } 15 | } 16 | 17 | // MARK: - Server Class 18 | @MainActor 19 | final class Server { 20 | 21 | // MARK: - Route Type 22 | private struct Route { 23 | let method: String 24 | let path: String 25 | let handler: (HTTPRequest) async throws -> HTTPResponse 26 | 27 | func matches(_ request: HTTPRequest) -> Bool { 28 | if method != request.method { return false } 29 | 30 | // Handle path parameters 31 | let routeParts = path.split(separator: "/") 32 | let requestParts = request.path.split(separator: "/") 33 | 34 | if routeParts.count != requestParts.count { return false } 35 | 36 | for (routePart, requestPart) in zip(routeParts, requestParts) { 37 | if routePart.hasPrefix(":") { continue } // Path parameter 38 | if routePart != requestPart { return false } 39 | } 40 | 41 | return true 42 | } 43 | 44 | func extractParams(_ request: HTTPRequest) -> [String: String] { 45 | var params: [String: String] = [:] 46 | let routeParts = path.split(separator: "/") 47 | 48 | // Split request path to remove query parameters 49 | let requestPathOnly = request.path.split(separator: "?", maxSplits: 1)[0] 50 | let requestParts = requestPathOnly.split(separator: "/") 51 | 52 | for (routePart, requestPart) in zip(routeParts, requestParts) { 53 | if routePart.hasPrefix(":") { 54 | let paramName = String(routePart.dropFirst()) 55 | params[paramName] = String(requestPart) 56 | } 57 | } 58 | 59 | return params 60 | } 61 | } 62 | 63 | // MARK: - Properties 64 | private let port: NWEndpoint.Port 65 | private let controller: LumeController 66 | private var isRunning = false 67 | private var listener: NWListener? 68 | private var routes: [Route] 69 | 70 | // MARK: - Initialization 71 | init(port: UInt16 = 7777) { 72 | self.port = NWEndpoint.Port(rawValue: port)! 
73 | self.controller = LumeController() 74 | self.routes = [] 75 | 76 | // Define API routes after self is fully initialized 77 | self.setupRoutes() 78 | } 79 | 80 | // MARK: - Route Setup 81 | private func setupRoutes() { 82 | routes = [ 83 | Route( 84 | method: "GET", path: "/lume/vms", 85 | handler: { [weak self] request in 86 | guard let self else { throw HTTPError.internalError } 87 | // Extract storage from query params if present 88 | let storage = self.extractQueryParam(request: request, name: "storage") 89 | return try await self.handleListVMs(storage: storage) 90 | }), 91 | Route( 92 | method: "GET", path: "/lume/vms/:name", 93 | handler: { [weak self] request in 94 | guard let self else { throw HTTPError.internalError } 95 | let params = Route( 96 | method: "GET", path: "/lume/vms/:name", 97 | handler: { _ in 98 | HTTPResponse(statusCode: .ok, body: "") 99 | } 100 | ).extractParams(request) 101 | guard let name = params["name"] else { 102 | return HTTPResponse(statusCode: .badRequest, body: "Missing VM name") 103 | } 104 | 105 | // Extract storage from query params if present 106 | let storage = self.extractQueryParam(request: request, name: "storage") 107 | 108 | return try await self.handleGetVM(name: name, storage: storage) 109 | }), 110 | Route( 111 | method: "DELETE", path: "/lume/vms/:name", 112 | handler: { [weak self] request in 113 | guard let self else { throw HTTPError.internalError } 114 | let params = Route( 115 | method: "DELETE", path: "/lume/vms/:name", 116 | handler: { _ in 117 | HTTPResponse(statusCode: .ok, body: "") 118 | } 119 | ).extractParams(request) 120 | guard let name = params["name"] else { 121 | return HTTPResponse(statusCode: .badRequest, body: "Missing VM name") 122 | } 123 | 124 | // Extract storage from query params if present 125 | let storage = self.extractQueryParam(request: request, name: "storage") 126 | 127 | return try await self.handleDeleteVM(name: name, storage: storage) 128 | }), 129 | Route( 130 | method: "POST", path: "/lume/vms", 131 | handler: { [weak self] request in 132 | guard let self else { throw HTTPError.internalError } 133 | return try await self.handleCreateVM(request.body) 134 | }), 135 | Route( 136 | method: "POST", path: "/lume/vms/clone", 137 | handler: { [weak self] request in 138 | guard let self else { throw HTTPError.internalError } 139 | return try await self.handleCloneVM(request.body) 140 | }), 141 | Route( 142 | method: "PATCH", path: "/lume/vms/:name", 143 | handler: { [weak self] request in 144 | guard let self else { throw HTTPError.internalError } 145 | let params = Route( 146 | method: "PATCH", path: "/lume/vms/:name", 147 | handler: { _ in 148 | HTTPResponse(statusCode: .ok, body: "") 149 | } 150 | ).extractParams(request) 151 | guard let name = params["name"] else { 152 | return HTTPResponse(statusCode: .badRequest, body: "Missing VM name") 153 | } 154 | return try await self.handleSetVM(name: name, body: request.body) 155 | }), 156 | Route( 157 | method: "POST", path: "/lume/vms/:name/run", 158 | handler: { [weak self] request in 159 | guard let self else { throw HTTPError.internalError } 160 | let params = Route( 161 | method: "POST", path: "/lume/vms/:name/run", 162 | handler: { _ in 163 | HTTPResponse(statusCode: .ok, body: "") 164 | } 165 | ).extractParams(request) 166 | guard let name = params["name"] else { 167 | return HTTPResponse(statusCode: .badRequest, body: "Missing VM name") 168 | } 169 | return try await self.handleRunVM(name: name, body: request.body) 170 | }), 171 | Route( 172 | method: 
"POST", path: "/lume/vms/:name/stop", 173 | handler: { [weak self] request in 174 | guard let self else { throw HTTPError.internalError } 175 | let params = Route( 176 | method: "POST", path: "/lume/vms/:name/stop", 177 | handler: { _ in 178 | HTTPResponse(statusCode: .ok, body: "") 179 | } 180 | ).extractParams(request) 181 | guard let name = params["name"] else { 182 | return HTTPResponse(statusCode: .badRequest, body: "Missing VM name") 183 | } 184 | 185 | Logger.info("Processing stop VM request", metadata: ["method": request.method, "path": request.path]) 186 | 187 | // Extract storage from the request body 188 | var storage: String? = nil 189 | if let bodyData = request.body, !bodyData.isEmpty { 190 | do { 191 | if let json = try JSONSerialization.jsonObject(with: bodyData) as? [String: Any], 192 | let bodyStorage = json["storage"] as? String { 193 | storage = bodyStorage 194 | Logger.info("Extracted storage from request body", metadata: ["storage": bodyStorage]) 195 | } 196 | } catch { 197 | Logger.error("Failed to parse request body JSON", metadata: ["error": error.localizedDescription]) 198 | } 199 | } 200 | 201 | return try await self.handleStopVM(name: name, storage: storage) 202 | }), 203 | Route( 204 | method: "GET", path: "/lume/ipsw", 205 | handler: { [weak self] _ in 206 | guard let self else { throw HTTPError.internalError } 207 | return try await self.handleIPSW() 208 | }), 209 | Route( 210 | method: "POST", path: "/lume/pull", 211 | handler: { [weak self] request in 212 | guard let self else { throw HTTPError.internalError } 213 | return try await self.handlePull(request.body) 214 | }), 215 | Route( 216 | method: "POST", path: "/lume/prune", 217 | handler: { [weak self] _ in 218 | guard let self else { throw HTTPError.internalError } 219 | return try await self.handlePruneImages() 220 | }), 221 | Route( 222 | method: "GET", path: "/lume/images", 223 | handler: { [weak self] request in 224 | guard let self else { throw HTTPError.internalError } 225 | return try await self.handleGetImages(request) 226 | }), 227 | // New config endpoint 228 | Route( 229 | method: "GET", path: "/lume/config", 230 | handler: { [weak self] _ in 231 | guard let self else { throw HTTPError.internalError } 232 | return try await self.handleGetConfig() 233 | }), 234 | Route( 235 | method: "POST", path: "/lume/config", 236 | handler: { [weak self] request in 237 | guard let self else { throw HTTPError.internalError } 238 | return try await self.handleUpdateConfig(request.body) 239 | }), 240 | Route( 241 | method: "GET", path: "/lume/config/locations", 242 | handler: { [weak self] _ in 243 | guard let self else { throw HTTPError.internalError } 244 | return try await self.handleGetLocations() 245 | }), 246 | Route( 247 | method: "POST", path: "/lume/config/locations", 248 | handler: { [weak self] request in 249 | guard let self else { throw HTTPError.internalError } 250 | return try await self.handleAddLocation(request.body) 251 | }), 252 | Route( 253 | method: "DELETE", path: "/lume/config/locations/:name", 254 | handler: { [weak self] request in 255 | guard let self else { throw HTTPError.internalError } 256 | let params = Route( 257 | method: "DELETE", path: "/lume/config/locations/:name", 258 | handler: { _ in 259 | HTTPResponse(statusCode: .ok, body: "") 260 | } 261 | ).extractParams(request) 262 | guard let name = params["name"] else { 263 | return HTTPResponse(statusCode: .badRequest, body: "Missing location name") 264 | } 265 | return try await self.handleRemoveLocation(name) 266 | }), 267 | 
268 | // Logs retrieval route 269 | Route( 270 | method: "GET", path: "/lume/logs", 271 | handler: { [weak self] request in 272 | guard let self else { throw HTTPError.internalError } 273 | 274 | // Extract query parameters 275 | let type = self.extractQueryParam(request: request, name: "type") // "info", "error", or "all" 276 | let linesParam = self.extractQueryParam(request: request, name: "lines") 277 | let lines = linesParam.flatMap { Int($0) } // Convert to Int if present 278 | 279 | return try await self.handleGetLogs(type: type, lines: lines) 280 | }), 281 | Route( 282 | method: "POST", path: "/lume/config/locations/default/:name", 283 | handler: { [weak self] request in 284 | guard let self else { throw HTTPError.internalError } 285 | let params = Route( 286 | method: "POST", path: "/lume/config/locations/default/:name", 287 | handler: { _ in 288 | HTTPResponse(statusCode: .ok, body: "") 289 | } 290 | ).extractParams(request) 291 | guard let name = params["name"] else { 292 | return HTTPResponse(statusCode: .badRequest, body: "Missing location name") 293 | } 294 | return try await self.handleSetDefaultLocation(name) 295 | }), 296 | Route( 297 | method: "POST", path: "/lume/vms/push", 298 | handler: { [weak self] request in 299 | guard let self else { throw HTTPError.internalError } 300 | return try await self.handlePush(request.body) 301 | }), 302 | ] 303 | } 304 | 305 | // Helper to extract query parameters from the URL 306 | private func extractQueryParam(request: HTTPRequest, name: String) -> String? { 307 | // Extract only the query part by splitting on '?' 308 | let parts = request.path.split(separator: "?", maxSplits: 1) 309 | guard parts.count > 1 else { return nil } // No query parameters 310 | 311 | let queryString = String(parts[1]) 312 | // Create a placeholder URL with the query string 313 | if let urlComponents = URLComponents(string: "http://placeholder.com?"+queryString), 314 | let queryItems = urlComponents.queryItems 315 | { 316 | return queryItems.first(where: { $0.name == name })?.value?.removingPercentEncoding 317 | } 318 | return nil 319 | } 320 | 321 | // MARK: - Port Utilities 322 | private func isPortAvailable(port: Int) async -> Bool { 323 | // Create a socket 324 | let socketFD = socket(AF_INET, SOCK_STREAM, 0) 325 | if socketFD == -1 { 326 | return false 327 | } 328 | 329 | // Set socket options to allow reuse 330 | var value: Int32 = 1 331 | if setsockopt( 332 | socketFD, SOL_SOCKET, SO_REUSEADDR, &value, socklen_t(MemoryLayout<Int32>.size)) == -1 333 | { 334 | close(socketFD) 335 | return false 336 | } 337 | 338 | // Set up the address structure 339 | var addr = sockaddr_in() 340 | addr.sin_family = sa_family_t(AF_INET) 341 | addr.sin_port = UInt16(port).bigEndian 342 | addr.sin_addr.s_addr = INADDR_ANY.bigEndian 343 | 344 | // Bind to the port 345 | let bindResult = withUnsafePointer(to: &addr) { addrPtr in 346 | addrPtr.withMemoryRebound(to: sockaddr.self, capacity: 1) { addrPtr in 347 | Darwin.bind(socketFD, addrPtr, socklen_t(MemoryLayout<sockaddr_in>.size)) 348 | } 349 | } 350 | 351 | // Clean up 352 | close(socketFD) 353 | 354 | // If bind failed, the port is in use 355 | return bindResult == 0 356 | } 357 | 358 | // MARK: - Server Lifecycle 359 | func start() async throws { 360 | // First check if the port is already in use 361 | if !(await isPortAvailable(port: Int(port.rawValue))) { 362 | // Don't log anything here, just throw the error 363 | throw PortError.alreadyInUse(port: port.rawValue) 364 | } 365 | 366 | let parameters = NWParameters.tcp 
367 | listener = try NWListener(using: parameters, on: port) 368 | 369 | // Create an actor to safely manage state transitions 370 | actor StartupState { 371 | var error: Error? 372 | var isComplete = false 373 | 374 | func setError(_ error: Error) { 375 | self.error = error 376 | self.isComplete = true 377 | } 378 | 379 | func setComplete() { 380 | self.isComplete = true 381 | } 382 | 383 | func checkStatus() -> (isComplete: Bool, error: Error?) { 384 | return (isComplete, error) 385 | } 386 | } 387 | 388 | let startupState = StartupState() 389 | 390 | // Set up a state update handler to detect port binding errors 391 | listener?.stateUpdateHandler = { state in 392 | Task { 393 | switch state { 394 | case .setup: 395 | // Initial state, no action needed 396 | Logger.info("Listener setup", metadata: ["port": "\(self.port.rawValue)"]) 397 | break 398 | case .waiting(let error): 399 | // Log the full error details to see what we're getting 400 | Logger.error( 401 | "Listener waiting", 402 | metadata: [ 403 | "error": error.localizedDescription, 404 | "debugDescription": error.debugDescription, 405 | "localizedDescription": error.localizedDescription, 406 | "port": "\(self.port.rawValue)", 407 | ]) 408 | 409 | // Check for different port in use error messages 410 | if error.debugDescription.contains("Address already in use") 411 | || error.localizedDescription.contains("in use") 412 | || error.localizedDescription.contains("address already in use") 413 | { 414 | Logger.error( 415 | "Port conflict detected", metadata: ["port": "\(self.port.rawValue)"]) 416 | await startupState.setError( 417 | PortError.alreadyInUse(port: self.port.rawValue)) 418 | } else { 419 | // Wait for a short period to see if the listener recovers 420 | // Some network errors are transient 421 | try? await Task.sleep(nanoseconds: 1_000_000_000) // 1 second 422 | 423 | // If we're still waiting after delay, consider it an error 424 | if case .waiting = await self.listener?.state { 425 | await startupState.setError(error) 426 | } 427 | } 428 | case .failed(let error): 429 | // Log the full error details 430 | Logger.error( 431 | "Listener failed", 432 | metadata: [ 433 | "error": error.localizedDescription, 434 | "debugDescription": error.debugDescription, 435 | "port": "\(self.port.rawValue)", 436 | ]) 437 | await startupState.setError(error) 438 | case .ready: 439 | // Listener successfully bound to port 440 | Logger.info("Listener ready", metadata: ["port": "\(self.port.rawValue)"]) 441 | await startupState.setComplete() 442 | case .cancelled: 443 | // Listener was cancelled 444 | Logger.info("Listener cancelled", metadata: ["port": "\(self.port.rawValue)"]) 445 | break 446 | @unknown default: 447 | Logger.info( 448 | "Unknown listener state", 449 | metadata: ["state": "\(state)", "port": "\(self.port.rawValue)"]) 450 | break 451 | } 452 | } 453 | } 454 | 455 | listener?.newConnectionHandler = { [weak self] connection in 456 | Task { @MainActor [weak self] in 457 | guard let self else { return } 458 | self.handleConnection(connection) 459 | } 460 | } 461 | 462 | listener?.start(queue: .main) 463 | 464 | // Wait for either successful startup or an error 465 | var status: (isComplete: Bool, error: Error?) 
= (false, nil) 466 | repeat { 467 | try await Task.sleep(nanoseconds: 100_000_000) // 100ms 468 | status = await startupState.checkStatus() 469 | } while !status.isComplete 470 | 471 | // If there was a startup error, throw it 472 | if let error = status.error { 473 | self.stop() 474 | throw error 475 | } 476 | 477 | isRunning = true 478 | 479 | Logger.info("Server started", metadata: ["port": "\(port.rawValue)"]) 480 | 481 | // Keep the server running 482 | while isRunning { 483 | try await Task.sleep(nanoseconds: 1_000_000_000) 484 | } 485 | } 486 | 487 | func stop() { 488 | isRunning = false 489 | listener?.cancel() 490 | } 491 | 492 | // MARK: - Connection Handling 493 | private func handleConnection(_ connection: NWConnection) { 494 | connection.stateUpdateHandler = { [weak self] state in 495 | switch state { 496 | case .ready: 497 | Task { @MainActor [weak self] in 498 | guard let self else { return } 499 | self.receiveData(connection) 500 | } 501 | case .failed(let error): 502 | Logger.error("Connection failed", metadata: ["error": error.localizedDescription]) 503 | connection.cancel() 504 | case .cancelled: 505 | // Connection is already cancelled, no need to cancel again 506 | break 507 | default: 508 | break 509 | } 510 | } 511 | connection.start(queue: .main) 512 | } 513 | 514 | private func receiveData(_ connection: NWConnection) { 515 | connection.receive(minimumIncompleteLength: 1, maximumLength: 65536) { 516 | [weak self] content, _, isComplete, error in 517 | if let error = error { 518 | Logger.error("Receive error", metadata: ["error": error.localizedDescription]) 519 | connection.cancel() 520 | return 521 | } 522 | 523 | guard let data = content, !data.isEmpty else { 524 | if isComplete { 525 | connection.cancel() 526 | } 527 | return 528 | } 529 | 530 | Task { @MainActor [weak self] in 531 | guard let self else { return } 532 | do { 533 | let response = try await self.handleRequest(data) 534 | self.send(response, on: connection) 535 | } catch { 536 | let errorResponse = self.errorResponse(error) 537 | self.send(errorResponse, on: connection) 538 | } 539 | } 540 | } 541 | } 542 | 543 | private func send(_ response: HTTPResponse, on connection: NWConnection) { 544 | let data = response.serialize() 545 | Logger.info( 546 | "Serialized response", metadata: ["data": String(data: data, encoding: .utf8) ?? ""]) 547 | connection.send( 548 | content: data, 549 | completion: .contentProcessed { [weak connection] error in 550 | if let error = error { 551 | Logger.error( 552 | "Failed to send response", metadata: ["error": error.localizedDescription]) 553 | } else { 554 | Logger.info("Response sent successfully") 555 | } 556 | if connection?.state != .cancelled { 557 | connection?.cancel() 558 | } 559 | }) 560 | } 561 | 562 | // MARK: - Request Handling 563 | private func handleRequest(_ data: Data) async throws -> HTTPResponse { 564 | Logger.info( 565 | "Received request data", metadata: ["data": String(data: data, encoding: .utf8) ?? ""]) 566 | 567 | guard let request = HTTPRequest(data: data) else { 568 | Logger.error("Failed to parse request") 569 | return HTTPResponse(statusCode: .badRequest, body: "Invalid request") 570 | } 571 | 572 | Logger.info( 573 | "Parsed request", 574 | metadata: [ 575 | "method": request.method, 576 | "path": request.path, 577 | "headers": "\(request.headers)", 578 | "body": String(data: request.body ?? Data(), encoding: .utf8) ?? 
"", 579 | ]) 580 | 581 | // Find matching route 582 | guard let route = routes.first(where: { $0.matches(request) }) else { 583 | return HTTPResponse(statusCode: .notFound, body: "Not found") 584 | } 585 | 586 | // Handle the request 587 | let response = try await route.handler(request) 588 | 589 | Logger.info( 590 | "Sending response", 591 | metadata: [ 592 | "statusCode": "\(response.statusCode.rawValue)", 593 | "headers": "\(response.headers)", 594 | "body": String(data: response.body ?? Data(), encoding: .utf8) ?? "", 595 | ]) 596 | 597 | return response 598 | } 599 | 600 | private func errorResponse(_ error: Error) -> HTTPResponse { 601 | HTTPResponse( 602 | statusCode: .internalServerError, 603 | headers: ["Content-Type": "application/json"], 604 | body: try! JSONEncoder().encode(APIError(message: error.localizedDescription)) 605 | ) 606 | } 607 | } 608 | ```