This is page 12 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .all-contributorsrc
├── .cursorignore
├── .devcontainer
│   ├── devcontainer.json
│   ├── post-install.sh
│   └── README.md
├── .dockerignore
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── ci-lume.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-pylume.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       └── test-validation-script.yml
├── .gitignore
├── .vscode
│   ├── docs.code-workspace
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── composite-agents.md
│   ├── cua-hackathon.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .gitignore
│   ├── .prettierrc
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   └── meta.json
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── meta.json
│   │       │   └── sandboxed-python.mdx
│   │       ├── index.mdx
│   │       ├── libraries
│   │       │   ├── agent
│   │       │   │   └── index.mdx
│   │       │   ├── computer
│   │       │   │   └── index.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── core
│   │       │   │   └── index.mdx
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   └── som
│   │       │       ├── configuration.mdx
│   │       │       └── index.mdx
│   │       ├── meta.json
│   │       ├── quickstart-cli.mdx
│   │       ├── quickstart-devs.mdx
│   │       └── telemetry.mdx
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   └── llms.txt
│   │   │       └── route.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── iou.tsx
│   │   │   └── mermaid.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   └── mdx-components.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── .prettierrc
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   └── uitars.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   └── test_connection.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       ├── install_mcp_server.sh
│   │   │       └── start_mcp_server.sh
│   │   ├── pylume
│   │   │   ├── __init__.py
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── pylume
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── lume
│   │   │   │   ├── models.py
│   │   │   │   ├── pylume.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           └── test_omniparser.py
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── biome.json
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Dockerfile
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── pylume_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── pdm.lock
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── samples
│   └── community
│       ├── global-online
│       │   └── README.md
│       └── hack-the-north
│           └── README.md
├── scripts
│   ├── build-uv.sh
│   ├── build.ps1
│   ├── build.sh
│   ├── cleanup.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   └── run-docker-dev.sh
└── tests
    ├── pytest.ini
    ├── shell_cmd.py
    ├── test_files.py
    ├── test_mcp_server_session_management.py
    ├── test_mcp_server_streaming.py
    ├── test_shell_bash.py
    ├── test_telemetry.py
    ├── test_venv.py
    └── test_watchdog.py
```

# Files

--------------------------------------------------------------------------------
/libs/python/computer/computer/interface/base.py:
--------------------------------------------------------------------------------

```python
  1 | """Base interface for computer control."""
  2 | 
  3 | from abc import ABC, abstractmethod
  4 | from typing import Optional, Dict, Any, Tuple, List
  5 | from ..logger import Logger, LogLevel
  6 | from .models import MouseButton, CommandResult
  7 | 
  8 | class BaseComputerInterface(ABC):
  9 |     """Base class for computer control interfaces."""
 10 | 
 11 |     def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
 12 |         """Initialize interface.
 13 | 
 14 |         Args:
 15 |             ip_address: IP address of the computer to control
 16 |             username: Username for authentication
 17 |             password: Password for authentication
 18 |             api_key: Optional API key for cloud authentication
 19 |             vm_name: Optional VM name for cloud authentication
 20 |         """
 21 |         self.ip_address = ip_address
 22 |         self.username = username
 23 |         self.password = password
 24 |         self.api_key = api_key
 25 |         self.vm_name = vm_name
 26 |         self.logger = Logger("cua.interface", LogLevel.NORMAL)
 27 |         
 28 |         # Optional default delay time between commands (in seconds)
 29 |         self.delay: float = 0.0
 30 | 
 31 |     @abstractmethod
 32 |     async def wait_for_ready(self, timeout: int = 60) -> None:
 33 |         """Wait for interface to be ready.
 34 | 
 35 |         Args:
 36 |             timeout: Maximum time to wait in seconds
 37 | 
 38 |         Raises:
 39 |             TimeoutError: If interface is not ready within timeout
 40 |         """
 41 |         pass
 42 | 
 43 |     @abstractmethod
 44 |     def close(self) -> None:
 45 |         """Close the interface connection."""
 46 |         pass
 47 | 
 48 |     def force_close(self) -> None:
 49 |         """Force close the interface connection.
 50 | 
 51 |         By default, this just calls close(), but subclasses can override
 52 |         to provide more forceful cleanup.
 53 |         """
 54 |         self.close()
 55 | 
 56 |     # Mouse Actions
 57 |     @abstractmethod
 58 |     async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
 59 |         """Press and hold a mouse button.
 60 |         
 61 |         Args:
 62 |             x: X coordinate to press at. If None, uses current cursor position.
 63 |             y: Y coordinate to press at. If None, uses current cursor position.
 64 |             button: Mouse button to press ('left', 'middle', 'right').
 65 |             delay: Optional delay in seconds after the action
 66 |         """
 67 |         pass
 68 |     
 69 |     @abstractmethod
 70 |     async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
 71 |         """Release a mouse button.
 72 |         
 73 |         Args:
 74 |             x: X coordinate to release at. If None, uses current cursor position.
 75 |             y: Y coordinate to release at. If None, uses current cursor position.
 76 |             button: Mouse button to release ('left', 'middle', 'right').
 77 |             delay: Optional delay in seconds after the action
 78 |         """
 79 |         pass
 80 |     
 81 |     @abstractmethod
 82 |     async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
 83 |         """Perform a left mouse button click.
 84 |         
 85 |         Args:
 86 |             x: X coordinate to click at. If None, uses current cursor position.
 87 |             y: Y coordinate to click at. If None, uses current cursor position.
 88 |             delay: Optional delay in seconds after the action
 89 |         """
 90 |         pass
 91 | 
 92 |     @abstractmethod
 93 |     async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
 94 |         """Perform a right mouse button click.
 95 |         
 96 |         Args:
 97 |             x: X coordinate to click at. If None, uses current cursor position.
 98 |             y: Y coordinate to click at. If None, uses current cursor position.
 99 |             delay: Optional delay in seconds after the action
100 |         """
101 |         pass
102 | 
103 |     @abstractmethod
104 |     async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
105 |         """Perform a double left mouse button click.
106 |         
107 |         Args:
108 |             x: X coordinate to double-click at. If None, uses current cursor position.
109 |             y: Y coordinate to double-click at. If None, uses current cursor position.
110 |             delay: Optional delay in seconds after the action
111 |         """
112 |         pass
113 | 
114 |     @abstractmethod
115 |     async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
116 |         """Move the cursor to the specified screen coordinates.
117 |         
118 |         Args:
119 |             x: X coordinate to move cursor to.
120 |             y: Y coordinate to move cursor to.
121 |             delay: Optional delay in seconds after the action
122 |         """
123 |         pass
124 | 
125 |     @abstractmethod
126 |     async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
127 |         """Drag from current position to specified coordinates.
128 | 
129 |         Args:
130 |             x: The x coordinate to drag to
131 |             y: The y coordinate to drag to
132 |             button: The mouse button to use ('left', 'middle', 'right')
133 |             duration: How long the drag should take in seconds
134 |             delay: Optional delay in seconds after the action
135 |         """
136 |         pass
137 | 
138 |     @abstractmethod
139 |     async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
140 |         """Drag the cursor along a path of coordinates.
141 | 
142 |         Args:
143 |             path: List of (x, y) coordinate tuples defining the drag path
144 |             button: The mouse button to use ('left', 'middle', 'right')
145 |             duration: Total time in seconds that the drag operation should take
146 |             delay: Optional delay in seconds after the action
147 |         """
148 |         pass
149 | 
150 |     # Keyboard Actions
151 |     @abstractmethod
152 |     async def key_down(self, key: str, delay: Optional[float] = None) -> None:
153 |         """Press and hold a key.
154 |         
155 |         Args:
156 |             key: The key to press and hold (e.g., 'a', 'shift', 'ctrl').
157 |             delay: Optional delay in seconds after the action.
158 |         """
159 |         pass
160 |     
161 |     @abstractmethod
162 |     async def key_up(self, key: str, delay: Optional[float] = None) -> None:
163 |         """Release a previously pressed key.
164 |         
165 |         Args:
166 |             key: The key to release (e.g., 'a', 'shift', 'ctrl').
167 |             delay: Optional delay in seconds after the action.
168 |         """
169 |         pass
170 |     
171 |     @abstractmethod
172 |     async def type_text(self, text: str, delay: Optional[float] = None) -> None:
173 |         """Type the specified text string.
174 |         
175 |         Args:
176 |             text: The text string to type.
177 |             delay: Optional delay in seconds after the action.
178 |         """
179 |         pass
180 | 
181 |     @abstractmethod
182 |     async def press_key(self, key: str, delay: Optional[float] = None) -> None:
183 |         """Press and release a single key.
184 |         
185 |         Args:
186 |             key: The key to press (e.g., 'a', 'enter', 'escape').
187 |             delay: Optional delay in seconds after the action.
188 |         """
189 |         pass
190 | 
191 |     @abstractmethod
192 |     async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
193 |         """Press multiple keys simultaneously (keyboard shortcut).
194 |         
195 |         Args:
196 |             *keys: Variable number of keys to press together (e.g., 'ctrl', 'c').
197 |             delay: Optional delay in seconds after the action.
198 |         """
199 |         pass
200 | 
201 |     # Scrolling Actions
202 |     @abstractmethod
203 |     async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
204 |         """Scroll the mouse wheel by specified amounts.
205 |         
206 |         Args:
207 |             x: Horizontal scroll amount (positive = right, negative = left).
208 |             y: Vertical scroll amount (positive = up, negative = down).
209 |             delay: Optional delay in seconds after the action.
210 |         """
211 |         pass
212 |     
213 |     @abstractmethod
214 |     async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
215 |         """Scroll down by the specified number of clicks.
216 |         
217 |         Args:
218 |             clicks: Number of scroll clicks to perform downward.
219 |             delay: Optional delay in seconds after the action.
220 |         """
221 |         pass
222 | 
223 |     @abstractmethod
224 |     async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
225 |         """Scroll up by the specified number of clicks.
226 |         
227 |         Args:
228 |             clicks: Number of scroll clicks to perform upward.
229 |             delay: Optional delay in seconds after the action.
230 |         """
231 |         pass
232 | 
233 |     # Screen Actions
234 |     @abstractmethod
235 |     async def screenshot(self) -> bytes:
236 |         """Take a screenshot.
237 | 
238 |         Returns:
239 |             Raw bytes of the screenshot image
240 |         """
241 |         pass
242 | 
243 |     @abstractmethod
244 |     async def get_screen_size(self) -> Dict[str, int]:
245 |         """Get the screen dimensions.
246 | 
247 |         Returns:
248 |             Dict with 'width' and 'height' keys
249 |         """
250 |         pass
251 | 
252 |     @abstractmethod
253 |     async def get_cursor_position(self) -> Dict[str, int]:
254 |         """Get the current cursor position on screen.
255 |         
256 |         Returns:
257 |             Dict with 'x' and 'y' keys containing cursor coordinates.
258 |         """
259 |         pass
260 | 
261 |     # Clipboard Actions
262 |     @abstractmethod
263 |     async def copy_to_clipboard(self) -> str:
264 |         """Get the current clipboard content.
265 |         
266 |         Returns:
267 |             The text content currently stored in the clipboard.
268 |         """
269 |         pass
270 | 
271 |     @abstractmethod
272 |     async def set_clipboard(self, text: str) -> None:
273 |         """Set the clipboard content to the specified text.
274 |         
275 |         Args:
276 |             text: The text to store in the clipboard.
277 |         """
278 |         pass
279 | 
280 |     # File System Actions
281 |     @abstractmethod
282 |     async def file_exists(self, path: str) -> bool:
283 |         """Check if a file exists at the specified path.
284 |         
285 |         Args:
286 |             path: The file path to check.
287 |             
288 |         Returns:
289 |             True if the file exists, False otherwise.
290 |         """
291 |         pass
292 | 
293 |     @abstractmethod
294 |     async def directory_exists(self, path: str) -> bool:
295 |         """Check if a directory exists at the specified path.
296 |         
297 |         Args:
298 |             path: The directory path to check.
299 |             
300 |         Returns:
301 |             True if the directory exists, False otherwise.
302 |         """
303 |         pass
304 |     
305 |     @abstractmethod
306 |     async def list_dir(self, path: str) -> List[str]:
307 |         """List the contents of a directory.
308 |         
309 |         Args:
310 |             path: The directory path to list.
311 |             
312 |         Returns:
313 |             List of file and directory names in the specified directory.
314 |         """
315 |         pass
316 |     
317 |     @abstractmethod
318 |     async def read_text(self, path: str) -> str:
319 |         """Read the text contents of a file.
320 |         
321 |         Args:
322 |             path: The file path to read from.
323 |             
324 |         Returns:
325 |             The text content of the file.
326 |         """
327 |         pass
328 |     
329 |     @abstractmethod
330 |     async def write_text(self, path: str, content: str) -> None:
331 |         """Write text content to a file.
332 |         
333 |         Args:
334 |             path: The file path to write to.
335 |             content: The text content to write.
336 |         """
337 |         pass
338 |     
339 |     @abstractmethod
340 |     async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
341 |         """Read file binary contents with optional seeking support.
342 |         
343 |         Args:
344 |             path: Path to the file
345 |             offset: Byte offset to start reading from (default: 0)
346 |             length: Number of bytes to read (default: None for entire file)
347 |         """
348 |         pass
349 |     
350 |     @abstractmethod
351 |     async def write_bytes(self, path: str, content: bytes) -> None:
352 |         """Write binary content to a file.
353 |         
354 |         Args:
355 |             path: The file path to write to.
356 |             content: The binary content to write.
357 |         """
358 |         pass
359 |     
360 |     @abstractmethod
361 |     async def delete_file(self, path: str) -> None:
362 |         """Delete a file at the specified path.
363 |         
364 |         Args:
365 |             path: The file path to delete.
366 |         """
367 |         pass
368 |     
369 |     @abstractmethod
370 |     async def create_dir(self, path: str) -> None:
371 |         """Create a directory at the specified path.
372 |         
373 |         Args:
374 |             path: The directory path to create.
375 |         """
376 |         pass
377 |     
378 |     @abstractmethod
379 |     async def delete_dir(self, path: str) -> None:
380 |         """Delete a directory at the specified path.
381 |         
382 |         Args:
383 |             path: The directory path to delete.
384 |         """
385 |         pass
386 |     
387 |     @abstractmethod
388 |     async def get_file_size(self, path: str) -> int:
389 |         """Get the size of a file in bytes.
390 |         
391 |         Args:
392 |             path: The file path to get the size of.
393 |             
394 |         Returns:
395 |             The size of the file in bytes.
396 |         """
397 |         pass
398 |     
399 |     @abstractmethod
400 |     async def run_command(self, command: str) -> CommandResult:
401 |         """Run shell command and return structured result.
402 |         
403 |         Executes a shell command using subprocess.run with shell=True and check=False.
404 |         The command is run in the target environment and captures both stdout and stderr.
405 |         
406 |         Args:
407 |             command (str): The shell command to execute
408 |             
409 |         Returns:
410 |             CommandResult: A structured result containing:
411 |                 - stdout (str): Standard output from the command
412 |                 - stderr (str): Standard error from the command  
413 |                 - returncode (int): Exit code from the command (0 indicates success)
414 |                 
415 |         Raises:
416 |             RuntimeError: If the command execution fails at the system level
417 |             
418 |         Example:
419 |             result = await interface.run_command("ls -la")
420 |             if result.returncode == 0:
421 |                 print(f"Output: {result.stdout}")
422 |             else:
423 |                 print(f"Error: {result.stderr}, Exit code: {result.returncode}")
424 |         """
425 |         pass
426 | 
427 |     # Accessibility Actions
428 |     @abstractmethod
429 |     async def get_accessibility_tree(self) -> Dict:
430 |         """Get the accessibility tree of the current screen.
431 |         
432 |         Returns:
433 |             Dict containing the hierarchical accessibility information of screen elements.
434 |         """
435 |         pass
436 |     
437 |     @abstractmethod
438 |     async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
439 |         """Convert screenshot coordinates to screen coordinates.
440 | 
441 |         Args:
442 |             x: X coordinate in screenshot space
443 |             y: Y coordinate in screenshot space
444 | 
445 |         Returns:
446 |             tuple[float, float]: (x, y) coordinates in screen space
447 |         """
448 |         pass
449 | 
450 |     @abstractmethod
451 |     async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
452 |         """Convert screen coordinates to screenshot coordinates.
453 | 
454 |         Args:
455 |             x: X coordinate in screen space
456 |             y: Y coordinate in screen space
457 | 
458 |         Returns:
459 |             tuple[float, float]: (x, y) coordinates in screenshot space
460 |         """
461 |         pass
462 | 
```
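The class above only declares the control surface; the concrete implementations live in the sibling `macos.py`, `linux.py`, and `windows.py` modules listed in the directory tree. As a rough usage sketch only (not code from the repository), the snippet below drives an already-constructed interface instance through a few of the abstract methods. The import path is inferred from the package layout above, and `demo` is a hypothetical helper.

```python
# Illustrative sketch: every call below is part of the abstract API defined in
# base.py; constructing a concrete interface is left to the caller.
# Import path inferred from the package layout (libs/python/computer).
from computer.interface.base import BaseComputerInterface


async def demo(interface: BaseComputerInterface) -> None:
    # Block until the computer-server inside the VM answers.
    await interface.wait_for_ready(timeout=60)

    # Mouse and keyboard actions from the abstract API.
    await interface.move_cursor(100, 200)
    await interface.left_click()
    await interface.type_text("hello world")
    await interface.hotkey("ctrl", "s")

    # Screen capture and shell execution.
    image_bytes = await interface.screenshot()
    size = await interface.get_screen_size()
    result = await interface.run_command("uname -a")
    print(size["width"], size["height"], len(image_bytes), result.returncode)

    interface.close()

# A concrete subclass (e.g. the macOS/Linux/Windows implementation) would be
# constructed elsewhere and passed in, then run with:
#   import asyncio; asyncio.run(demo(my_interface))
```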

--------------------------------------------------------------------------------
/libs/lumier/src/lib/vm.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Initialize global flags
  4 | export PULL_IN_PROGRESS=0
  5 | 
  6 | start_vm() {
  7 |     # Determine storage path for VM
  8 |     STORAGE_PATH="$HOST_STORAGE_PATH"
  9 |     if [ -z "$STORAGE_PATH" ]; then
 10 |         STORAGE_PATH="storage_${VM_NAME}"
 11 |     fi
 12 | 
 13 |     # Check if VM exists and its status using JSON format - quietly
 14 |     VM_INFO=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}")
 15 | 
 16 |     # Check if VM not found error
 17 |     if [[ $VM_INFO == *"Virtual machine not found"* ]]; then
 18 |         IMAGE_NAME="${VERSION##*/}"
 19 |         # Parse registry and organization from VERSION
 20 |         REGISTRY=$(echo $VERSION | cut -d'/' -f1)
 21 |         ORGANIZATION=$(echo $VERSION | cut -d'/' -f2)
 22 |         
 23 |         echo "Pulling VM image $IMAGE_NAME..."
 24 |         lume_pull "$IMAGE_NAME" "$VM_NAME" "$STORAGE_PATH" "$REGISTRY" "$ORGANIZATION"
 25 |     else
 26 |         # Parse the JSON status - check if it contains "status" : "running"
 27 |         if [[ $VM_INFO == *'"status" : "running"'* ]]; then
 28 |             lume_stop "$VM_NAME" "$STORAGE_PATH"
 29 |         fi
 30 |     fi
 31 | 
 32 |     # Format memory size for display purposes
 33 |     MEMORY_DISPLAY="$RAM_SIZE"
 34 |     if [[ ! "$RAM_SIZE" == *"GB"* && ! "$RAM_SIZE" == *"MB"* ]]; then
 35 |         MEMORY_DISPLAY="${RAM_SIZE}MB"
 36 |     fi
 37 |     
 38 |     # Set VM parameters using the wrapper function
 39 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
 40 |         echo "Updating VM settings: cpu=$CPU_CORES memory=$MEMORY_DISPLAY display=$DISPLAY"
 41 |     fi
 42 |     lume_set "$VM_NAME" "$STORAGE_PATH" "$CPU_CORES" "$RAM_SIZE" "$DISPLAY"
 43 | 
 44 |     # Fetch VM configuration - quietly (don't display to console)
 45 |     CONFIG_JSON=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}")
 46 |     
 47 |     # Setup shared directory args if necessary
 48 |     SHARED_DIR_ARGS=""
 49 |     if [ -d "/shared" ]; then
 50 |         if [ -n "$HOST_SHARED_PATH" ]; then
 51 |             SHARED_DIR_ARGS="--shared-dir=$HOST_SHARED_PATH"
 52 |         else
 53 |             echo "Warning: /shared volume exists but HOST_SHARED_PATH is not set. Cannot mount volume."
 54 |         fi
 55 |     fi
 56 | 
 57 |     # Run VM with VNC and shared directory using curl
 58 |     lume_run $SHARED_DIR_ARGS --storage "$STORAGE_PATH" "$VM_NAME" &
 59 |     # lume run "$VM_NAME" --storage "$STORAGE_PATH" --no-display
 60 | 
 61 |     # sleep 10000000
 62 | 
 63 |     # Wait for VM to be running and VNC URL to be available
 64 |     vm_ip=""
 65 |     vnc_url=""
 66 |     max_attempts=30
 67 |     attempt=0
 68 |     
 69 |     while [ $attempt -lt $max_attempts ]; do
 70 |             # Get VM info as JSON using the API function - pass debug flag
 71 |         VM_INFO=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}")
 72 |         
 73 |         # Extract status, IP address, and VNC URL using the helper function
 74 |         vm_status=$(extract_json_field "status" "$VM_INFO")
 75 |         vm_ip=$(extract_json_field "ipAddress" "$VM_INFO")
 76 |         vnc_url=$(extract_json_field "vncUrl" "$VM_INFO")
 77 | 
 78 |         # Check if VM status is 'running' and we have IP and VNC URL
 79 |         if [ "$vm_status" = "running" ] && [ -n "$vm_ip" ] && [ -n "$vnc_url" ]; then
 80 |             break
 81 |         fi
 82 |         
 83 |         sleep 2
 84 |         attempt=$((attempt + 1))
 85 |     done
 86 |     
 87 |     if [ -z "$vm_ip" ] || [ -z "$vnc_url" ]; then
 88 |         echo "Timed out waiting for VM to start or VNC URL to become available."
 89 |         lume_stop "$VM_NAME" "$STORAGE_PATH" > /dev/null 2>&1
 90 |         # lume stop "$VM_NAME" --storage "$STORAGE_PATH" > /dev/null 2>&1
 91 |         exit 1
 92 |     fi
 93 | 
 94 |     # Parse VNC URL to extract password and port
 95 |     VNC_PASSWORD=$(echo "$vnc_url" | sed -n 's/.*:\(.*\)@.*/\1/p')
 96 |     VNC_PORT=$(echo "$vnc_url" | sed -n 's/.*:\([0-9]\+\)$/\1/p')
 97 |     
 98 |     # Wait for SSH to become available
 99 |     wait_for_ssh "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" 5 20
100 | 
101 |     # Export VNC variables for entry.sh to use
102 |     export VNC_PORT
103 |     export VNC_PASSWORD
104 |     
105 |     # Execute on-logon.sh if present
106 |     on_logon_script="/run/lifecycle/on-logon.sh"
107 |     
108 |     # Only show detailed logs in debug mode
109 |     if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
110 |         echo "Running on-logon.sh hook script on VM..."
111 |     fi
112 |     
113 |     # Check if script exists
114 |     if [ ! -f "$on_logon_script" ]; then
115 |         echo "Warning: on-logon.sh hook script not found at $on_logon_script"
116 |     else
117 |         # Execute the remote script
118 |         execute_remote_script "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" "$on_logon_script" "$VNC_PASSWORD" "$HOST_SHARED_PATH"
119 |     fi
120 | }
121 | 
122 | # Get VM information using curl
123 | lume_get() {
124 |     local vm_name="$1"
125 |     local storage="$2"
126 |     local format="${3:-json}"
127 |     local debug="${4:-false}"
128 |     
129 |     local api_host="${LUME_API_HOST:-host.docker.internal}"
130 |     local api_port="${LUME_API_PORT:-7777}"
131 |     
132 |     # URL encode the storage path for the query parameter
133 |     # Replace special characters with their URL encoded equivalents
134 |     local encoded_storage=$(echo "$storage" | sed 's/\//%2F/g' | sed 's/ /%20/g' | sed 's/:/%3A/g')
135 |     
136 |     # Construct API URL with encoded storage parameter
137 |     local api_url="http://${api_host}:${api_port}/lume/vms/${vm_name}?storage=${encoded_storage}"
138 |     
139 |     # Construct the full curl command
140 |     local curl_cmd="curl --connect-timeout 6000 --max-time 5000 -s '$api_url'"
141 |     
142 |     # Print debug info
143 |     if [[ "$debug" == "true" || "$LUMIER_DEBUG" == "1" ]]; then
144 |         echo "[DEBUG] Calling API: $api_url"
145 |         echo "[DEBUG] Full curl command: $curl_cmd"
146 |     fi
147 |     
148 |     # Log curl commands only when in debug mode
149 |     if [[ "$debug" == "true" || "$LUMIER_DEBUG" == "1" ]]; then
150 |         echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] DEBUG: Executing curl request: $api_url" >&2
151 |     fi
152 |     
153 |     # Make the API call
154 |     local response=$(curl --connect-timeout 6000 \
155 |       --max-time 5000 \
156 |       -s \
157 |       "$api_url")
158 |     
159 |     # Print the response if debugging is enabled
160 |     if [[ "$debug" == "true" || "${LUMIER_DEBUG:-0}" == "1" ]]; then
161 |         echo "[DEBUG] API Response:"
162 |         echo "$response" | jq '.' 2>/dev/null || echo "$response"
163 |     fi
164 |     
165 |     # Output the response so callers can capture it
166 |     echo "$response"
167 | }
168 | 
169 | # Set VM properties using curl
170 | lume_set() {
171 |     local vm_name="$1"
172 |     local storage="$2"
173 |     local cpu="${3:-4}"
174 |     local memory="${4:-8192}"
175 |     local display="${5:-1024x768}"
176 |     
177 |     local api_host="${LUME_API_HOST:-host.docker.internal}"
178 |     local api_port="${LUME_API_PORT:-7777}"
179 |     
180 |     # Handle memory format for the API
181 |     if [[ "$memory" == *"GB"* ]]; then
182 |         # Already in GB format, keep as is
183 |         :  # No-op
184 |     elif [[ "$memory" =~ ^[0-9]+$ ]]; then
185 |         # If memory is a simple number, assume MB and convert to GB
186 |         memory="$(awk "BEGIN { printf \"%.1f\", $memory/1024 }")GB"
187 |     fi
188 |     
189 |     # Only show memory formatting debug in debug mode
190 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
191 |         echo "[DEBUG] Formatted memory value: $memory"
192 |     fi
193 |     
194 |     # Store response to conditionally show based on debug mode
195 |     local response=$(curl --connect-timeout 6000 \
196 |       --max-time 5000 \
197 |       -s \
198 |       -X PATCH \
199 |       -H "Content-Type: application/json" \
200 |       -d "{
201 |         \"cpu\": $cpu,
202 |         \"memory\": \"$memory\",
203 |         \"display\": \"$display\",
204 |         \"storage\": \"$storage\"
205 |       }" \
206 |       "http://${api_host}:${api_port}/lume/vms/${vm_name}")
207 |       
208 |     # Only show response in debug mode
209 |     if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then
210 |         echo "$response"
211 |     fi
212 | }
213 | 
214 | stop_vm() {
215 |     local in_cleanup=${1:-false} # Optional first argument to indicate if called from cleanup trap
216 |     echo "Stopping VM '$VM_NAME'..."
217 |     STORAGE_PATH="$HOST_STORAGE_PATH"
218 |     
219 |     # Only show storage path in debug mode
220 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
221 |         echo "STORAGE_PATH: $STORAGE_PATH"
222 |     fi
223 |     
224 |     VM_INFO=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}")
225 |     vm_status=$(extract_json_field "status" "$VM_INFO")
226 | 
227 |     if [ "$vm_status" == "running" ]; then
228 |         lume_stop "$VM_NAME" "$STORAGE_PATH"
229 |     elif [ "$vm_status" == "stopped" ]; then
230 |         echo "VM '$VM_NAME' is already stopped."
231 |     elif [ "$in_cleanup" = true ]; then
232 |         # If we are in the cleanup trap and status is unknown or VM not found, 
233 |         # still attempt a stop just in case.
234 |         echo "VM status is unknown ('$vm_status') or VM not found during cleanup. Attempting stop anyway."
235 |         lume_stop "$VM_NAME" "$STORAGE_PATH"
236 |         sleep 5
237 |         echo "VM '$VM_NAME' stop command issued as a precaution."
238 |     else
239 |         echo "VM status is unknown ('$vm_status') or VM not found. Not attempting stop."
240 |     fi
241 | }
242 | 
243 | is_vm_running() {
244 |     # Check VM status using the API function
245 |     local vm_info
246 |     vm_info=$(lume_get "$VM_NAME" "$HOST_STORAGE_PATH")
247 |     if [[ $vm_info == *'"status" : "running"'* ]]; then
248 |         return 0 # Running
249 |     else
250 |         return 1 # Not running or doesn't exist
251 |     fi
252 |     # lume ls | grep -q "$VM_NAME" # Old CLI check
253 | }
254 | 
255 | # Stop VM with storage location specified using curl
256 | lume_stop() {
257 |     local vm_name="$1"
258 |     local storage="$2"
259 |     
260 |     local api_host="${LUME_API_HOST:-host.docker.internal}"
261 |     local api_port="${LUME_API_PORT:-7777}"
262 |     
263 |     # Only log in debug mode
264 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
265 |         echo "Stopping VM $vm_name..."
266 |     fi
267 |     
268 |     # Execute command and capture response
269 |     local response
270 |     if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then
271 |         # Show output in debug mode
272 |         response=$(curl --connect-timeout 6000 \
273 |           --max-time 5000 \
274 |           -X POST \
275 |           -H "Content-Type: application/json" \
276 |           -d '{"storage":"'$storage'"}' \
277 |           "http://${api_host}:${api_port}/lume/vms/${vm_name}/stop")
278 |         echo "$response"
279 |     else
280 |         # Run silently in normal mode
281 |         response=$(curl --connect-timeout 6000 \
282 |           --max-time 5000 \
283 |           -s \
284 |           -X POST \
285 |           -H "Content-Type: application/json" \
286 |           -d '{"storage":"'$storage'"}' \
287 |           "http://${api_host}:${api_port}/lume/vms/${vm_name}/stop")
288 |     fi
289 | }
290 | 
291 | # Pull a VM image using curl
292 | lume_pull() {
293 |     local image="$1"      # Image name with tag
294 |     local vm_name="$2"    # Name for the new VM
295 |     local storage="$3"    # Storage location
296 |     local registry="${4:-ghcr.io}"  # Registry, default is ghcr.io
297 |     local organization="${5:-trycua}" # Organization, default is trycua
298 |     
299 |     local api_host="${LUME_API_HOST:-host.docker.internal}"
300 |     local api_port="${LUME_API_PORT:-7777}"
301 |     
302 |     # Mark that pull is in progress for interrupt handling
303 |     export PULL_IN_PROGRESS=1
304 |     
305 |     # Only log full details in debug mode
306 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
307 |         echo "Pulling image $image from $registry/$organization..."
308 |     else
309 |         echo "Pulling image $image..."
310 |     fi
311 |     
312 |     # Inform users how to check pull progress
313 |     echo "You can check the pull progress using: lume logs -f"
314 |     
315 |     # Pull image via API and capture response
316 |     local response
317 |     if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then
318 |         # Show full response in debug mode - no timeout limits
319 |         response=$(curl \
320 |           -X POST \
321 |           -H "Content-Type: application/json" \
322 |           -d "{
323 |             \"image\": \"$image\",
324 |             \"name\": \"$vm_name\",
325 |             \"registry\": \"$registry\",
326 |             \"organization\": \"$organization\",
327 |             \"storage\": \"$storage\"
328 |           }" \
329 |           "http://${api_host}:${api_port}/lume/pull")
330 |         echo "$response"
331 |     else
332 |         # Run silently in normal mode - no timeout limits
333 |         response=$(curl \
334 |           -s \
335 |           -X POST \
336 |           -H "Content-Type: application/json" \
337 |           -d "{
338 |             \"image\": \"$image\",
339 |             \"name\": \"$vm_name\",
340 |             \"registry\": \"$registry\",
341 |             \"organization\": \"$organization\",
342 |             \"storage\": \"$storage\"
343 |           }" \
344 |           "http://${api_host}:${api_port}/lume/pull")
345 |     fi
346 |     
347 |     # Unset pull in progress flag
348 |     export PULL_IN_PROGRESS=0
349 | }
350 | 
351 | 
352 | # Run VM with VNC client started and shared directory using curl
353 | lume_run() {
354 |     # Parse args
355 |     local shared_dir=""
356 |     local storage=""
357 |     local vm_name="lume_vm"
358 |     local no_display=true
359 |     while [[ $# -gt 0 ]]; do
360 |         case $1 in
361 |             --shared-dir=*)
362 |                 shared_dir="${1#*=}"
363 |                 shift
364 |                 ;;
365 |             --storage)
366 |                 storage="$2"
367 |                 shift 2
368 |                 ;;
369 |             --no-display)
370 |                 no_display=true
371 |                 shift
372 |                 ;;
373 |             *)
374 |                 # Assume last arg is VM name if not an option
375 |                 vm_name="$1"
376 |                 shift
377 |                 ;;
378 |         esac
379 |     done
380 |     
381 |     local api_host="${LUME_API_HOST:-host.docker.internal}"
382 |     local api_port="${LUME_API_PORT:-7777}"
383 | 
384 |     # Only log in debug mode
385 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
386 |         echo "Running VM $vm_name..."
387 |     fi
388 |     
389 |     # Build the JSON body dynamically based on what's provided
390 |     local json_body="{\"noDisplay\": true"
391 |     
392 |     # Only include shared directories if shared_dir is provided
393 |     if [[ -n "$shared_dir" ]]; then
394 |         json_body+=", \"sharedDirectories\": [{\"hostPath\": \"$shared_dir\", \"readOnly\": false}]"
395 |     fi
396 |     
397 |     # Only include storage if it's provided
398 |     if [[ -n "$storage" ]]; then
399 |         json_body+=", \"storage\": \"$storage\""
400 |     fi
401 |     
402 |     # Add recovery mode (always false)
403 |     json_body+=", \"recoveryMode\": false}"
404 | 
405 |     # Execute the command and store the response
406 |     local response
407 |     if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then
408 |         # Show response in debug mode
409 |         response=$(curl --connect-timeout 6000 \
410 |           --max-time 5000 \
411 |           -X POST \
412 |           -H 'Content-Type: application/json' \
413 |           -d "$json_body" \
414 |           http://${api_host}:${api_port}/lume/vms/$vm_name/run)
415 |         echo "$response"
416 |     else
417 |         # Run silently in normal mode
418 |         response=$(curl --connect-timeout 6000 \
419 |           --max-time 5000 \
420 |           -s \
421 |           -X POST \
422 |           -H 'Content-Type: application/json' \
423 |           -d "$json_body" \
424 |           http://${api_host}:${api_port}/lume/vms/$vm_name/run)
425 |     fi
426 | }
427 | 
428 | # Delete a VM using curl
429 | lume_delete() {
430 |     local vm_name="$1"
431 |     local storage="$2"
432 |     
433 |     local api_host="${LUME_API_HOST:-host.docker.internal}"
434 |     local api_port="${LUME_API_PORT:-7777}"
435 |     
436 |     # URL encode the storage path for the query parameter
437 |     # Replace special characters with their URL encoded equivalents
438 |     local encoded_storage=$(echo "$storage" | sed 's/\//%2F/g' | sed 's/ /%20/g' | sed 's/:/%3A/g')
439 |     
440 |     # Construct API URL with encoded storage parameter
441 |     local api_url="http://${api_host}:${api_port}/lume/vms/${vm_name}?storage=${encoded_storage}"
442 |     
443 |     # Only log in debug mode
444 |     if [[ "$LUMIER_DEBUG" == "1" ]]; then
445 |         echo "Deleting VM $vm_name from storage $storage..."
446 |     fi
447 |     
448 |     # Execute command and capture response
449 |     local response
450 |     if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then
451 |         # Show output in debug mode
452 |         response=$(curl --connect-timeout 6000 \
453 |           --max-time 5000 \
454 |           -X DELETE \
455 |           "$api_url")
456 |         echo "$response"
457 |     else
458 |         # Run silently in normal mode
459 |         response=$(curl --connect-timeout 6000 \
460 |           --max-time 5000 \
461 |           -s \
462 |           -X DELETE \
463 |           "$api_url")
464 |     fi
465 | }
```
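
These helpers are thin curl wrappers around the local Lume HTTP API. Below is a minimal Python sketch of the same `/lume/pull` request, assuming the `requests` package is available; the endpoint, default host/port, and JSON fields are taken from the script above, while the function name and timeout are illustrative.

```python
# Sketch only: mirrors the curl call in lume_pull() above.
import os
import requests  # assumed dependency; not part of the repo

def pull_image(image: str, vm_name: str, registry: str, organization: str, storage: str) -> str:
    host = os.environ.get("LUME_API_HOST", "host.docker.internal")
    port = os.environ.get("LUME_API_PORT", "7777")
    payload = {
        "image": image,
        "name": vm_name,
        "registry": registry,
        "organization": organization,
        "storage": storage,
    }
    # The shell helper runs curl with no timeout; a long explicit timeout is used here instead.
    resp = requests.post(f"http://{host}:{port}/lume/pull", json=payload, timeout=3600)
    resp.raise_for_status()
    return resp.text  # the script simply echoes the raw response when LUMIER_DEBUG=1
```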

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/utils.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Shared utilities for ScreenSpot-Pro benchmarking and interactive testing.
  4 | """
  5 | 
  6 | import dotenv
  7 | dotenv.load_dotenv()
  8 | 
  9 | import asyncio
 10 | import base64
 11 | import os
 12 | import sys
 13 | import subprocess as sp
 14 | import statistics
 15 | from datetime import datetime
 16 | from io import BytesIO
 17 | from typing import List, Union, Tuple, Optional
 18 | 
 19 | from PIL import Image, ImageDraw
 20 | from tqdm import tqdm
 21 | import gc
 22 | import torch
 23 | 
 24 | # Add parent directory to path for imports
 25 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 26 | from agent.agent import ComputerAgent
 27 | from models.base import ModelProtocol
 28 | 
 29 | def get_gpu_memory() -> List[int]:
 30 |     """
 31 |     Get GPU memory usage using nvidia-smi.
 32 |     
 33 |     Returns:
 34 |         List of free memory values in MB for each GPU
 35 |     """
 36 |     try:
 37 |         command = "nvidia-smi --query-gpu=memory.free --format=csv"
 38 |         memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
 39 |         memory_free_values = [int(x.split()[0]) for x in memory_free_info]
 40 |         return memory_free_values
 41 |     except (sp.CalledProcessError, FileNotFoundError, IndexError):
 42 |         # Fallback to torch if nvidia-smi is not available
 43 |         if torch.cuda.is_available():
 44 |             device = torch.cuda.current_device()
 45 |             total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024
 46 |             reserved = torch.cuda.memory_reserved(device) / 1024 / 1024
 47 |             return [int(total - reserved)]
 48 |         return [0]
 49 | 
 50 | 
 51 | def get_vram_usage() -> dict:
 52 |     """
 53 |     Get current VRAM usage statistics.
 54 |     
 55 |     Returns:
 56 |         Dictionary with VRAM usage info (in MB)
 57 |     """
 58 |     if torch.cuda.is_available():
 59 |         device = torch.cuda.current_device()
 60 |         allocated = torch.cuda.memory_allocated(device) / 1024 / 1024  # Convert to MB
 61 |         reserved = torch.cuda.memory_reserved(device) / 1024 / 1024   # Convert to MB
 62 |         total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024
 63 |         return {
 64 |             'allocated_mb': allocated,
 65 |             'reserved_mb': reserved,
 66 |             'total_mb': total,
 67 |             'free_mb': total - reserved
 68 |         }
 69 |     else:
 70 |         return {
 71 |             'allocated_mb': 0.0,
 72 |             'reserved_mb': 0.0,
 73 |             'total_mb': 0.0,
 74 |             'free_mb': 0.0
 75 |         }
 76 | 
 77 | 
 78 | def get_available_models() -> List[Union[str, ModelProtocol]]:
 79 |     """
 80 |     Get list of available models for testing.
 81 |     
 82 |     Returns:
 83 |         List of model strings and model classes
 84 |     """
 85 |     local_provider = "huggingface-local/"  # Options: huggingface-local/ or mlx/
 86 |     
 87 |     # from models.gta1 import GTA1Model
 88 | 
 89 |     models = [
 90 |         # === ComputerAgent model strings ===
 91 |         "openai/computer-use-preview",
 92 |         "anthropic/claude-opus-4-20250514",
 93 |         # f"{local_provider}HelloKKMe/GTA1-7B",
 94 |         # f"{local_provider}HelloKKMe/GTA1-32B",
 95 |         "openai/computer-use-preview+openai/gpt-4o-mini",
 96 |         "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini",
 97 |         
 98 |         # === Reference model classes ===
 99 |         # GTA1Model("HelloKKMe/GTA1-7B"),
100 |         # GTA1Model("HelloKKMe/GTA1-32B"), 
101 |     ]
102 |     
103 |     return models
104 | 
105 | 
106 | def is_click_in_bbox(click_coords: Optional[Tuple[int, int]], bbox: List[int]) -> bool:
107 |     """
108 |     Check if click coordinates are within the bounding box.
109 |     
110 |     Args:
111 |         click_coords: (x, y) coordinates or None
112 |         bbox: [x1, y1, x2, y2] bounding box
113 |         
114 |     Returns:
115 |         True if click is within bbox, False otherwise
116 |     """
117 |     if click_coords is None:
118 |         return False
119 |     
120 |     x, y = click_coords
121 |     x1, y1, x2, y2 = bbox
122 |     
123 |     return x1 <= x <= x2 and y1 <= y <= y2
124 | 
125 | 
126 | def image_to_base64(image: Image.Image) -> str:
127 |     """
128 |     Convert PIL Image to base64 string.
129 |     
130 |     Args:
131 |         image: PIL Image
132 |         
133 |     Returns:
134 |         Base64 encoded image string
135 |     """
136 |     buffered = BytesIO()
137 |     image.save(buffered, format="PNG")
138 |     return base64.b64encode(buffered.getvalue()).decode()
139 | 
140 | 
141 | class ModelWrapper:
142 |     """
143 |     Wrapper to provide unified interface for both ComputerAgent and custom models.
144 |     """
145 |     
146 |     def __init__(self, model: Union[str, ModelProtocol]):
147 |         self.model = model
148 |         self.is_computer_agent = isinstance(model, str)
149 |         self.agent: Optional[ComputerAgent] = None
150 |         self.vram_usage_history: List[float] = []  # Track VRAM usage over time
151 |         
152 |         if self.is_computer_agent:
153 |             self.model_name = str(model)
154 |         else:
155 |             self.model_name = f"{model.__class__.__name__}('{getattr(model, 'model_name', 'unknown')}')"
156 |     
157 |     async def load_model(self) -> None:
158 |         """Load the model."""
159 |         if self.is_computer_agent:
160 |             self.agent = ComputerAgent(model=str(self.model))
161 |         else:
162 |             await self.model.load_model() # type: ignore
163 |         
164 |         # Record initial VRAM usage after loading
165 |         vram_info = get_vram_usage()
166 |         self.vram_usage_history.append(vram_info['allocated_mb'])
167 |     
168 |     async def unload_model(self) -> None:
169 |         """Unload the model."""
170 |         if not self.is_computer_agent:
171 |             await self.model.unload_model() # type: ignore
172 |         else:
173 |             del self.agent
174 |             self.agent = None
175 |             gc.collect()
176 |             if torch.cuda.is_available():
177 |                 torch.cuda.empty_cache()
178 |         
179 |         # Record VRAM usage after unloading
180 |         vram_info = get_vram_usage()
181 |         self.vram_usage_history.append(vram_info['allocated_mb'])
182 |     
183 |     def get_vram_stats(self) -> dict:
184 |         """Get VRAM usage statistics for this model."""
185 |         if not self.vram_usage_history:
186 |             return {'max_mb': 0.0, 'avg_mb': 0.0}
187 |         
188 |         return {
189 |             'max_mb': max(self.vram_usage_history),
190 |             'avg_mb': sum(self.vram_usage_history) / len(self.vram_usage_history)
191 |         }
192 | 
193 |     
194 |     async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]:
195 |         """Predict click coordinates."""
196 |         # Record VRAM usage before prediction
197 |         vram_info = get_vram_usage()
198 |         self.vram_usage_history.append(vram_info['allocated_mb'])
199 |         
200 |         if self.is_computer_agent:
201 |             if self.agent is None:
202 |                 await self.load_model()
203 |             
204 |             if self.agent is not None:
205 |                 image_b64 = image_to_base64(image)
206 |                 result = await self.agent.predict_click(instruction=instruction, image_b64=image_b64)
207 |                 
208 |                 # Record VRAM usage after prediction
209 |                 vram_info = get_vram_usage()
210 |                 self.vram_usage_history.append(vram_info['allocated_mb'])
211 |                 
212 |                 return result
213 |             return None
214 |         else:
215 |             result = await self.model.predict_click(image, instruction) # type: ignore
216 |             
217 |             # Record VRAM usage after prediction
218 |             vram_info = get_vram_usage()
219 |             self.vram_usage_history.append(vram_info['allocated_mb'])
220 |             
221 |             return result
222 | 
223 | 
224 | def save_results_to_markdown(all_results: List[dict], output_file: str = "screenspot_pro_results.md", title: str = "ScreenSpot-Pro Benchmark Results") -> None:
225 |     """
226 |     Save evaluation results to a markdown table.
227 |     
228 |     Args:
229 |         all_results: List of evaluation results for each model
230 |         output_file: Output markdown file path
231 |     """
232 |     with open(output_file, 'w', encoding='utf-8') as f:
233 |         f.write(f"# {title}\n\n")
234 |         f.write(f"**Evaluation Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
235 |         
236 |         # Summary table
237 |         f.write("## Summary\n\n")
238 |         f.write("| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Median Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n")
239 |         f.write("|-------|---------------|---------|--------|----------|------------|--------------|-----------------|----------------|---------------|---------------|\n")
240 |         
241 |         for result in all_results:
242 |             model_name = result['model_name']
243 |             total = result['total_samples']
244 |             correct = result['correct_predictions']
245 |             errors = result['failed_predictions']
246 |             accuracy = result['accuracy'] * 100
247 |             error_rate = result['failure_rate'] * 100
248 |             avg_time = result.get('avg_prediction_time', 0.0)
249 |             median_time = result.get('median_prediction_time', 0.0)
250 |             min_time = result.get('min_prediction_time', 0.0)
251 |             max_time = result.get('max_prediction_time', 0.0)
252 |             time_range = f"{min_time:.2f} - {max_time:.2f}"
253 |             vram_max = result.get('vram_max_mb', 0.0) / 1024
254 |             vram_avg = result.get('vram_avg_mb', 0.0) / 1024
255 |             
256 |             f.write(f"| {model_name} | {total} | {correct} | {errors} | {accuracy:.2f}% | {error_rate:.2f}% | {avg_time:.2f} | {median_time:.2f} | {time_range} | {vram_max:.1f} | {vram_avg:.1f} |\n")
257 |         
258 |         # Detailed results for each model
259 |         for result in all_results:
260 |             f.write(f"\n## {result['model_name']} - Detailed Results\n\n")
261 |             f.write("| Sample Index | Instruction | BBox | Predicted | Correct | Error | Time (s) |\n")
262 |             f.write("|-----------|-------------|------|-----------|---------|-------|----------|\n")
263 |             
264 |             for sample_result in result['results'][:10]:  # Show first 10 samples
265 |                 sample_idx = sample_result['sample_idx']
266 |                 instruction = sample_result['instruction'][:50] + "..." if len(sample_result['instruction']) > 50 else sample_result['instruction']
267 |                 bbox = str(sample_result['bbox'])
268 |                 predicted = str(sample_result['predicted_coords']) if sample_result['predicted_coords'] else "None"
269 |                 correct = "PASS" if sample_result['is_correct'] else "FAIL"
270 |                 error = "YES" if sample_result['failed'] else "NO"
271 |                 pred_time = sample_result.get('prediction_time', 0.0)
272 |                 
273 |                 f.write(f"| {sample_idx} | {instruction} | {bbox} | {predicted} | {correct} | {error} | {pred_time:.2f} |\n")
274 |             
275 |             if len(result['results']) > 10:
276 |                 f.write(f"\n*Showing first 10 of {len(result['results'])} samples*\n")
277 |     
278 |     print(f"\nResults saved to: {output_file}")
279 | 
280 | 
281 | def save_visualizations(all_results: List[dict], samples, output_dir: str = "output") -> None:
282 |     """
283 |     Save visualizations of predicted coordinates vs bboxes to an output folder.
284 |     
285 |     Args:
286 |         all_results: List of evaluation results for each model
287 |         samples: List of sample dicts with image, bbox, instruction keys
288 |         output_dir: Output directory path
289 |     """
290 |     os.makedirs(output_dir, exist_ok=True)
291 |     
292 |     for result in all_results:
293 |         model_name = result['model_name'].replace('/', '_').replace('\\', '_')
294 |         model_dir = os.path.join(output_dir, model_name)
295 |         os.makedirs(model_dir, exist_ok=True)
296 |         
297 |         print(f"Saving visualizations for {result['model_name']}...")
298 |         
299 |         # Save first 10 samples for visualization
300 |         for i, sample_result in enumerate(tqdm(result['results'][:10], desc=f"Saving {model_name} visualizations")):
301 |             # Get sample data using index
302 |             sample_idx = sample_result['sample_idx']
303 |             
304 |             if sample_idx < len(samples):
305 |                 sample = samples[sample_idx]
306 |                 image = sample['image'].copy()  # Make a copy to avoid modifying original
307 |             else:
308 |                 print(f"Warning: Could not find sample at index {sample_idx}")
309 |                 continue
310 |             
311 |             bbox = sample_result['bbox']
312 |             predicted_coords = sample_result['predicted_coords']
313 |             is_correct = sample_result['is_correct']
314 |             
315 |             # Draw on image
316 |             draw = ImageDraw.Draw(image)
317 |             
318 |             # Draw bounding box (ground truth) in green
319 |             x1, y1, x2, y2 = bbox
320 |             draw.rectangle([x1, y1, x2, y2], outline="green", width=3)
321 |             draw.text((x1, y1-20), "Ground Truth", fill="green")
322 |             
323 |             # Draw predicted click in red or blue
324 |             if predicted_coords is not None:
325 |                 px, py = predicted_coords
326 |                 color = "blue" if is_correct else "red"
327 |                 # Draw crosshair
328 |                 crosshair_size = 15
329 |                 draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=3)
330 |                 draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=3)
331 |                 draw.text((px+10, py-20), f"Predicted ({px},{py})", fill=color)
332 |             
333 |             # Add status text
334 |             status = "CORRECT" if is_correct else "INCORRECT"
335 |             status_color = "blue" if is_correct else "red"
336 |             draw.text((10, 10), f"Status: {status}", fill=status_color)
337 |             draw.text((10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black")
338 |             
339 |             # Save image
340 |             filename = f"sample_{i+1:02d}_idx{sample_idx}_{status.lower()}.png"
341 |             filepath = os.path.join(model_dir, filename)
342 |             image.save(filepath)
343 |         
344 |         print(f"Visualizations saved to: {model_dir}")
345 | 
346 | 
347 | def save_prediction_visualization(image: Image.Image, instruction: str, predictions: List[dict], 
348 |                                 output_file: str = "interactive_prediction.png") -> None:
349 |     """
350 |     Save visualization of multiple model predictions on a single image.
351 |     
352 |     Args:
353 |         image: PIL Image to visualize
354 |         instruction: Instruction text
355 |         predictions: List of prediction dicts with keys: model_name, coords, error
356 |         output_file: Output file path
357 |     """
358 |     # Create a copy of the image
359 |     vis_image = image.copy()
360 |     draw = ImageDraw.Draw(vis_image)
361 |     
362 |     # Colors for different models
363 |     colors = ["red", "blue", "orange", "purple", "brown", "pink", "gray", "olive"]
364 |     
365 |     # Draw predictions
366 |     for i, pred in enumerate(predictions):
367 |         color = colors[i % len(colors)]
368 |         model_name = pred['model_name']
369 |         coords = pred.get('coords')
370 |         error = pred.get('error')
371 |         
372 |         if coords is not None:
373 |             px, py = coords
374 |             # Draw crosshair
375 |             crosshair_size = 20
376 |             draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=4)
377 |             draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=4)
378 |             # Draw model name
379 |             draw.text((px+15, py+15), f"{model_name}: ({px},{py})", fill=color)
380 |         else:
381 |             # Draw error text
382 |             draw.text((10, 50 + i*20), f"{model_name}: ERROR - {error}", fill=color)
383 |     
384 |     # Add instruction at the top
385 |     draw.text((10, 10), f"Instruction: {instruction}", fill="black")
386 |     
387 |     # Save image
388 |     vis_image.save(output_file)
389 |     print(f"Prediction visualization saved to: {output_file}")
390 | 
391 | 
392 | def take_screenshot() -> Image.Image:
393 |     """
394 |     Take a screenshot of the current screen.
395 |     
396 |     Returns:
397 |         PIL Image of the screenshot
398 |     """
399 |     try:
400 |         import pyautogui
401 |         screenshot = pyautogui.screenshot()
402 |         return screenshot
403 |     except ImportError:
404 |         print("pyautogui not installed. Please install it with: pip install pyautogui")
405 |         raise
406 |     except Exception as e:
407 |         print(f"Error taking screenshot: {e}")
408 |         raise
409 | 
410 | 
```
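
For orientation, a minimal sketch of how `ModelWrapper` and `is_click_in_bbox` from this module combine to score a single sample. The model string, synthetic image, and bounding box are illustrative assumptions, and the `utils` import assumes the benchmarks directory is the working directory.

```python
# Sketch only: scoring one ScreenSpot-style sample with the helpers above.
import asyncio
from PIL import Image
from utils import ModelWrapper, is_click_in_bbox  # assumes cwd is libs/python/agent/benchmarks

async def score_one_sample() -> bool:
    wrapper = ModelWrapper("anthropic/claude-opus-4-20250514")  # any entry from get_available_models()
    await wrapper.load_model()
    image = Image.new("RGB", (1920, 1080), "white")  # stand-in for a real screenshot
    coords = await wrapper.predict_click(image, "Click the Save button")
    await wrapper.unload_model()
    # bbox format is [x1, y1, x2, y2], as documented in is_click_in_bbox
    return is_click_in_bbox(coords, [100, 200, 300, 260])

if __name__ == "__main__":
    print(asyncio.run(score_one_sample()))
```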

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/trajectory_saver.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Trajectory saving callback handler for ComputerAgent.
  3 | """
  4 | 
  5 | import os
  6 | import json
  7 | import uuid
  8 | from datetime import datetime
  9 | import base64
 10 | from pathlib import Path
 11 | from typing import List, Dict, Any, Optional, Union, override
 12 | from PIL import Image, ImageDraw
 13 | import io
 14 | from copy import deepcopy
 15 | 
 16 | from .base import AsyncCallbackHandler
 17 | 
 18 | def sanitize_image_urls(data: Any) -> Any:
 19 |     """
 20 |     Recursively search for 'image_url' keys and set their values to '[omitted]'.
 21 |     
 22 |     Args:
 23 |         data: Any data structure (dict, list, or primitive type)
 24 |         
 25 |     Returns:
 26 |         A deep copy of the data with all 'image_url' values replaced with '[omitted]'
 27 |     """
 28 |     if isinstance(data, dict):
 29 |         # Create a copy of the dictionary
 30 |         sanitized = {}
 31 |         for key, value in data.items():
 32 |             if key == "image_url":
 33 |                 sanitized[key] = "[omitted]"
 34 |             else:
 35 |                 # Recursively sanitize the value
 36 |                 sanitized[key] = sanitize_image_urls(value)
 37 |         return sanitized
 38 |     
 39 |     elif isinstance(data, list):
 40 |         # Recursively sanitize each item in the list
 41 |         return [sanitize_image_urls(item) for item in data]
 42 |     
 43 |     else:
 44 |         # For primitive types (str, int, bool, None, etc.), return as-is
 45 |         return data
 46 | 
 47 | 
 48 | def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: Optional[Path]) -> List[Dict[str, Any]]:
 49 |     """
 50 |     Save any base64-encoded screenshots from computer_call_output entries to files and
 51 |     replace their image_url with the saved file path when a call_id is present.
 52 | 
 53 |     Only operates if screenshot_dir is provided and exists; otherwise returns items unchanged.
 54 | 
 55 |     Args:
 56 |         items: List of message/result dicts potentially containing computer_call_output entries
 57 |         screenshot_dir: Directory to write screenshots into
 58 | 
 59 |     Returns:
 60 |         A new list with updated image_url fields when applicable.
 61 |     """
 62 |     if not items:
 63 |         return items
 64 |     if not screenshot_dir or not screenshot_dir.exists():
 65 |         return items
 66 | 
 67 |     updated: List[Dict[str, Any]] = []
 68 |     for item in items:
 69 |         # work on a shallow copy; deep copy nested 'output' if we modify it
 70 |         msg = dict(item)
 71 |         try:
 72 |             if msg.get("type") == "computer_call_output":
 73 |                 call_id = msg.get("call_id")
 74 |                 output = msg.get("output", {})
 75 |                 image_url = output.get("image_url")
 76 |                 if call_id and isinstance(image_url, str) and image_url.startswith("data:"):
 77 |                     # derive extension from MIME type e.g. data:image/png;base64,
 78 |                     try:
 79 |                         ext = image_url.split(";", 1)[0].split("/")[-1]
 80 |                         if not ext:
 81 |                             ext = "png"
 82 |                     except Exception:
 83 |                         ext = "png"
 84 |                     out_path = screenshot_dir / f"{call_id}.{ext}"
 85 |                     # write file if it doesn't exist
 86 |                     if not out_path.exists():
 87 |                         try:
 88 |                             b64_payload = image_url.split(",", 1)[1]
 89 |                             img_bytes = base64.b64decode(b64_payload)
 90 |                             out_path.parent.mkdir(parents=True, exist_ok=True)
 91 |                             with open(out_path, "wb") as f:
 92 |                                 f.write(img_bytes)
 93 |                         except Exception:
 94 |                             # if anything fails, skip modifying this message
 95 |                             pass
 96 |                     # update image_url to file path
 97 |                     new_output = dict(output)
 98 |                     new_output["image_url"] = str(out_path)
 99 |                     msg["output"] = new_output
100 |         except Exception:
101 |             # do not block on malformed entries; keep original
102 |             pass
103 |         updated.append(msg)
104 |     return updated
105 | 
106 | class TrajectorySaverCallback(AsyncCallbackHandler):
107 |     """
108 |     Callback handler that saves agent trajectories to disk.
109 |     
110 |     Saves each run as a separate trajectory with unique ID, and each turn
111 |     within the trajectory gets its own folder with screenshots and responses.
112 |     """
113 |     
114 |     def __init__(self, trajectory_dir: str, reset_on_run: bool = True, screenshot_dir: Optional[str] = None):
115 |         """
116 |         Initialize trajectory saver.
117 |         
118 |         Args:
119 |             trajectory_dir: Base directory to save trajectories
120 |             reset_on_run: If True, reset trajectory_id/turn/artifact on each run; if False, reuse the existing trajectory_id.
121 |             screenshot_dir: Optional directory to save screenshots extracted from messages/new_items.
122 |         """
123 |         self.trajectory_dir = Path(trajectory_dir)
124 |         self.trajectory_id: Optional[str] = None
125 |         self.current_turn: int = 0
126 |         self.current_artifact: int = 0
127 |         self.model: Optional[str] = None
128 |         self.total_usage: Dict[str, Any] = {}
129 |         self.reset_on_run = reset_on_run
130 |         # Optional directory to store extracted screenshots from metadata/new_items
131 |         self.screenshot_dir: Optional[Path] = Path(screenshot_dir) if screenshot_dir else None
132 |         
133 |         # Ensure trajectory directory exists
134 |         self.trajectory_dir.mkdir(parents=True, exist_ok=True)
135 | 
136 |     def _get_turn_dir(self) -> Path:
137 |         """Get the directory for the current turn."""
138 |         if not self.trajectory_id:
139 |             raise ValueError("Trajectory not initialized - call _on_run_start first")
140 |         
141 |         # format: trajectory_id/turn_000
142 |         turn_dir = self.trajectory_dir / self.trajectory_id / f"turn_{self.current_turn:03d}"
143 |         turn_dir.mkdir(parents=True, exist_ok=True)
144 |         return turn_dir
145 | 
146 |     def _save_artifact(self, name: str, artifact: Union[str, bytes, Dict[str, Any]]) -> None:
147 |         """Save an artifact to the current turn directory."""
148 |         turn_dir = self._get_turn_dir()
149 |         if isinstance(artifact, bytes):
150 |             # format: turn_000/0000_name.png
151 |             artifact_filename = f"{self.current_artifact:04d}_{name}"
152 |             artifact_path = turn_dir / f"{artifact_filename}.png"
153 |             with open(artifact_path, "wb") as f:
154 |                 f.write(artifact)
155 |         else:
156 |             # format: turn_000/0000_name.json
157 |             artifact_filename = f"{self.current_artifact:04d}_{name}"
158 |             artifact_path = turn_dir / f"{artifact_filename}.json"
159 |             # add created_at
160 |             if isinstance(artifact, dict):
161 |                 artifact = artifact.copy()
162 |                 artifact["created_at"] = str(uuid.uuid1().time)
163 |             with open(artifact_path, "w") as f:
164 |                 json.dump(sanitize_image_urls(artifact), f, indent=2)
165 |         self.current_artifact += 1
166 | 
167 |     def _update_usage(self, usage: Dict[str, Any]) -> None:
168 |         """Update total usage statistics."""
169 |         def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
170 |             for key, value in source.items():
171 |                 if isinstance(value, dict):
172 |                     if key not in target:
173 |                         target[key] = {}
174 |                     add_dicts(target[key], value)
175 |                 else:
176 |                     if key not in target:
177 |                         target[key] = 0
178 |                     target[key] += value
179 |         add_dicts(self.total_usage, usage)
180 |     
181 |     @override
182 |     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
183 |         """Initialize trajectory tracking for a new run."""
184 |         model = kwargs.get("model", "unknown")
185 |         
186 |         # Only reset trajectory state if reset_on_run is True or no trajectory exists
187 |         if self.reset_on_run or not self.trajectory_id:
188 |             model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16]
189 |             if "+" in model:
190 |                 model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short
191 |             # strip non-alphanumeric characters from model_name_short
192 |             model_name_short = ''.join(c for c in model_name_short if c.isalnum() or c == '_')
193 | 
194 |             # id format: yyyy-mm-dd_model_hhmmss_uuid[:4]
195 |             now = datetime.now()
196 |             self.trajectory_id = f"{now.strftime('%Y-%m-%d')}_{model_name_short}_{now.strftime('%H%M%S')}_{str(uuid.uuid4())[:4]}"
197 |             self.current_turn = 0
198 |             self.current_artifact = 0
199 |             self.model = model
200 |             self.total_usage = {}
201 |             
202 |             # Create trajectory directory
203 |             trajectory_path = self.trajectory_dir / self.trajectory_id
204 |             trajectory_path.mkdir(parents=True, exist_ok=True)
205 |             
206 |             # Save trajectory metadata (optionally extract screenshots to screenshot_dir)
207 |             kwargs_to_save = kwargs.copy()
208 |             try:
209 |                 if "messages" in kwargs_to_save:
210 |                     kwargs_to_save["messages"] = extract_computer_call_outputs(
211 |                         kwargs_to_save["messages"], self.screenshot_dir
212 |                     )
213 |             except Exception:
214 |                 # If extraction fails, fall back to original messages
215 |                 pass
216 |             metadata = {
217 |                 "trajectory_id": self.trajectory_id,
218 |                 "created_at": str(uuid.uuid1().time),
219 |                 "status": "running",
220 |                 "kwargs": kwargs_to_save,
221 |             }
222 |             
223 |             with open(trajectory_path / "metadata.json", "w") as f:
224 |                 json.dump(metadata, f, indent=2)
225 |         else:
226 |             # Continue with existing trajectory - just update model if needed
227 |             self.model = model
228 | 
229 |     @override
230 |     async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
231 |         """Finalize run tracking by updating metadata with completion status, usage, and new items."""
232 |         if not self.trajectory_id:
233 |             return
234 |         
235 |         # Update metadata with completion status, total usage, and new items
236 |         trajectory_path = self.trajectory_dir / self.trajectory_id
237 |         metadata_path = trajectory_path / "metadata.json"
238 |         
239 |         # Read existing metadata
240 |         if metadata_path.exists():
241 |             with open(metadata_path, "r") as f:
242 |                 metadata = json.load(f)
243 |         else:
244 |             metadata = {}
245 |         
246 |         # Update metadata with completion info
247 |         # Optionally extract screenshots from new_items before persisting
248 |         new_items_to_save = new_items
249 |         try:
250 |             new_items_to_save = extract_computer_call_outputs(new_items, self.screenshot_dir)
251 |         except Exception:
252 |             pass
253 | 
254 |         metadata.update({
255 |             "status": "completed",
256 |             "completed_at": str(uuid.uuid1().time),
257 |             "total_usage": self.total_usage,
258 |             "new_items": new_items_to_save,
259 |             "total_turns": self.current_turn
260 |         })
261 |         
262 |         # Save updated metadata
263 |         with open(metadata_path, "w") as f:
264 |             json.dump(metadata, f, indent=2)
265 |     
266 |     @override 
267 |     async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
268 |         if not self.trajectory_id:
269 |             return
270 |         
271 |         self._save_artifact("api_start", { "kwargs": kwargs })
272 |     
273 |     @override
274 |     async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
275 |         """Save API call result."""
276 |         if not self.trajectory_id:
277 |             return
278 |         
279 |         self._save_artifact("api_result", { "kwargs": kwargs, "result": result })
280 | 
281 |     @override
282 |     async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
283 |         """Save a screenshot."""
284 |         if isinstance(screenshot, str):
285 |             screenshot = base64.b64decode(screenshot)
286 |         self._save_artifact(name, screenshot)
287 | 
288 |     @override
289 |     async def on_usage(self, usage: Dict[str, Any]) -> None:
290 |         """Called when usage information is received."""
291 |         self._update_usage(usage)
292 | 
293 |     @override
294 |     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
295 |         """Save responses to the current turn directory and update usage statistics."""
296 |         if not self.trajectory_id:
297 |             return
298 |         
299 |         # Save responses
300 |         turn_dir = self._get_turn_dir()
301 |         response_data = {
302 |             "timestamp": str(uuid.uuid1().time),
303 |             "model": self.model,
304 |             "kwargs": kwargs,
305 |             "response": responses
306 |         }
307 |         
308 |         self._save_artifact("agent_response", response_data)
309 |         
310 |         # Increment turn counter
311 |         self.current_turn += 1
312 | 
313 |     def _draw_crosshair_on_image(self, image_bytes: bytes, x: int, y: int) -> bytes:
314 |         """
315 |         Draw a red dot and crosshair at the specified coordinates on the image.
316 |         
317 |         Args:
318 |             image_bytes: The original image as bytes
319 |             x: X coordinate for the crosshair
320 |             y: Y coordinate for the crosshair
321 |             
322 |         Returns:
323 |             Modified image as bytes with red dot and crosshair
324 |         """
325 |         # Open the image
326 |         image = Image.open(io.BytesIO(image_bytes))
327 |         draw = ImageDraw.Draw(image)
328 |         
329 |         # Draw crosshair lines (red, 2px thick)
330 |         crosshair_size = 20
331 |         line_width = 2
332 |         color = "red"
333 |         
334 |         # Horizontal line
335 |         draw.line([(x - crosshair_size, y), (x + crosshair_size, y)], fill=color, width=line_width)
336 |         # Vertical line
337 |         draw.line([(x, y - crosshair_size), (x, y + crosshair_size)], fill=color, width=line_width)
338 |         
339 |         # Draw center dot (filled circle)
340 |         dot_radius = 3
341 |         draw.ellipse([(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color)
342 |         
343 |         # Convert back to bytes
344 |         output = io.BytesIO()
345 |         image.save(output, format='PNG')
346 |         return output.getvalue()
347 | 
348 |     @override
349 |     async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
350 |         """
351 |         Called when a computer call has completed.
352 |         Saves screenshots and computer call output.
353 |         """
354 |         if not self.trajectory_id:
355 |             return
356 |         
357 |         self._save_artifact("computer_call_result", { "item": item, "result": result })
358 |         
359 |         # Check if action has x/y coordinates and there's a screenshot in the result
360 |         action = item.get("action", {})
361 |         if "x" in action and "y" in action:
362 |             # Look for screenshot in the result
363 |             for result_item in result:
364 |                 if (result_item.get("type") == "computer_call_output" and 
365 |                     result_item.get("output", {}).get("type") == "input_image"):
366 |                     
367 |                     image_url = result_item["output"]["image_url"]
368 |                 
369 |                     # Extract base64 image data
370 |                     if image_url.startswith("data:image/"):
371 |                         # Format: data:image/png;base64,<base64_data>
372 |                         base64_data = image_url.split(",", 1)[1]
373 |                     else:
374 |                         # Assume it's just base64 data
375 |                         base64_data = image_url
376 |                     
377 |                     try:
378 |                         # Decode the image
379 |                         image_bytes = base64.b64decode(base64_data)
380 |                         
381 |                         # Draw crosshair at the action coordinates
382 |                         annotated_image = self._draw_crosshair_on_image(
383 |                             image_bytes, 
384 |                             int(action["x"]), 
385 |                             int(action["y"])
386 |                         )
387 |                         
388 |                         # Save as screenshot_action
389 |                         self._save_artifact("screenshot_action", annotated_image)
390 |                         
391 |                     except Exception as e:
392 |                         # If annotation fails, just log and continue
393 |                         print(f"Failed to annotate screenshot: {e}")
394 |                     
395 |                     break  # Only process the first screenshot found
396 | 
397 |         # Increment turn counter
398 |         self.current_turn += 1
```
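
A minimal sketch of wiring this callback into an agent. The `callbacks=` keyword and the model string are assumptions based on the agent SDK docs; only the `TrajectorySaverCallback` constructor arguments and the artifact names in the comments come from the module above.

```python
# Sketch only: attaching the trajectory saver to a ComputerAgent.
from agent.agent import ComputerAgent
from agent.callbacks.trajectory_saver import TrajectorySaverCallback

saver = TrajectorySaverCallback(
    trajectory_dir="trajectories",               # one subfolder per run, e.g. 2025-01-01_model_120000_ab12
    reset_on_run=True,                           # start a fresh trajectory id on every run
    screenshot_dir="trajectories/screenshots",   # optional: write base64 screenshots out as files
)
agent = ComputerAgent(model="anthropic/claude-opus-4-20250514", callbacks=[saver])
# Subsequent runs write metadata.json plus per-turn artifacts
# (api_start, api_result, screenshots, agent_response) under trajectory_dir.
```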

--------------------------------------------------------------------------------
/tests/test_files.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | File System Interface Tests
  3 | Tests for the file system methods of the Computer interface (Cua cloud container by default).
  4 | Required environment variables:
  5 | - CUA_API_KEY: API key for Cua cloud provider
  6 | - CUA_CONTAINER_NAME: Name of the container to use
  7 | """
  8 | 
  9 | import os
 10 | import asyncio
 11 | import pytest
 12 | from pathlib import Path
 13 | import sys
 14 | import traceback
 15 | 
 16 | # Load environment variables from .env file
 17 | project_root = Path(__file__).parent.parent
 18 | env_file = project_root / ".env"
 19 | print(f"Loading environment from: {env_file}")
 20 | from dotenv import load_dotenv
 21 | 
 22 | load_dotenv(env_file)
 23 | 
 24 | # Add paths to sys.path if needed
 25 | pythonpath = os.environ.get("PYTHONPATH", "")
 26 | for path in pythonpath.split(":"):
 27 |     if path and path not in sys.path:
 28 |         sys.path.insert(0, path)  # Insert at beginning to prioritize
 29 |         print(f"Added to sys.path: {path}")
 30 | 
 31 | from computer import Computer, VMProviderType
 32 | 
 33 | @pytest.fixture(scope="session")
 34 | async def computer():
 35 |     """Shared Computer instance for all test cases."""
 36 |     # Create a remote Linux computer with Cua
 37 |     computer = Computer(
 38 |         os_type="linux",
 39 |         api_key=os.getenv("CUA_API_KEY"),
 40 |         name=str(os.getenv("CUA_CONTAINER_NAME")),
 41 |         provider_type=VMProviderType.CLOUD,
 42 |     )
 43 |     
 44 |     # Create a local macOS computer with Cua
 45 |     # computer = Computer()
 46 |     
 47 |     # Connect to host computer
 48 |     # computer = Computer(use_host_computer_server=True)
 49 |     
 50 |     try:
 51 |         await computer.run()
 52 |         yield computer
 53 |     finally:
 54 |         await computer.disconnect()
 55 | 
 56 | @pytest.mark.asyncio(loop_scope="session")
 57 | async def test_file_exists(computer):
 58 |     tmp_path = "test_file_exists.txt"
 59 |     # Ensure file does not exist
 60 |     if await computer.interface.file_exists(tmp_path):
 61 |         await computer.interface.delete_file(tmp_path)
 62 |     exists = await computer.interface.file_exists(tmp_path)
 63 |     assert exists is False, f"File {tmp_path} should not exist"
 64 |     # Create file and check again
 65 |     await computer.interface.write_text(tmp_path, "hello")
 66 |     exists = await computer.interface.file_exists(tmp_path)
 67 |     assert exists is True, f"File {tmp_path} should exist"
 68 |     await computer.interface.delete_file(tmp_path)
 69 | 
 70 | 
 71 | @pytest.mark.asyncio(loop_scope="session")
 72 | async def test_directory_exists(computer):
 73 |     tmp_dir = "test_directory_exists"
 74 |     if await computer.interface.directory_exists(tmp_dir):
 75 |         # Remove all files in directory before removing directory
 76 |         files = await computer.interface.list_dir(tmp_dir)
 77 |         for fname in files:
 78 |             await computer.interface.delete_file(f"{tmp_dir}/{fname}")
 79 |         # Remove the directory itself
 80 |         await computer.interface.delete_dir(tmp_dir)
 81 |     exists = await computer.interface.directory_exists(tmp_dir)
 82 |     assert exists is False, f"Directory {tmp_dir} should not exist"
 83 |     await computer.interface.create_dir(tmp_dir)
 84 |     exists = await computer.interface.directory_exists(tmp_dir)
 85 |     assert exists is True, f"Directory {tmp_dir} should exist"
 86 |     # Cleanup: remove files and directory
 87 |     files = await computer.interface.list_dir(tmp_dir)
 88 |     for fname in files:
 89 |         await computer.interface.delete_file(f"{tmp_dir}/{fname}")
 90 |     await computer.interface.delete_dir(tmp_dir)
 91 | 
 92 | 
 93 | @pytest.mark.asyncio(loop_scope="session")
 94 | async def test_list_dir(computer):
 95 |     tmp_dir = "test_list_dir"
 96 |     if not await computer.interface.directory_exists(tmp_dir):
 97 |         await computer.interface.create_dir(tmp_dir)
 98 |     files = ["foo.txt", "bar.txt"]
 99 |     for fname in files:
100 |         await computer.interface.write_text(f"{tmp_dir}/{fname}", "hi")
101 |     result = await computer.interface.list_dir(tmp_dir)
102 |     assert set(result) >= set(files), f"Directory {tmp_dir} should contain files {files}"
103 |     for fname in files:
104 |         await computer.interface.delete_file(f"{tmp_dir}/{fname}")
105 |     await computer.interface.delete_dir(tmp_dir)
106 | 
107 | 
108 | @pytest.mark.asyncio(loop_scope="session")
109 | async def test_read_write_text(computer):
110 |     tmp_path = "test_rw_text.txt"
111 |     content = "sample text"
112 |     await computer.interface.write_text(tmp_path, content)
113 |     read = await computer.interface.read_text(tmp_path)
114 |     assert read == content, "File content should match"
115 |     await computer.interface.delete_file(tmp_path)
116 | 
117 | 
118 | @pytest.mark.asyncio(loop_scope="session")
119 | async def test_delete_file(computer):
120 |     tmp_path = "test_delete_file.txt"
121 |     await computer.interface.write_text(tmp_path, "bye")
122 |     exists = await computer.interface.file_exists(tmp_path)
123 |     assert exists is True, "File should exist"
124 |     await computer.interface.delete_file(tmp_path)
125 |     exists = await computer.interface.file_exists(tmp_path)
126 |     assert exists is False, "File should not exist"
127 | 
128 | 
129 | @pytest.mark.asyncio(loop_scope="session")
130 | async def test_create_dir(computer):
131 |     tmp_dir = "test_create_dir"
132 |     if await computer.interface.directory_exists(tmp_dir):
133 |         await computer.interface.delete_dir(tmp_dir)
134 |     await computer.interface.create_dir(tmp_dir)
135 |     exists = await computer.interface.directory_exists(tmp_dir)
136 |     assert exists is True, "Directory should exist"
137 |     await computer.interface.delete_dir(tmp_dir)
138 | 
139 | 
140 | @pytest.mark.asyncio(loop_scope="session")
141 | async def test_read_bytes_basic(computer):
142 |     """Test basic read_bytes functionality."""
143 |     tmp_path = "test_read_bytes.bin"
144 |     test_data = b"Hello, World! This is binary data \x00\x01\x02\x03"
145 |     
146 |     # Write binary data via write_text, decoded with latin-1 so each byte maps to one character
147 |     await computer.interface.write_text(tmp_path, test_data.decode('latin-1'))
148 |     
149 |     # Read all bytes
150 |     read_data = await computer.interface.read_bytes(tmp_path)
151 |     assert read_data == test_data, "Binary data should match"
152 |     
153 |     await computer.interface.delete_file(tmp_path)
154 | 
155 | 
156 | @pytest.mark.asyncio(loop_scope="session")
157 | async def test_read_bytes_with_offset_and_length(computer):
158 |     """Test read_bytes with offset and length parameters."""
159 |     tmp_path = "test_read_bytes_offset.bin"
160 |     test_data = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
161 |     
162 |     # Write test data
163 |     await computer.interface.write_text(tmp_path, test_data.decode('latin-1'))
164 |     
165 |     # Test reading with offset only
166 |     read_data = await computer.interface.read_bytes(tmp_path, offset=5)
167 |     expected = test_data[5:]
168 |     assert read_data == expected, f"Data from offset 5 should match. Got: {read_data}, Expected: {expected}"
169 |     
170 |     # Test reading with offset and length
171 |     read_data = await computer.interface.read_bytes(tmp_path, offset=10, length=5)
172 |     expected = test_data[10:15]
173 |     assert read_data == expected, f"Data from offset 10, length 5 should match. Got: {read_data}, Expected: {expected}"
174 |     
175 |     # Test reading from beginning with length
176 |     read_data = await computer.interface.read_bytes(tmp_path, offset=0, length=10)
177 |     expected = test_data[:10]
178 |     assert read_data == expected, f"Data from beginning, length 10 should match. Got: {read_data}, Expected: {expected}"
179 |     
180 |     await computer.interface.delete_file(tmp_path)
181 | 
182 | 
183 | @pytest.mark.asyncio(loop_scope="session")
184 | async def test_get_file_size(computer):
185 |     """Test get_file_size functionality."""
186 |     tmp_path = "test_file_size.txt"
187 |     test_content = "A" * 1000  # 1000 bytes
188 |     
189 |     await computer.interface.write_text(tmp_path, test_content)
190 |     
191 |     file_size = await computer.interface.get_file_size(tmp_path)
192 |     assert file_size == 1000, f"File size should be 1000 bytes, got {file_size}"
193 |     
194 |     await computer.interface.delete_file(tmp_path)
195 | 
196 | 
197 | @pytest.mark.asyncio(loop_scope="session")
198 | async def test_read_large_file(computer):
199 |     """Test reading a file larger than 10MB to verify chunked reading."""
200 |     tmp_path = "test_large_file.bin"
201 |     
202 |     # Create a file larger than 10MB (10 * 1024 * 1024 = 10,485,760 bytes)
203 |     total_size = 12 * 1024 * 1024  # 12MB
204 |     
205 |     print(f"Creating large file of {total_size} bytes ({total_size / (1024*1024):.1f}MB)...")
206 |     
207 |     # Create large file content (this will test the chunked writing functionality)
208 |     large_content = b"X" * total_size
209 |     
210 |     # Write the large file using write_bytes (will automatically use chunked writing)
211 |     await computer.interface.write_bytes(tmp_path, large_content)
212 |     
213 |     # Verify file size
214 |     file_size = await computer.interface.get_file_size(tmp_path)
215 |     assert file_size == total_size, f"Large file size should be {total_size} bytes, got {file_size}"
216 |     
217 |     print(f"Large file created successfully: {file_size} bytes")
218 |     
219 |     # Test reading the entire large file (should use chunked reading)
220 |     print("Reading large file...")
221 |     read_data = await computer.interface.read_bytes(tmp_path)
222 |     assert len(read_data) == total_size, f"Read data size should match file size. Got {len(read_data)}, expected {total_size}"
223 |     
224 |     # Verify content (should be all 'X' characters)
225 |     expected_data = b"X" * total_size
226 |     assert read_data == expected_data, "Large file content should be all 'X' characters"
227 |     
228 |     print("Large file read successfully!")
229 |     
230 |     # Test reading with offset and length on large file
231 |     offset = 5 * 1024 * 1024  # 5MB offset
232 |     length = 2 * 1024 * 1024  # 2MB length
233 |     read_data = await computer.interface.read_bytes(tmp_path, offset=offset, length=length)
234 |     assert len(read_data) == length, f"Partial read size should be {length}, got {len(read_data)}"
235 |     assert read_data == b"X" * length, "Partial read content should be all 'X' characters"
236 |     
237 |     print("Large file partial read successful!")
238 |     
239 |     # Clean up
240 |     await computer.interface.delete_file(tmp_path)
241 |     print("Large file test completed successfully!")
242 | 
243 | @pytest.mark.asyncio(loop_scope="session")
244 | async def test_read_write_text_with_encoding(computer):
245 |     """Test reading and writing text files with different encodings."""
246 |     print("Testing text file operations with different encodings...")
247 |     
248 |     tmp_path = "test_encoding.txt"
249 |     
250 |     # Test UTF-8 encoding (default)
251 |     utf8_content = "Hello, 世界! 🌍 Ñoño café"
252 |     await computer.interface.write_text(tmp_path, utf8_content, encoding='utf-8')
253 |     read_utf8 = await computer.interface.read_text(tmp_path, encoding='utf-8')
254 |     assert read_utf8 == utf8_content, "UTF-8 content should match"
255 |     
256 |     # Test ASCII encoding
257 |     ascii_content = "Hello, World! Simple ASCII text."
258 |     await computer.interface.write_text(tmp_path, ascii_content, encoding='ascii')
259 |     read_ascii = await computer.interface.read_text(tmp_path, encoding='ascii')
260 |     assert read_ascii == ascii_content, "ASCII content should match"
261 |     
262 |     # Test Latin-1 encoding
263 |     latin1_content = "Café, naïve, résumé"
264 |     await computer.interface.write_text(tmp_path, latin1_content, encoding='latin-1')
265 |     read_latin1 = await computer.interface.read_text(tmp_path, encoding='latin-1')
266 |     assert read_latin1 == latin1_content, "Latin-1 content should match"
267 |     
268 |     # Clean up
269 |     await computer.interface.delete_file(tmp_path)
270 |     print("Text encoding test completed successfully!")
271 | 
272 | @pytest.mark.asyncio(loop_scope="session")
273 | async def test_write_text_append_mode(computer):
274 |     """Test appending text to files."""
275 |     print("Testing text file append mode...")
276 |     
277 |     tmp_path = "test_append.txt"
278 |     
279 |     # Write initial content
280 |     initial_content = "First line\n"
281 |     await computer.interface.write_text(tmp_path, initial_content)
282 |     
283 |     # Append more content
284 |     append_content = "Second line\n"
285 |     await computer.interface.write_text(tmp_path, append_content, append=True)
286 |     
287 |     # Read and verify
288 |     final_content = await computer.interface.read_text(tmp_path)
289 |     expected_content = initial_content + append_content
290 |     assert final_content == expected_content, f"Expected '{expected_content}', got '{final_content}'"
291 |     
292 |     # Append one more line
293 |     third_content = "Third line\n"
294 |     await computer.interface.write_text(tmp_path, third_content, append=True)
295 |     
296 |     # Read and verify final result
297 |     final_content = await computer.interface.read_text(tmp_path)
298 |     expected_content = initial_content + append_content + third_content
299 |     assert final_content == expected_content, f"Expected '{expected_content}', got '{final_content}'"
300 |     
301 |     # Clean up
302 |     await computer.interface.delete_file(tmp_path)
303 |     print("Text append test completed successfully!")
304 | 
305 | @pytest.mark.asyncio(loop_scope="session")
306 | async def test_large_text_file(computer):
307 |     """Test reading and writing large text files (>5MB) to verify chunked operations."""
308 |     print("Testing large text file operations...")
309 |     
310 |     tmp_path = "test_large_text.txt"
311 |     
312 |     # Create a large text content (approximately 6MB)
313 |     # Each line is about 100 characters, so 60,000 lines ≈ 6MB
314 |     line_template = "This is line {:06d} with some additional text to make it longer and reach about 100 chars.\n"
315 |     large_content = ""
316 |     num_lines = 60000
317 |     
318 |     print(f"Generating large text content with {num_lines} lines...")
319 |     for i in range(num_lines):
320 |         large_content += line_template.format(i)
321 |     
322 |     content_size_mb = len(large_content.encode('utf-8')) / (1024 * 1024)
323 |     print(f"Generated text content size: {content_size_mb:.2f} MB")
324 |     
325 |     # Write the large text file
326 |     print("Writing large text file...")
327 |     await computer.interface.write_text(tmp_path, large_content)
328 |     
329 |     # Read the entire file back
330 |     print("Reading large text file...")
331 |     read_content = await computer.interface.read_text(tmp_path)
332 |     
333 |     # Verify content matches
334 |     assert read_content == large_content, "Large text file content should match exactly"
335 |     
336 |     # Test partial reading by reading as bytes and decoding specific portions
337 |     print("Testing partial text reading...")
338 |     
339 |     # Read first 1000 characters worth of bytes
340 |     first_1000_chars = large_content[:1000]
341 |     first_1000_bytes = first_1000_chars.encode('utf-8')
342 |     read_bytes = await computer.interface.read_bytes(tmp_path, offset=0, length=len(first_1000_bytes))
343 |     decoded_partial = read_bytes.decode('utf-8')
344 |     assert decoded_partial == first_1000_chars, "Partial text reading should match"
345 |     
346 |     # Test appending to large file
347 |     print("Testing append to large text file...")
348 |     append_text = "\n--- APPENDED CONTENT ---\nThis content was appended to the large file.\n"
349 |     await computer.interface.write_text(tmp_path, append_text, append=True)
350 |     
351 |     # Read and verify appended content
352 |     final_content = await computer.interface.read_text(tmp_path)
353 |     expected_final = large_content + append_text
354 |     assert final_content == expected_final, "Appended large text file should match"
355 |     
356 |     # Clean up
357 |     await computer.interface.delete_file(tmp_path)
358 |     print("Large text file test completed successfully!")
359 | 
360 | @pytest.mark.asyncio(loop_scope="session")
361 | async def test_text_file_edge_cases(computer):
362 |     """Test edge cases for text file operations."""
363 |     print("Testing text file edge cases...")
364 |     
365 |     tmp_path = "test_edge_cases.txt"
366 |     
367 |     # Test empty file
368 |     empty_content = ""
369 |     await computer.interface.write_text(tmp_path, empty_content)
370 |     read_empty = await computer.interface.read_text(tmp_path)
371 |     assert read_empty == empty_content, "Empty file should return empty string"
372 |     
373 |     # Test file with only whitespace
374 |     whitespace_content = "   \n\t\r\n   \n"
375 |     await computer.interface.write_text(tmp_path, whitespace_content)
376 |     read_whitespace = await computer.interface.read_text(tmp_path)
377 |     assert read_whitespace == whitespace_content, "Whitespace content should be preserved"
378 |     
379 |     # Test file with special characters and newlines
380 |     special_content = "Line 1\nLine 2\r\nLine 3\tTabbed\nSpecial: !@#$%^&*()\n"
381 |     await computer.interface.write_text(tmp_path, special_content)
382 |     read_special = await computer.interface.read_text(tmp_path)
383 |     assert read_special == special_content, "Special characters should be preserved"
384 |     
385 |     # Test very long single line (no newlines)
386 |     long_line = "A" * 10000  # 10KB single line
387 |     await computer.interface.write_text(tmp_path, long_line)
388 |     read_long_line = await computer.interface.read_text(tmp_path)
389 |     assert read_long_line == long_line, "Long single line should be preserved"
390 |     
391 |     # Clean up
392 |     await computer.interface.delete_file(tmp_path)
393 |     print("Text file edge cases test completed successfully!")
394 | 
395 | if __name__ == "__main__":
396 |     # Run tests directly
397 |     pytest.main([__file__, "-v"])
398 | 
```
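
The large-file test above exercises `read_bytes(path, offset, length)` to read a prefix of a big file. The same call can be used to stream a large remote file in fixed-size chunks; the sketch below is illustrative only (not part of the test suite) and assumes `computer` is an already-connected `Computer` instance and that `read_bytes` returns a short or empty chunk once the offset passes the end of the file.

```python
# Illustrative sketch: stream a large remote text file in 1 MB chunks using
# the same read_bytes(path, offset, length) call exercised by the test above.
# Assumes `computer` is an already-connected Computer instance and that
# read_bytes returns a short (or empty) result once the offset passes EOF.
CHUNK_SIZE = 1024 * 1024  # 1 MB per request


async def stream_text_file(computer, path: str) -> str:
    chunks: list[bytes] = []
    offset = 0
    while True:
        chunk = await computer.interface.read_bytes(path, offset=offset, length=CHUNK_SIZE)
        if not chunk:
            break
        chunks.append(chunk)
        offset += len(chunk)
        if len(chunk) < CHUNK_SIZE:  # short read: end of file reached
            break
    return b"".join(chunks).decode("utf-8")
```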

--------------------------------------------------------------------------------
/docs/src/app/(home)/[[...slug]]/page.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | import { getApiVersions, source } from '@/lib/source';
  2 | import { getMDXComponents } from '@/mdx-components';
  3 | import { buttonVariants } from 'fumadocs-ui/components/ui/button';
  4 | import {
  5 |   Popover,
  6 |   PopoverContent,
  7 |   PopoverTrigger,
  8 | } from 'fumadocs-ui/components/ui/popover';
  9 | import { createRelativeLink } from 'fumadocs-ui/mdx';
 10 | import {
 11 |   DocsBody,
 12 |   DocsDescription,
 13 |   DocsPage,
 14 |   DocsTitle,
 15 | } from 'fumadocs-ui/page';
 16 | import { cn } from 'fumadocs-ui/utils/cn';
 17 | import { ChevronDown, CodeXml, ExternalLink } from 'lucide-react';
 18 | import type { Metadata } from 'next';
 19 | import Link from 'next/link';
 20 | import { notFound, redirect } from 'next/navigation';
 21 | 
 22 | export default async function Page(props: {
 23 |   params: Promise<{ slug?: string[] }>;
 24 | }) {
 25 |   const params = await props.params;
 26 |   const slug = params.slug || [];
 27 |   const page = source.getPage(slug);
 28 |   if (!page) notFound(); //redirect('/docs');
 29 | 
 30 |   // Detect if this is an API reference page: /api/[section] or /api/[section]/[version]
 31 |   let apiSection: string | null = null;
 32 |   let apiVersionSlug: string[] = [];
 33 |   if (slug[0] === 'api' && slug.length >= 2) {
 34 |     apiSection = slug[1];
 35 |     if (slug.length > 2) {
 36 |       apiVersionSlug = slug.slice(2);
 37 |     }
 38 |   }
 39 | 
 40 |   let versionItems: { label: string; slug: string[] }[] = [];
 41 |   if (apiSection) {
 42 |     versionItems = await getApiVersions(apiSection);
 43 |   }
 44 | 
 45 |   const macos = page.data.macos;
 46 |   const windows = page.data.windows;
 47 |   const linux = page.data.linux;
 48 |   const pypi = page.data.pypi;
 49 |   const npm = page.data.npm;
 50 |   const github = page.data.github;
 51 | 
 52 |   const MDXContent = page.data.body;
 53 | 
 54 |   // Platform icons component
 55 |   const PlatformIcons = () => {
 56 |     const hasAnyPlatform = macos || windows || linux;
 57 |     if (!hasAnyPlatform && !pypi) return null;
 58 | 
 59 |     return (
 60 |       <div className="flex flex-col gap-2">
 61 |         {hasAnyPlatform && (
 62 |           <div className="flex flex-row gap-2 items-left dark:text-neutral-400">
 63 |             {windows && (
 64 |               <svg
 65 |                 xmlns="http://www.w3.org/2000/svg"
 66 |                 fill="currentColor"
 67 |                 className="h-5"
 68 |                 viewBox="0 0 448 512">
 69 |                 <title>Windows</title>
 70 |                 <path d="M0 93.7l183.6-25.3v177.4H0V93.7zm0 324.6l183.6 25.3V268.4H0v149.9zm203.8 28L448 480V268.4H203.8v177.9zm0-380.6v180.1H448V32L203.8 65.7z" />
 71 |               </svg>
 72 |             )}
 73 |             {macos && (
 74 |               <svg
 75 |                 xmlns="http://www.w3.org/2000/svg"
 76 |                 fill="currentColor"
 77 |                 className="h-5"
 78 |                 viewBox="0 0 384 512">
 79 |                 <title>macOS</title>
 80 |                 <path d="M318.7 268.7c-.2-36.7 16.4-64.4 50-84.8-18.8-26.9-47.2-41.7-84.7-44.6-35.5-2.8-74.3 20.7-88.5 20.7-15 0-49.4-19.7-76.4-19.7C63.3 141.2 4 184.8 4 273.5q0 39.3 14.4 81.2c12.8 36.7 59 126.7 107.2 125.2 25.2-.6 43-17.9 75.8-17.9 31.8 0 48.3 17.9 76.4 17.9 48.6-.7 90.4-82.5 102.6-119.3-65.2-30.7-61.7-90-61.7-91.9zm-56.6-164.2c27.3-32.4 24.8-61.9 24-72.5-24.1 1.4-52 16.4-67.9 34.9-17.5 19.8-27.8 44.3-25.6 71.9 26.1 2 49.9-11.4 69.5-34.3z" />
 81 |               </svg>
 82 |             )}
 83 |             {linux && (
 84 |               <svg
 85 |                 xmlns="http://www.w3.org/2000/svg"
 86 |                 fill="currentColor"
 87 |                 className="h-5"
 88 |                 viewBox="0 0 448 512">
 89 |                 <title>Linux</title>
 90 |                 <path d="M220.8 123.3c1 .5 1.8 1.7 3 1.7 1.1 0 2.8-.4 2.9-1.5 .2-1.4-1.9-2.3-3.2-2.9-1.7-.7-3.9-1-5.5-.1-.4 .2-.8 .7-.6 1.1 .3 1.3 2.3 1.1 3.4 1.7zm-21.9 1.7c1.2 0 2-1.2 3-1.7 1.1-.6 3.1-.4 3.5-1.6 .2-.4-.2-.9-.6-1.1-1.6-.9-3.8-.6-5.5 .1-1.3 .6-3.4 1.5-3.2 2.9 .1 1 1.8 1.5 2.8 1.4zM420 403.8c-3.6-4-5.3-11.6-7.2-19.7-1.8-8.1-3.9-16.8-10.5-22.4-1.3-1.1-2.6-2.1-4-2.9-1.3-.8-2.7-1.5-4.1-2 9.2-27.3 5.6-54.5-3.7-79.1-11.4-30.1-31.3-56.4-46.5-74.4-17.1-21.5-33.7-41.9-33.4-72C311.1 85.4 315.7 .1 234.8 0 132.4-.2 158 103.4 156.9 135.2c-1.7 23.4-6.4 41.8-22.5 64.7-18.9 22.5-45.5 58.8-58.1 96.7-6 17.9-8.8 36.1-6.2 53.3-6.5 5.8-11.4 14.7-16.6 20.2-4.2 4.3-10.3 5.9-17 8.3s-14 6-18.5 14.5c-2.1 3.9-2.8 8.1-2.8 12.4 0 3.9 .6 7.9 1.2 11.8 1.2 8.1 2.5 15.7 .8 20.8-5.2 14.4-5.9 24.4-2.2 31.7 3.8 7.3 11.4 10.5 20.1 12.3 17.3 3.6 40.8 2.7 59.3 12.5 19.8 10.4 39.9 14.1 55.9 10.4 11.6-2.6 21.1-9.6 25.9-20.2 12.5-.1 26.3-5.4 48.3-6.6 14.9-1.2 33.6 5.3 55.1 4.1 .6 2.3 1.4 4.6 2.5 6.7v.1c8.3 16.7 23.8 24.3 40.3 23 16.6-1.3 34.1-11 48.3-27.9 13.6-16.4 36-23.2 50.9-32.2 7.4-4.5 13.4-10.1 13.9-18.3 .4-8.2-4.4-17.3-15.5-29.7zM223.7 87.3c9.8-22.2 34.2-21.8 44-.4 6.5 14.2 3.6 30.9-4.3 40.4-1.6-.8-5.9-2.6-12.6-4.9 1.1-1.2 3.1-2.7 3.9-4.6 4.8-11.8-.2-27-9.1-27.3-7.3-.5-13.9 10.8-11.8 23-4.1-2-9.4-3.5-13-4.4-1-6.9-.3-14.6 2.9-21.8zM183 75.8c10.1 0 20.8 14.2 19.1 33.5-3.5 1-7.1 2.5-10.2 4.6 1.2-8.9-3.3-20.1-9.6-19.6-8.4 .7-9.8 21.2-1.8 28.1 1 .8 1.9-.2-5.9 5.5-15.6-14.6-10.5-52.1 8.4-52.1zm-13.6 60.7c6.2-4.6 13.6-10 14.1-10.5 4.7-4.4 13.5-14.2 27.9-14.2 7.1 0 15.6 2.3 25.9 8.9 6.3 4.1 11.3 4.4 22.6 9.3 8.4 3.5 13.7 9.7 10.5 18.2-2.6 7.1-11 14.4-22.7 18.1-11.1 3.6-19.8 16-38.2 14.9-3.9-.2-7-1-9.6-2.1-8-3.5-12.2-10.4-20-15-8.6-4.8-13.2-10.4-14.7-15.3-1.4-4.9 0-9 4.2-12.3zm3.3 334c-2.7 35.1-43.9 34.4-75.3 18-29.9-15.8-68.6-6.5-76.5-21.9-2.4-4.7-2.4-12.7 2.6-26.4v-.2c2.4-7.6 .6-16-.6-23.9-1.2-7.8-1.8-15 .9-20 3.5-6.7 8.5-9.1 14.8-11.3 10.3-3.7 11.8-3.4 19.6-9.9 5.5-5.7 9.5-12.9 14.3-18 5.1-5.5 10-8.1 17.7-6.9 8.1 1.2 15.1 6.8 21.9 16l19.6 35.6c9.5 19.9 43.1 48.4 41 68.9zm-1.4-25.9c-4.1-6.6-9.6-13.6-14.4-19.6 7.1 0 14.2-2.2 16.7-8.9 2.3-6.2 0-14.9-7.4-24.9-13.5-18.2-38.3-32.5-38.3-32.5-13.5-8.4-21.1-18.7-24.6-29.9s-3-23.3-.3-35.2c5.2-22.9 18.6-45.2 27.2-59.2 2.3-1.7 .8 3.2-8.7 20.8-8.5 16.1-24.4 53.3-2.6 82.4 .6-20.7 5.5-41.8 13.8-61.5 12-27.4 37.3-74.9 39.3-112.7 1.1 .8 4.6 3.2 6.2 4.1 4.6 2.7 8.1 6.7 12.6 10.3 12.4 10 28.5 9.2 42.4 1.2 6.2-3.5 11.2-7.5 15.9-9 9.9-3.1 17.8-8.6 22.3-15 7.7 30.4 25.7 74.3 37.2 95.7 6.1 11.4 18.3 35.5 23.6 64.6 3.3-.1 7 .4 10.9 1.4 13.8-35.7-11.7-74.2-23.3-84.9-4.7-4.6-4.9-6.6-2.6-6.5 12.6 11.2 29.2 33.7 35.2 59 2.8 11.6 3.3 23.7 .4 35.7 16.4 6.8 35.9 17.9 30.7 34.8-2.2-.1-3.2 0-4.2 0 3.2-10.1-3.9-17.6-22.8-26.1-19.6-8.6-36-8.6-38.3 12.5-12.1 4.2-18.3 14.7-21.4 27.3-2.8 11.2-3.6 24.7-4.4 39.9-.5 7.7-3.6 18-6.8 29-32.1 22.9-76.7 32.9-114.3 7.2zm257.4-11.5c-.9 16.8-41.2 19.9-63.2 46.5-13.2 15.7-29.4 24.4-43.6 25.5s-26.5-4.8-33.7-19.3c-4.7-11.1-2.4-23.1 1.1-36.3 3.7-14.2 9.2-28.8 9.9-40.6 .8-15.2 1.7-28.5 4.2-38.7 2.6-10.3 6.6-17.2 13.7-21.1 .3-.2 .7-.3 1-.5 .8 13.2 7.3 26.6 18.8 29.5 12.6 3.3 30.7-7.5 38.4-16.3 9-.3 15.7-.9 22.6 5.1 9.9 8.5 7.1 30.3 17.1 41.6 10.6 11.6 14 19.5 13.7 24.6zM173.3 148.7c2 1.9 4.7 4.5 8 7.1 6.6 5.2 15.8 10.6 27.3 10.6 11.6 0 22.5-5.9 31.8-10.8 4.9-2.6 10.9-7 14.8-10.4s5.9-6.3 3.1-6.6-2.6 2.6-6 5.1c-4.4 3.2-9.7 7.4-13.9 9.8-7.4 4.2-19.5 10.2-29.9 10.2s-18.7-4.8-24.9-9.7c-3.1-2.5-5.7-5-7.7-6.9-1.5-1.4-1.9-4.6-4.3-4.9-1.4-.1-1.8 3.7 1.7 6.5z" />
 91 |               </svg>
 92 |             )}
 93 |           </div>
 94 |         )}
 95 | 
 96 |         <div className="flex flex-row gap-2 items-left">
 97 |           {pypi && (
 98 |             <a
 99 |               target="_blank"
100 |               href={`https://pypi.org/project/${pypi}/`}
101 |               rel="noreferrer">
102 |               <img
103 |                 src={`https://img.shields.io/pypi/v/${pypi}?color=blue`}
104 |                 className="h-5"
105 |                 alt="PyPI"
106 |               />
107 |             </a>
108 |           )}
109 |           {npm && (
110 |             <a
111 |               target="_blank"
112 |               href={`https://www.npmjs.com/package/${npm}`}
113 |               rel="noreferrer">
114 |               <img
115 |                 src={`https://img.shields.io/npm/v/${npm}?color=bf4c4b`}
116 |                 className="h-5"
117 |                 alt="NPM"
118 |               />
119 |             </a>
120 |           )}
121 |         </div>
122 |       </div>
123 |     );
124 |   };
125 | 
126 |   const tocHeader = () => {
127 |     return (
128 |       <div className="w-fit">
129 |         <PlatformIcons />
130 |         <div className="flex gap-2 mt-2">
131 |           {github &&
132 |             github.length > 0 &&
133 |             (github.length === 1 ? (
134 |               <a
135 |                 href={github[0]}
136 |                 rel="noreferrer noopener"
137 |                 target="_blank"
138 |                 className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5"
139 |                 aria-label="Source"
140 |                 data-active="false">
141 |                 <svg role="img" viewBox="0 0 24 24" fill="currentColor">
142 |                   <path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path>
143 |                 </svg>
144 |                 Source
145 |                 <ExternalLink className="w-4 h-4 ml-auto" />
146 |               </a>
147 |             ) : (
148 |               <Popover>
149 |                 <PopoverTrigger className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5">
150 |                   <svg role="img" viewBox="0 0 24 24" fill="currentColor">
151 |                     <path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path>
152 |                   </svg>
153 |                   Source
154 |                   <ChevronDown className="h-4 w-4" />
155 |                 </PopoverTrigger>
156 |                 <PopoverContent className="w-48 p-1">
157 |                   <div className="flex flex-col gap-1">
158 |                     {github.map((link, index) => (
159 |                       <a
160 |                         key={index}
161 |                         href={link}
162 |                         rel="noreferrer noopener"
163 |                         target="_blank"
164 |                         className="inline-flex gap-2 w-full items-center rounded-md p-2 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground">
165 |                         {link.includes('python')
166 |                           ? 'Python'
167 |                           : link.includes('typescript')
168 |                             ? 'TypeScript'
169 |                             : `Source ${index + 1}`}
170 |                         <ExternalLink className="w-4 h-4 ml-auto" />
171 |                       </a>
172 |                     ))}
173 |                   </div>
174 |                 </PopoverContent>
175 |               </Popover>
176 |             ))}
177 |           {slug.includes('libraries') && (
178 |             <a
179 |               className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5"
180 |               href={`/api/${page.data.title.toLowerCase()}`}>
181 |               <CodeXml size={12} />
182 |               Reference
183 |             </a>
184 |           )}
185 |         </div>
186 |         <hr className="my-2 border-t border-fd-border" />
187 |       </div>
188 |     );
189 |   };
190 | 
191 |   return (
192 |     <DocsPage
193 |       toc={page.data.toc}
194 |       tableOfContent={{ header: tocHeader() }}
195 |       full={page.data.full}>
196 |       <div className="flex flex-row w-full items-start">
197 |         <div className="flex-1">
198 |           <div className="flex flex-row w-full">
199 |             <DocsTitle>{page.data.title}</DocsTitle>
200 | 
201 |             <div className="ml-auto">
202 |               {apiSection && versionItems.length > 1 && (
203 |                 <Popover>
204 |                   <PopoverTrigger
205 |                     className={cn(
206 |                       buttonVariants({
207 |                         color: 'secondary',
208 |                         size: 'sm',
209 |                         className: 'gap-2',
210 |                       })
211 |                     )}>
212 |                     {(() => {
213 |                       // Find the current version label
214 |                       let currentLabel = 'Current';
215 |                       if (apiVersionSlug.length > 0) {
216 |                         const found = versionItems.find(
217 |                           (item) =>
218 |                             item.label !== 'Current' &&
219 |                             apiVersionSlug[0] === item.label
220 |                         );
221 |                         if (found) currentLabel = found.label;
222 |                       }
223 |                       return (
224 |                         <>
225 |                           API Version: {currentLabel}
226 |                           <ChevronDown className="size-3.5 text-fd-muted-foreground" />
227 |                         </>
228 |                       );
229 |                     })()}
230 |                   </PopoverTrigger>
231 |                   <PopoverContent className="flex flex-col overflow-auto">
232 |                     {versionItems.map((item) => {
233 |                       // Build the href for each version
234 |                       const href =
235 |                         item.label === 'Current'
236 |                           ? `/api/${apiSection}`
237 |                           : `/api/${apiSection}/${item.label}`;
238 |                       // Highlight current version
239 |                       const isCurrent =
240 |                         (item.label === 'Current' &&
241 |                           apiVersionSlug.length === 0) ||
242 |                         (item.label !== 'Current' &&
243 |                           apiVersionSlug[0] === item.label);
244 |                       return (
245 |                         <Link
246 |                           key={item.label}
247 |                           href={href}
248 |                           className={cn(
249 |                             'px-3 py-1 rounded hover:bg-fd-muted',
250 |                             isCurrent && 'font-bold bg-fd-muted'
251 |                           )}>
252 |                           API version: {item.label}
253 |                         </Link>
254 |                       );
255 |                     })}
256 |                   </PopoverContent>
257 |                 </Popover>
258 |               )}
259 |             </div>
260 |           </div>
261 |           <DocsDescription className="text-md mt-1">
262 |             {page.data.description}
263 |           </DocsDescription>
264 |         </div>
265 |       </div>
266 |       <DocsBody>
267 |         <MDXContent
268 |           components={getMDXComponents({
269 |             // this allows you to link to other pages with relative file paths
270 |             a: createRelativeLink(source, page),
271 |           })}
272 |         />
273 |       </DocsBody>
274 |     </DocsPage>
275 |   );
276 | }
277 | 
278 | export async function generateStaticParams() {
279 |   return source.generateParams();
280 | }
281 | 
282 | export async function generateMetadata(props: {
283 |   params: Promise<{ slug?: string[] }>;
284 | }): Promise<Metadata> {
285 |   const params = await props.params;
286 |   const page = source.getPage(params.slug);
287 |   if (!page) notFound();
288 | 
289 |   let title = `${page.data.title} | Cua Docs`;
290 |   if (page.url.includes('api')) title = `${page.data.title} | Cua API Docs`;
291 |   if (page.url.includes('guide'))
292 |     title = `Guide: ${page.data.title} | Cua Docs`;
293 | 
294 |   return {
295 |     title,
296 |     description: page.data.description,
297 |     openGraph: {
298 |       title,
299 |       description: page.data.description,
300 |       type: 'article',
301 |       siteName: 'Cua Docs',
302 |       url: 'https://trycua.com/docs',
303 |     },
304 |   };
305 | }
306 | 
```

--------------------------------------------------------------------------------
/libs/python/mcp-server/mcp_server/server.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import base64
  3 | import inspect
  4 | import logging
  5 | import os
  6 | import signal
  7 | import sys
  8 | import traceback
  9 | import uuid
 10 | from typing import Any, Dict, List, Optional, Union, Tuple
 11 | 
 12 | import anyio
 13 | 
 14 | # Configure logging to output to stderr for debug visibility
 15 | logging.basicConfig(
 16 |     level=logging.DEBUG,  # Changed to DEBUG
 17 |     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 18 |     stream=sys.stderr,
 19 | )
 20 | logger = logging.getLogger("mcp-server")
 21 | 
 22 | # More visible startup message
 23 | logger.debug("MCP Server module loading...")
 24 | 
 25 | try:
 26 |     from mcp.server.fastmcp import Context, FastMCP
 27 |     # Use the canonical Image type
 28 |     from mcp.server.fastmcp.utilities.types import Image
 29 | 
 30 |     logger.debug("Successfully imported FastMCP")
 31 | except ImportError as e:
 32 |     logger.error(f"Failed to import FastMCP: {e}")
 33 |     traceback.print_exc(file=sys.stderr)
 34 |     sys.exit(1)
 35 | 
 36 | try:
 37 |     from computer import Computer
 38 |     from agent import ComputerAgent
 39 | 
 40 |     logger.debug("Successfully imported Computer and Agent modules")
 41 | except ImportError as e:
 42 |     logger.error(f"Failed to import Computer/Agent modules: {e}")
 43 |     traceback.print_exc(file=sys.stderr)
 44 |     sys.exit(1)
 45 | 
 46 | try:
 47 |     from .session_manager import get_session_manager, initialize_session_manager, shutdown_session_manager
 48 |     logger.debug("Successfully imported session manager")
 49 | except ImportError as e:
 50 |     logger.error(f"Failed to import session manager: {e}")
 51 |     traceback.print_exc(file=sys.stderr)
 52 |     sys.exit(1)
 53 | 
 54 | def get_env_bool(key: str, default: bool = False) -> bool:
 55 |     """Get boolean value from environment variable."""
 56 |     return os.getenv(key, str(default)).lower() in ("true", "1", "yes")
 57 | 
 58 | async def _maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None:
 59 |     """Call a context helper if it exists, awaiting the result when necessary."""
 60 |     method = getattr(ctx, method_name, None)
 61 |     if not callable(method):
 62 |         return
 63 |     result = method(*args, **kwargs)
 64 |     if inspect.isawaitable(result):
 65 |         await result
 66 | 
 67 | def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
 68 |     """Normalise message content to a list of structured parts."""
 69 |     if isinstance(content, list):
 70 |         return content
 71 |     if content is None:
 72 |         return []
 73 |     return [{"type": "output_text", "text": str(content)}]
 74 | 
 75 | def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str:
 76 |     """Extract textual content for inclusion in the aggregated result string."""
 77 |     if isinstance(content, str):
 78 |         return content
 79 |     texts: List[str] = []
 80 |     for part in content or []:
 81 |         if not isinstance(part, dict):
 82 |             continue
 83 |         if part.get("type") in {"output_text", "text"} and part.get("text"):
 84 |             texts.append(str(part["text"]))
 85 |     return "\n".join(texts)
 86 | 
 87 | def _serialise_tool_content(content: Any) -> str:
 88 |     """Convert tool outputs into a string for aggregation."""
 89 |     if isinstance(content, str):
 90 |         return content
 91 |     if isinstance(content, list):
 92 |         texts: List[str] = []
 93 |         for part in content:
 94 |             if isinstance(part, dict) and part.get("type") in {"output_text", "text"} and part.get("text"):
 95 |                 texts.append(str(part["text"]))
 96 |         if texts:
 97 |             return "\n".join(texts)
 98 |     if content is None:
 99 |         return ""
100 |     return str(content)
101 | 
102 | def serve() -> FastMCP:
103 |     """Create and configure the MCP server."""
104 |     # NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it.
105 |     server = FastMCP(name="cua-agent")
106 | 
107 |     @server.tool(structured_output=False)
108 |     async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any:
109 |         """
110 |         Take a screenshot of the current MacOS VM screen and return the image.
111 |         
112 |         Args:
113 |             session_id: Optional session ID for multi-client support. If not provided, a new session will be created.
114 |         """
115 |         session_manager = get_session_manager()
116 |         
117 |         async with session_manager.get_session(session_id) as session:
118 |             screenshot = await session.computer.interface.screenshot()
119 |             # Returning Image object is fine when structured_output=False
120 |             return Image(format="png", data=screenshot)
121 | 
122 |     @server.tool(structured_output=False)
123 |     async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any:
124 |         """
125 |         Run a Computer-Use Agent (CUA) task in a MacOS VM and return (combined text, final screenshot).
126 |         
127 |         Args:
128 |             task: The task description for the agent to execute
129 |             session_id: Optional session ID for multi-client support. If not provided, a new session will be created.
130 |         """
131 |         session_manager = get_session_manager()
132 |         task_id = str(uuid.uuid4())
133 |         
134 |         try:
135 |             logger.info(f"Starting CUA task: {task} (task_id: {task_id})")
136 | 
137 |             async with session_manager.get_session(session_id) as session:
138 |                 # Register this task with the session
139 |                 await session_manager.register_task(session.session_id, task_id)
140 |                 
141 |                 try:
142 |                     # Get model name
143 |                     model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022")
144 |                     logger.info(f"Using model: {model_name}")
145 | 
146 |                     # Create agent with the new v0.4.x API
147 |                     agent = ComputerAgent(
148 |                         model=model_name,
149 |                         only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
150 |                         verbosity=logging.INFO,
151 |                         tools=[session.computer],
152 |                     )
153 | 
154 |                     messages = [{"role": "user", "content": task}]
155 | 
156 |                     # Collect all results
157 |                     aggregated_messages: List[str] = []
158 |                     async for result in agent.run(messages):
159 |                         logger.info("Agent processing step")
160 |                         await ctx.info("Agent processing step")
161 | 
162 |                         outputs = result.get("output", [])
163 |                         for output in outputs:
164 |                             output_type = output.get("type")
165 | 
166 |                             if output_type == "message":
167 |                                 logger.debug("Streaming assistant message: %s", output)
168 |                                 content = _normalise_message_content(output.get("content"))
169 |                                 aggregated_text = _extract_text_from_content(content)
170 |                                 if aggregated_text:
171 |                                     aggregated_messages.append(aggregated_text)
172 |                                 await _maybe_call_ctx_method(
173 |                                     ctx,
174 |                                     "yield_message",
175 |                                     role=output.get("role", "assistant"),
176 |                                     content=content,
177 |                                 )
178 | 
179 |                             elif output_type in {"tool_use", "computer_call", "function_call"}:
180 |                                 logger.debug("Streaming tool call: %s", output)
181 |                                 call_id = output.get("id") or output.get("call_id")
182 |                                 tool_name = output.get("name") or output.get("action", {}).get("type")
183 |                                 tool_input = output.get("input") or output.get("arguments") or output.get("action")
184 |                                 if call_id:
185 |                                     await _maybe_call_ctx_method(
186 |                                         ctx,
187 |                                         "yield_tool_call",
188 |                                         name=tool_name,
189 |                                         call_id=call_id,
190 |                                         input=tool_input,
191 |                                     )
192 | 
193 |                             elif output_type in {"tool_result", "computer_call_output", "function_call_output"}:
194 |                                 logger.debug("Streaming tool output: %s", output)
195 |                                 call_id = output.get("call_id") or output.get("id")
196 |                                 content = output.get("content") or output.get("output")
197 |                                 aggregated_text = _serialise_tool_content(content)
198 |                                 if aggregated_text:
199 |                                     aggregated_messages.append(aggregated_text)
200 |                                 if call_id:
201 |                                     await _maybe_call_ctx_method(
202 |                                         ctx,
203 |                                         "yield_tool_output",
204 |                                         call_id=call_id,
205 |                                         output=content,
206 |                                         is_error=output.get("status") == "failed" or output.get("is_error", False),
207 |                                     )
208 | 
209 |                     logger.info("CUA task completed successfully")
210 |                     await ctx.info("CUA task completed successfully")
211 | 
212 |                     screenshot_image = Image(
213 |                         format="png",
214 |                         data=await session.computer.interface.screenshot(),
215 |                     )
216 | 
217 |                     return (
218 |                         "\n".join(aggregated_messages).strip() or "Task completed with no text output.",
219 |                         screenshot_image,
220 |                     )
221 |                     
222 |                 finally:
223 |                     # Unregister the task from the session
224 |                     await session_manager.unregister_task(session.session_id, task_id)
225 | 
226 |         except Exception as e:
227 |             error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}"
228 |             logger.error(error_msg)
229 |             await ctx.error(error_msg)
230 |             
231 |             # Try to get a screenshot from the session if available
232 |             try:
233 |                 if session_id:
234 |                     async with session_manager.get_session(session_id) as session:
235 |                         screenshot = await session.computer.interface.screenshot()
236 |                         return (
237 |                             f"Error during task execution: {str(e)}",
238 |                             Image(format="png", data=screenshot),
239 |                         )
240 |             except Exception:
241 |                 pass
242 |                 
243 |             # If we can't get a screenshot, return a placeholder
244 |             return (
245 |                 f"Error during task execution: {str(e)}",
246 |                 Image(format="png", data=b""),
247 |             )
248 | 
249 |     @server.tool(structured_output=False)
250 |     async def run_multi_cua_tasks(ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False) -> Any:
251 |         """
252 |         Run multiple CUA tasks and return a list of (combined text, screenshot).
253 |         
254 |         Args:
255 |             tasks: List of task descriptions to execute
256 |             session_id: Optional session ID for multi-client support. If not provided, a new session will be created.
257 |             concurrent: If True, run tasks concurrently. If False, run sequentially (default).
258 |         """
259 |         total_tasks = len(tasks)
260 |         if total_tasks == 0:
261 |             await ctx.report_progress(1.0)
262 |             return []
263 | 
264 |         session_manager = get_session_manager()
265 |         
266 |         if concurrent and total_tasks > 1:
267 |             # Run tasks concurrently
268 |             logger.info(f"Running {total_tasks} tasks concurrently")
269 |             await ctx.info(f"Running {total_tasks} tasks concurrently")
270 |             
271 |             # Create tasks with progress tracking
272 |             async def run_task_with_progress(task_index: int, task: str) -> Tuple[int, Tuple[str, Image]]:
273 |                 await ctx.report_progress(task_index / total_tasks)
274 |                 result = await run_cua_task(ctx, task, session_id)
275 |                 await ctx.report_progress((task_index + 1) / total_tasks)
276 |                 return task_index, result
277 |             
278 |             # Create all task coroutines
279 |             task_coroutines = [run_task_with_progress(i, task) for i, task in enumerate(tasks)]
280 |             
281 |             # Wait for all tasks to complete
282 |             results_with_indices = await asyncio.gather(*task_coroutines, return_exceptions=True)
283 |             
284 |             # Sort results by original task order and handle exceptions
285 |             results: List[Tuple[str, Image]] = []
286 |             for result in results_with_indices:
287 |                 if isinstance(result, Exception):
288 |                     logger.error(f"Task failed with exception: {result}")
289 |                     await ctx.error(f"Task failed: {str(result)}")
290 |                     results.append((f"Task failed: {str(result)}", Image(format="png", data=b"")))
291 |                 else:
292 |                     _, task_result = result
293 |                     results.append(task_result)
294 |             
295 |             return results
296 |         else:
297 |             # Run tasks sequentially (original behavior)
298 |             logger.info(f"Running {total_tasks} tasks sequentially")
299 |             await ctx.info(f"Running {total_tasks} tasks sequentially")
300 |             
301 |             results: List[Tuple[str, Image]] = []
302 |             for i, task in enumerate(tasks):
303 |                 logger.info(f"Running task {i+1}/{total_tasks}: {task}")
304 |                 await ctx.info(f"Running task {i+1}/{total_tasks}: {task}")
305 | 
306 |                 await ctx.report_progress(i / total_tasks)
307 |                 task_result = await run_cua_task(ctx, task, session_id)
308 |                 results.append(task_result)
309 |                 await ctx.report_progress((i + 1) / total_tasks)
310 | 
311 |             return results
312 | 
313 |     @server.tool(structured_output=False)
314 |     async def get_session_stats(ctx: Context) -> Dict[str, Any]:
315 |         """
316 |         Get statistics about active sessions and resource usage.
317 |         """
318 |         session_manager = get_session_manager()
319 |         return session_manager.get_session_stats()
320 | 
321 |     @server.tool(structured_output=False)
322 |     async def cleanup_session(ctx: Context, session_id: str) -> str:
323 |         """
324 |         Cleanup a specific session and release its resources.
325 |         
326 |         Args:
327 |             session_id: The session ID to cleanup
328 |         """
329 |         session_manager = get_session_manager()
330 |         await session_manager.cleanup_session(session_id)
331 |         return f"Session {session_id} cleanup initiated"
332 | 
333 |     return server
334 | 
335 | 
336 | server = serve()
337 | 
338 | async def run_server():
339 |     """Run the MCP server with proper lifecycle management."""
340 |     session_manager = None
341 |     try:
342 |         logger.debug("Starting MCP server...")
343 |         
344 |         # Initialize session manager
345 |         session_manager = await initialize_session_manager()
346 |         logger.info("Session manager initialized")
347 |         
348 |         # Set up signal handlers for graceful shutdown
349 |         def signal_handler(signum, frame):
350 |             logger.info(f"Received signal {signum}, initiating graceful shutdown...")
351 |             # Create a task to shutdown gracefully
352 |             asyncio.create_task(graceful_shutdown())
353 |         
354 |         signal.signal(signal.SIGINT, signal_handler)
355 |         signal.signal(signal.SIGTERM, signal_handler)
356 |         
357 |         # Start the server
358 |         logger.info("Starting FastMCP server...")
359 |         # Use run_stdio_async directly instead of server.run() to avoid nested event loops
360 |         await server.run_stdio_async()
361 |         
362 |     except Exception as e:
363 |         logger.error(f"Error starting server: {e}")
364 |         traceback.print_exc(file=sys.stderr)
365 |         raise
366 |     finally:
367 |         # Ensure cleanup happens
368 |         if session_manager:
369 |             logger.info("Shutting down session manager...")
370 |             await shutdown_session_manager()
371 | 
372 | async def graceful_shutdown():
373 |     """Gracefully shutdown the server and all sessions."""
374 |     logger.info("Initiating graceful shutdown...")
375 |     try:
376 |         await shutdown_session_manager()
377 |         logger.info("Graceful shutdown completed")
378 |     except Exception as e:
379 |         logger.error(f"Error during graceful shutdown: {e}")
380 |     finally:
381 |         # Exit the process
382 |         import os
383 |         os._exit(0)
384 | 
385 | def main():
386 |     """Run the MCP server with proper async lifecycle management."""
387 |     try:
388 |         # Use anyio.run instead of asyncio.run to avoid nested event loop issues
389 |         anyio.run(run_server)
390 |     except KeyboardInterrupt:
391 |         logger.info("Server interrupted by user")
392 |     except Exception as e:
393 |         logger.error(f"Error starting server: {e}")
394 |         traceback.print_exc(file=sys.stderr)
395 |         sys.exit(1)
396 | 
397 | if __name__ == "__main__":
398 |     main()
399 | 
```
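
`serve()` registers the tools and `main()` runs the resulting FastMCP server over stdio; `run_cua_task` picks up its model and image-history settings from the `CUA_MODEL_NAME` and `CUA_MAX_IMAGES` environment variables. A minimal launch sketch, assuming the package is importable as `mcp_server` (matching the relative `session_manager` import above):

```python
# Minimal launch sketch (assumption: the package is installed/importable as
# `mcp_server`). CUA_MODEL_NAME and CUA_MAX_IMAGES are the environment
# variables read inside run_cua_task above.
import os

os.environ.setdefault("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022")
os.environ.setdefault("CUA_MAX_IMAGES", "3")

from mcp_server.server import main

if __name__ == "__main__":
    main()  # serves the CUA tools over stdio until interrupted
```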

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Logs.swift:
--------------------------------------------------------------------------------

```swift
  1 | import ArgumentParser
  2 | import Foundation
  3 | 
  4 | struct Logs: ParsableCommand {
  5 |     static let configuration = CommandConfiguration(
  6 |         abstract: "View lume serve logs",
  7 |         subcommands: [Info.self, Error.self, All.self],
  8 |         defaultSubcommand: All.self
  9 |     )
 10 |     
 11 |     // Common functionality for reading log files
 12 |     static func readLogFile(path: String, lines: Int? = nil, follow: Bool = false) -> String {
 13 |         let fileManager = FileManager.default
 14 |         
 15 |         // Check if file exists
 16 |         guard fileManager.fileExists(atPath: path) else {
 17 |             return "Log file not found at \(path)"
 18 |         }
 19 |         
 20 |         do {
 21 |             // Read file content
 22 |             let content = try String(contentsOfFile: path, encoding: .utf8)
 23 |             
 24 |             // If lines parameter is provided, return only the specified number of lines from the end
 25 |             if let lineCount = lines {
 26 |                 let allLines = content.components(separatedBy: .newlines)
 27 |                 let startIndex = max(0, allLines.count - lineCount)
 28 |                 let lastLines = Array(allLines[startIndex...])
 29 |                 return lastLines.joined(separator: "\n")
 30 |             }
 31 |             
 32 |             return content
 33 |         } catch {
 34 |             return "Error reading log file: \(error.localizedDescription)"
 35 |         }
 36 |     }
 37 |     
 38 |     // Method for tailing a log file (following new changes)
 39 |     static func tailLogFile(path: String, initialLines: Int? = 10) {
 40 |         let fileManager = FileManager.default
 41 |         
 42 |         // Check if file exists
 43 |         guard fileManager.fileExists(atPath: path) else {
 44 |             print("Log file not found at \(path)")
 45 |             return
 46 |         }
 47 |         
 48 |         do {
 49 |             // Get initial content with only the specified number of lines from the end
 50 |             var lastPosition: UInt64 = 0
 51 |             let fileHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: path))
 52 |             
 53 |             // First, print the last few lines of the file
 54 |             if let lines = initialLines {
 55 |                 let content = try String(contentsOfFile: path, encoding: .utf8)
 56 |                 let allLines = content.components(separatedBy: .newlines)
 57 |                 let startIndex = max(0, allLines.count - lines)
 58 |                 let lastLines = Array(allLines[startIndex...])
 59 |                 print(lastLines.joined(separator: "\n"))
 60 |             }
 61 |             
 62 |             // Get current file size
 63 |             lastPosition = UInt64(try fileManager.attributesOfItem(atPath: path)[.size] as? UInt64 ?? 0)
 64 |             
 65 |             // Set up for continuous monitoring
 66 |             print("\nTailing log file... Press Ctrl+C to stop")
 67 |             
 68 |             // Monitor file for changes
 69 |             while true {
 70 |                 // Brief pause to reduce CPU usage
 71 |                 Thread.sleep(forTimeInterval: 0.5)
 72 |                 
 73 |                 // Get current size
 74 |                 let currentSize = try fileManager.attributesOfItem(atPath: path)[.size] as? UInt64 ?? 0
 75 |                 
 76 |                 // If file has grown
 77 |                 if currentSize > lastPosition {
 78 |                     // Seek to where we last read
 79 |                     fileHandle.seek(toFileOffset: lastPosition)
 80 |                     
 81 |                     // Read new content
 82 |                     if let newData = try? fileHandle.readToEnd() {
 83 |                         if let newContent = String(data: newData, encoding: .utf8) {
 84 |                             // Print new content without trailing newline
 85 |                             if newContent.hasSuffix("\n") {
 86 |                                 print(newContent, terminator: "")
 87 |                             } else {
 88 |                                 print(newContent)
 89 |                             }
 90 |                         }
 91 |                     }
 92 |                     
 93 |                     // Update position
 94 |                     lastPosition = currentSize
 95 |                 }
 96 |                 
 97 |                 // Handle file rotation (if file became smaller)
 98 |                 else if currentSize < lastPosition {
 99 |                     // File was probably rotated, start from beginning
100 |                     lastPosition = 0
101 |                     fileHandle.seek(toFileOffset: 0)
102 |                     
103 |                     if let newData = try? fileHandle.readToEnd() {
104 |                         if let newContent = String(data: newData, encoding: .utf8) {
105 |                             print(newContent, terminator: "")
106 |                         }
107 |                     }
108 |                     
109 |                     lastPosition = currentSize
110 |                 }
111 |             }
112 |         } catch {
113 |             print("Error tailing log file: \(error.localizedDescription)")
114 |         }
115 |     }
116 |     
117 |     // MARK: - Info Logs Subcommand
118 |     
119 |     struct Info: ParsableCommand {
120 |         static let configuration = CommandConfiguration(
121 |             commandName: "info",
122 |             abstract: "View info logs from the daemon"
123 |         )
124 |         
125 |         @Option(name: .shortAndLong, help: "Number of lines to display from the end of the file")
126 |         var lines: Int?
127 |         
128 |         @Flag(name: .shortAndLong, help: "Follow log file continuously (like tail -f)")
129 |         var follow: Bool = false
130 |         
131 |         func run() throws {
132 |             let logPath = "/tmp/lume_daemon.log"
133 |             
134 |             print("=== Info Logs ===")
135 |             
136 |             if follow {
137 |                 // Use tailing functionality to continuously monitor the log
138 |                 Logs.tailLogFile(path: logPath, initialLines: lines ?? 10)
139 |             } else {
140 |                 // Regular one-time viewing of logs
141 |                 let content = Logs.readLogFile(path: logPath, lines: lines)
142 |                 print(content)
143 |             }
144 |         }
145 |     }
146 |     
147 |     // MARK: - Error Logs Subcommand
148 |     
149 |     struct Error: ParsableCommand {
150 |         static let configuration = CommandConfiguration(
151 |             commandName: "error",
152 |             abstract: "View error logs from the daemon"
153 |         )
154 |         
155 |         @Option(name: .shortAndLong, help: "Number of lines to display from the end of the file")
156 |         var lines: Int?
157 |         
158 |         @Flag(name: .shortAndLong, help: "Follow log file continuously (like tail -f)")
159 |         var follow: Bool = false
160 |         
161 |         func run() throws {
162 |             let logPath = "/tmp/lume_daemon.error.log"
163 |             
164 |             print("=== Error Logs ===")
165 |             
166 |             if follow {
167 |                 // Use tailing functionality to continuously monitor the log
168 |                 Logs.tailLogFile(path: logPath, initialLines: lines ?? 10)
169 |             } else {
170 |                 // Regular one-time viewing of logs
171 |                 let content = Logs.readLogFile(path: logPath, lines: lines)
172 |                 print(content)
173 |             }
174 |         }
175 |     }
176 |     
177 |     // MARK: - All Logs Subcommand
178 |     
179 |     struct All: ParsableCommand {
180 |         static let configuration = CommandConfiguration(
181 |             commandName: "all",
182 |             abstract: "View both info and error logs from the daemon"
183 |         )
184 |         
185 |         @Option(name: .shortAndLong, help: "Number of lines to display from the end of each file")
186 |         var lines: Int?
187 |         
188 |         @Flag(name: .shortAndLong, help: "Follow log files continuously (like tail -f)")
189 |         var follow: Bool = false
190 |         
191 |         // Custom implementation to tail both logs simultaneously
192 |         private func tailBothLogs(infoPath: String, errorPath: String, initialLines: Int? = 10) {
193 |             let fileManager = FileManager.default
194 |             var infoExists = fileManager.fileExists(atPath: infoPath)
195 |             var errorExists = fileManager.fileExists(atPath: errorPath)
196 |             
197 |             if !infoExists && !errorExists {
198 |                 print("Neither info nor error log files found")
199 |                 return
200 |             }
201 |             
202 |             // Print initial content
203 |             print("=== Info Logs ===")
204 |             if infoExists {
205 |                 if let lines = initialLines {
206 |                     let content = (try? String(contentsOfFile: infoPath, encoding: .utf8)) ?? ""
207 |                     let allLines = content.components(separatedBy: .newlines)
208 |                     let startIndex = max(0, allLines.count - lines)
209 |                     let lastLines = Array(allLines[startIndex...])
210 |                     print(lastLines.joined(separator: "\n"))
211 |                 }
212 |             } else {
213 |                 print("Info log file not found")
214 |             }
215 |             
216 |             print("\n=== Error Logs ===")
217 |             if errorExists {
218 |                 if let lines = initialLines {
219 |                     let content = (try? String(contentsOfFile: errorPath, encoding: .utf8)) ?? ""
220 |                     let allLines = content.components(separatedBy: .newlines)
221 |                     let startIndex = max(0, allLines.count - lines)
222 |                     let lastLines = Array(allLines[startIndex...])
223 |                     print(lastLines.joined(separator: "\n"))
224 |                 }
225 |             } else {
226 |                 print("Error log file not found")
227 |             }
228 |             
229 |             print("\nTailing both log files... Press Ctrl+C to stop")
230 |             
231 |             // Initialize file handles and positions
232 |             var infoHandle: FileHandle? = nil
233 |             var errorHandle: FileHandle? = nil
234 |             var infoPosition: UInt64 = 0
235 |             var errorPosition: UInt64 = 0
236 |             
237 |             // Set up file handles
238 |             if infoExists {
239 |                 do {
240 |                     infoHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: infoPath))
241 |                     infoPosition = UInt64(try fileManager.attributesOfItem(atPath: infoPath)[.size] as? UInt64 ?? 0)
242 |                 } catch {
243 |                     print("Error opening info log file: \(error.localizedDescription)")
244 |                 }
245 |             }
246 |             
247 |             if errorExists {
248 |                 do {
249 |                     errorHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: errorPath))
250 |                     errorPosition = UInt64(try fileManager.attributesOfItem(atPath: errorPath)[.size] as? UInt64 ?? 0)
251 |                 } catch {
252 |                     print("Error opening error log file: \(error.localizedDescription)")
253 |                 }
254 |             }
255 |             
256 |             // Monitor both files for changes
257 |             while true {
258 |                 Thread.sleep(forTimeInterval: 0.5)
259 |                 
260 |                 // Check for new content in info log
261 |                 if let handle = infoHandle {
262 |                     do {
263 |                         // Re-check existence in case file was deleted
264 |                         infoExists = fileManager.fileExists(atPath: infoPath)
265 |                         if !infoExists {
266 |                             print("\n[Info log file was removed]")
267 |                             infoHandle = nil
268 |                             continue
269 |                         }
270 |                         
271 |                         let currentSize = try fileManager.attributesOfItem(atPath: infoPath)[.size] as? UInt64 ?? 0
272 |                         
273 |                         if currentSize > infoPosition {
274 |                             handle.seek(toFileOffset: infoPosition)
275 |                             if let newData = try? handle.readToEnd() {
276 |                                 if let newContent = String(data: newData, encoding: .utf8) {
277 |                                     print("\n--- New Info Log Content ---")
278 |                                     if newContent.hasSuffix("\n") {
279 |                                         print(newContent, terminator: "")
280 |                                     } else {
281 |                                         print(newContent)
282 |                                     }
283 |                                 }
284 |                             }
285 |                             infoPosition = currentSize
286 |                         } else if currentSize < infoPosition {
287 |                             // File was rotated
288 |                             print("\n[Info log was rotated]")
289 |                             infoPosition = 0
290 |                             handle.seek(toFileOffset: 0)
291 |                             if let newData = try? handle.readToEnd() {
292 |                                 if let newContent = String(data: newData, encoding: .utf8) {
293 |                                     print("\n--- New Info Log Content ---")
294 |                                     print(newContent, terminator: "")
295 |                                 }
296 |                             }
297 |                             infoPosition = currentSize
298 |                         }
299 |                     } catch {
300 |                         print("\nError reading info log: \(error.localizedDescription)")
301 |                     }
302 |                 } else if fileManager.fileExists(atPath: infoPath) && !infoExists {
303 |                     // File exists again after being deleted
304 |                     do {
305 |                         infoHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: infoPath))
306 |                         infoPosition = 0
307 |                         infoExists = true
308 |                         print("\n[Info log file reappeared]")
309 |                     } catch {
310 |                         print("\nError reopening info log: \(error.localizedDescription)")
311 |                     }
312 |                 }
313 |                 
314 |                 // Check for new content in error log
315 |                 if let handle = errorHandle {
316 |                     do {
317 |                         // Re-check existence in case file was deleted
318 |                         errorExists = fileManager.fileExists(atPath: errorPath)
319 |                         if !errorExists {
320 |                             print("\n[Error log file was removed]")
321 |                             errorHandle = nil
322 |                             continue
323 |                         }
324 |                         
325 |                         let currentSize = try fileManager.attributesOfItem(atPath: errorPath)[.size] as? UInt64 ?? 0
326 |                         
327 |                         if currentSize > errorPosition {
328 |                             handle.seek(toFileOffset: errorPosition)
329 |                             if let newData = try? handle.readToEnd() {
330 |                                 if let newContent = String(data: newData, encoding: .utf8) {
331 |                                     print("\n--- New Error Log Content ---")
332 |                                     if newContent.hasSuffix("\n") {
333 |                                         print(newContent, terminator: "")
334 |                                     } else {
335 |                                         print(newContent)
336 |                                     }
337 |                                 }
338 |                             }
339 |                             errorPosition = currentSize
340 |                         } else if currentSize < errorPosition {
341 |                             // File was rotated
342 |                             print("\n[Error log was rotated]")
343 |                             errorPosition = 0
344 |                             handle.seek(toFileOffset: 0)
345 |                             if let newData = try? handle.readToEnd() {
346 |                                 if let newContent = String(data: newData, encoding: .utf8) {
347 |                                     print("\n--- New Error Log Content ---")
348 |                                     print(newContent, terminator: "")
349 |                                 }
350 |                             }
351 |                             errorPosition = currentSize
352 |                         }
353 |                     } catch {
354 |                         print("\nError reading error log: \(error.localizedDescription)")
355 |                     }
356 |                 } else if fileManager.fileExists(atPath: errorPath) && !errorExists {
357 |                     // File exists again after being deleted
358 |                     do {
359 |                         errorHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: errorPath))
360 |                         errorPosition = 0
361 |                         errorExists = true
362 |                         print("\n[Error log file reappeared]")
363 |                     } catch {
364 |                         print("\nError reopening error log: \(error.localizedDescription)")
365 |                     }
366 |                 }
367 |             }
368 |         }
369 |         
370 |         func run() throws {
371 |             let infoLogPath = "/tmp/lume_daemon.log"
372 |             let errorLogPath = "/tmp/lume_daemon.error.log"
373 |             
374 |             if follow {
375 |                 // Use custom tailing implementation for both logs
376 |                 tailBothLogs(infoPath: infoLogPath, errorPath: errorLogPath, initialLines: lines ?? 10)
377 |             } else {
378 |                 // Regular one-time viewing of logs
379 |                 let infoContent = Logs.readLogFile(path: infoLogPath, lines: lines)
380 |                 let errorContent = Logs.readLogFile(path: errorLogPath, lines: lines)
381 |                 
382 |                 print("=== Info Logs ===")
383 |                 print(infoContent)
384 |                 print("\n=== Error Logs ===")
385 |                 print(errorContent)
386 |             }
387 |         }
388 |     }
389 | }
390 | 
```
Page 12/21