This is page 5 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ ├── py.code-workspace │ └── settings.json ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── windows-sandbox.md ├── CONTRIBUTING.md ├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── cloud-vm-management.mdx │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx 
│ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ │ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── cloud_api_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── 
Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift │ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── .bumpversion.cfg │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── gemini.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── 
types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── types.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── .bumpversion.cfg │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── CONCURRENT_SESSIONS.md │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── server.py │ │ │ │ └── session_manager.py │ │ │ ├── pdm.lock │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── .bumpversion.cfg │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── .bumpversion.cfg │ │ ├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ 
│ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ │ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── xfce4-session.xml ├── LICENSE.md ├── Makefile ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_mcp_server_session_management.py ├── test_mcp_server_streaming.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /examples/computer_examples.py: -------------------------------------------------------------------------------- ```python 1 | import os 2 | import asyncio 3 | from pathlib import Path 4 | import sys 5 | import traceback 6 | 7 | # Load environment variables from .env file 8 | 
project_root = Path(__file__).parent.parent 9 | env_file = project_root / ".env" 10 | print(f"Loading environment from: {env_file}") 11 | from dotenv import load_dotenv 12 | 13 | load_dotenv(env_file) 14 | 15 | # Add paths to sys.path if needed 16 | pythonpath = os.environ.get("PYTHONPATH", "") 17 | for path in pythonpath.split(":"): 18 | if path and path not in sys.path: 19 | sys.path.insert(0, path) # Insert at beginning to prioritize 20 | print(f"Added to sys.path: {path}") 21 | 22 | from computer.computer import Computer 23 | from computer.providers.base import VMProviderType 24 | from computer.logger import LogLevel 25 | 26 | async def main(): 27 | try: 28 | print("\n=== Using direct initialization ===") 29 | 30 | # Create a local macOS computer 31 | computer = Computer( 32 | display="1024x768", 33 | memory="8GB", 34 | cpu="4", 35 | os_type="macos", 36 | name="macos", 37 | verbosity=LogLevel.VERBOSE, 38 | provider_type=VMProviderType.LUME, 39 | storage="/Users/<USER>/repos/trycua/computer/examples/storage", 40 | shared_directories=[ 41 | "/Users/<USER>/repos/trycua/computer/examples/shared" 42 | ], 43 | ephemeral=False, 44 | ) 45 | 46 | # Create a remote Linux computer with Cua 47 | # computer = Computer( 48 | # os_type="linux", 49 | # api_key=os.getenv("CUA_API_KEY"), 50 | # name=os.getenv("CONTAINER_NAME"), 51 | # provider_type=VMProviderType.CLOUD, 52 | # ) 53 | 54 | try: 55 | # Run the computer with default parameters 56 | await computer.run() 57 | 58 | screenshot = await computer.interface.screenshot() 59 | 60 | # Create output directory if it doesn't exist 61 | output_dir = Path("./output") 62 | output_dir.mkdir(exist_ok=True) 63 | 64 | screenshot_path = output_dir / "screenshot.png" 65 | with open(screenshot_path, "wb") as f: 66 | f.write(screenshot) 67 | print(f"Screenshot saved to: {screenshot_path.absolute()}") 68 | 69 | # await computer.interface.hotkey("command", "space") 70 | 71 | # res = await computer.interface.run_command("touch ./Downloads/empty_file") 72 | # print(f"Run command result: {res}") 73 | 74 | accessibility_tree = await computer.interface.get_accessibility_tree() 75 | print(f"Accessibility tree: {accessibility_tree}") 76 | 77 | # Screen Actions Examples 78 | # print("\n=== Screen Actions ===") 79 | # screenshot = await computer.interface.screenshot() 80 | # with open("screenshot_direct.png", "wb") as f: 81 | # f.write(screenshot) 82 | 83 | screen_size = await computer.interface.get_screen_size() 84 | print(f"Screen size: {screen_size}") 85 | 86 | # Demonstrate coordinate conversion 87 | center_x, center_y = 733, 736 88 | print(f"Center in screen coordinates: ({center_x}, {center_y})") 89 | 90 | screenshot_center = await computer.to_screenshot_coordinates(center_x, center_y) 91 | print(f"Center in screenshot coordinates: {screenshot_center}") 92 | 93 | screen_center = await computer.to_screen_coordinates(*screenshot_center) 94 | print(f"Back to screen coordinates: {screen_center}") 95 | 96 | # Mouse Actions Examples 97 | print("\n=== Mouse Actions ===") 98 | await computer.interface.move_cursor(100, 100) 99 | await computer.interface.left_click() 100 | await computer.interface.right_click(300, 300) 101 | await computer.interface.double_click(400, 400) 102 | 103 | # Keyboard Actions Examples 104 | print("\n=== Keyboard Actions ===") 105 | await computer.interface.type_text("Hello, World!") 106 | await computer.interface.press_key("enter") 107 | 108 | # Clipboard Actions Examples 109 | print("\n=== Clipboard Actions ===") 110 | await 
computer.interface.set_clipboard("Test clipboard") 111 | content = await computer.interface.copy_to_clipboard() 112 | print(f"Clipboard content: {content}") 113 | 114 | finally: 115 | # Important to clean up resources 116 | await computer.stop() 117 | except Exception as e: 118 | print(f"Error in main: {e}") 119 | traceback.print_exc() 120 | 121 | 122 | if __name__ == "__main__": 123 | asyncio.run(main()) 124 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/loops/opencua.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | OpenCUA agent loop implementation for click prediction using litellm.acompletion 3 | Based on OpenCUA model for GUI grounding tasks. 4 | """ 5 | 6 | import asyncio 7 | import json 8 | import re 9 | import base64 10 | from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple 11 | from io import BytesIO 12 | import uuid 13 | from PIL import Image 14 | import litellm 15 | import math 16 | 17 | from .composed_grounded import ComposedGroundedConfig 18 | from ..decorators import register_agent 19 | from ..types import Messages, AgentResponse, Tools, AgentCapability 20 | from ..loops.base import AsyncAgentConfig 21 | 22 | def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]: 23 | """Extract coordinates from pyautogui.click(x=..., y=...) format.""" 24 | try: 25 | # Look for pyautogui.click(x=1443, y=343) pattern 26 | pattern = r"pyautogui\.click\(x=(\d+),\s*y=(\d+)\)" 27 | match = re.search(pattern, text) 28 | if match: 29 | x, y = int(match.group(1)), int(match.group(2)) 30 | return (x, y) 31 | return None 32 | except Exception: 33 | return None 34 | 35 | @register_agent(models=r"(?i).*OpenCUA.*") 36 | class OpenCUAConfig(ComposedGroundedConfig): 37 | """OpenCUA agent configuration implementing AsyncAgentConfig protocol for click prediction.""" 38 | 39 | def __init__(self): 40 | super().__init__() 41 | self.current_model = None 42 | self.last_screenshot_b64 = None 43 | 44 | async def predict_step( 45 | self, 46 | messages: List[Dict[str, Any]], 47 | model: str, 48 | tools: Optional[List[Dict[str, Any]]] = None, 49 | max_retries: Optional[int] = None, 50 | stream: bool = False, 51 | computer_handler=None, 52 | _on_api_start=None, 53 | _on_api_end=None, 54 | _on_usage=None, 55 | _on_screenshot=None, 56 | **kwargs 57 | ) -> Dict[str, Any]: 58 | """Fallback to a self-composed model""" 59 | return await super().predict_step( 60 | messages=messages, 61 | model=f"{model}+{model}", 62 | tools=tools, 63 | max_retries=max_retries, 64 | stream=stream, 65 | computer_handler=computer_handler, 66 | _on_api_start=_on_api_start, 67 | _on_api_end=_on_api_end, 68 | _on_usage=_on_usage, 69 | _on_screenshot=_on_screenshot, 70 | **kwargs 71 | ) 72 | 73 | async def predict_click( 74 | self, 75 | model: str, 76 | image_b64: str, 77 | instruction: str, 78 | **kwargs 79 | ) -> Optional[Tuple[int, int]]: 80 | """ 81 | Predict click coordinates using OpenCUA model via litellm.acompletion. 82 | 83 | Args: 84 | model: The OpenCUA model name 85 | image_b64: Base64 encoded image 86 | instruction: Instruction for where to click 87 | 88 | Returns: 89 | Tuple of (x, y) coordinates or None if prediction fails 90 | """ 91 | # Prepare system message 92 | system_prompt = ( 93 | "You are a GUI agent. You are given a task and a screenshot of the screen. " 94 | "You need to perform a series of pyautogui actions to complete the task." 
95 | ) 96 | 97 | system_message = { 98 | "role": "system", 99 | "content": system_prompt 100 | } 101 | 102 | # Prepare user message with image and instruction 103 | user_message = { 104 | "role": "user", 105 | "content": [ 106 | { 107 | "type": "image_url", 108 | "image_url": { 109 | "url": f"data:image/png;base64,{image_b64}" 110 | } 111 | }, 112 | { 113 | "type": "text", 114 | "text": f"Click on {instruction}" 115 | } 116 | ] 117 | } 118 | 119 | # Prepare API call kwargs 120 | api_kwargs = { 121 | "model": model, 122 | "messages": [system_message, user_message], 123 | "max_new_tokens": 2056, 124 | "temperature": 0, 125 | **kwargs 126 | } 127 | 128 | # Use liteLLM acompletion 129 | response = await litellm.acompletion(**api_kwargs) 130 | 131 | # Extract response text 132 | output_text = response.choices[0].message.content 133 | # print(output_text) 134 | 135 | # Extract coordinates from pyautogui format 136 | coordinates = extract_coordinates_from_pyautogui(output_text) 137 | 138 | return coordinates 139 | 140 | def get_capabilities(self) -> List[AgentCapability]: 141 | """Return the capabilities supported by this agent.""" 142 | return ["click"] 143 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/docker.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Docker 3 | --- 4 | 5 | You can use Lumier through Docker: 6 | 7 | ### Run a macOS VM (ephemeral) 8 | ```bash 9 | # Run the container with temporary storage (using pre-built image from Docker Hub) 10 | docker run -it --rm \ 11 | --name macos-vm \ 12 | -p 8006:8006 \ 13 | -e VM_NAME=macos-vm \ 14 | -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ 15 | -e CPU_CORES=4 \ 16 | -e RAM_SIZE=8192 \ 17 | trycua/lumier:latest 18 | ``` 19 | Access the VM in your browser at [http://localhost:8006](http://localhost:8006). 20 | 21 | After running the command above, you can access your macOS VM through a web browser (e.g., http://localhost:8006). 22 | 23 | <Callout title="Note"> 24 | With the basic setup above, your VM will be reset when you stop the container (ephemeral mode). This means any changes you make inside the macOS VM will be lost. See the section below for how to save your VM state. 25 | </Callout> 26 | 27 | ## Saving Your VM State 28 | 29 | To save your VM state between sessions (so your changes persist when you stop and restart the container), you'll need to set up a storage location: 30 | 31 | ```bash 32 | # First, create a storage directory if it doesn't exist 33 | mkdir -p storage 34 | 35 | # Then run the container with persistent storage 36 | docker run -it --rm \ 37 | --name lumier-vm \ 38 | -p 8006:8006 \ 39 | -v $(pwd)/storage:/storage \ 40 | -e VM_NAME=lumier-vm \ 41 | -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ 42 | -e CPU_CORES=4 \ 43 | -e RAM_SIZE=8192 \ 44 | -e HOST_STORAGE_PATH=$(pwd)/storage \ 45 | trycua/lumier:latest 46 | ``` 47 | 48 | This command creates a connection between a folder on your Mac (`$(pwd)/storage`) and a folder inside the Docker container (`/storage`). The `-v` flag (volume mount) and the `HOST_STORAGE_PATH` variable work together to ensure your VM data is saved on your host Mac. 
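For example, assuming the container was started with the persistent-storage command above, you can stop it and start it again with the same arguments; the VM state saved under `./storage` on your Mac is reused instead of booting a fresh image:

```bash
# From another terminal, stop the running container (named lumier-vm in the example above)
docker stop lumier-vm

# Start it again later with the same command; the VM state persisted in ./storage is picked up
docker run -it --rm \
  --name lumier-vm \
  -p 8006:8006 \
  -v $(pwd)/storage:/storage \
  -e VM_NAME=lumier-vm \
  -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
  -e CPU_CORES=4 \
  -e RAM_SIZE=8192 \
  -e HOST_STORAGE_PATH=$(pwd)/storage \
  trycua/lumier:latest
```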
49 | 50 | ## Sharing Files with Your VM 51 | 52 | To share files between your Mac and the virtual machine, you can set up a shared folder: 53 | 54 | ```bash 55 | # Create both storage and shared folders 56 | mkdir -p storage shared 57 | 58 | # Run with both persistent storage and a shared folder 59 | docker run -it --rm \ 60 | --name lumier-vm \ 61 | -p 8006:8006 \ 62 | -v $(pwd)/storage:/storage \ 63 | -v $(pwd)/shared:/shared \ 64 | -e VM_NAME=lumier-vm \ 65 | -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ 66 | -e CPU_CORES=4 \ 67 | -e RAM_SIZE=8192 \ 68 | -e HOST_STORAGE_PATH=$(pwd)/storage \ 69 | -e HOST_SHARED_PATH=$(pwd)/shared \ 70 | trycua/lumier:latest 71 | ``` 72 | 73 | With this setup, any files you place in the `shared` folder on your Mac will be accessible from within the macOS VM, and vice versa. 74 | 75 | ## Automating VM Startup with on-logon.sh 76 | 77 | You can automatically run scripts when the VM starts up by placing an `on-logon.sh` script in the shared folder's lifecycle directory. This is useful for setting up your VM environment each time it starts. 78 | 79 | ```bash 80 | # Create the lifecycle directory in your shared folder 81 | mkdir -p shared/lifecycle 82 | 83 | # Create a sample on-logon.sh script 84 | cat > shared/lifecycle/on-logon.sh << 'EOF' 85 | #!/usr/bin/env bash 86 | 87 | # Create a file on the desktop 88 | echo "Hello from Lumier!" > /Users/lume/Desktop/hello_lume.txt 89 | 90 | # You can add more commands to execute at VM startup 91 | # For example: 92 | # - Configure environment variables 93 | # - Start applications 94 | # - Mount network drives 95 | # - Set up development environments 96 | EOF 97 | 98 | # Make the script executable 99 | chmod +x shared/lifecycle/on-logon.sh 100 | ``` 101 | 102 | The script will be automatically executed when the VM starts up. It runs in the VM context and has access to: 103 | 104 | - The `/Users/lume` user directory (home directory in the VM) 105 | - The shared folder at `/Volumes/My Shared Files` inside the VM 106 | - Any resources available to the VM 107 | 108 | This feature enables automation of VM setup without modifying the base VM image. 109 | 110 | ## Configuration Options 111 | 112 | When running Lumier, you'll need to configure a few things: 113 | 114 | - **Port forwarding** (`-p 8006:8006`): Makes the VM's VNC interface accessible in your browser. If port 8006 is already in use, you can use a different port like `-p 8007:8006`. 115 | 116 | - **Environment variables** (`-e`): Configure your VM settings: 117 | - `VM_NAME`: A name for your virtual machine 118 | - `VERSION`: The macOS image to use 119 | - `CPU_CORES`: Number of CPU cores to allocate 120 | - `RAM_SIZE`: Memory in MB to allocate 121 | - `HOST_STORAGE_PATH`: Path to save VM state (when using persistent storage) 122 | - `HOST_SHARED_PATH`: Path to the shared folder (optional) 123 | 124 | - **Background service**: The `lume serve` service should be running on your host (starts automatically when you install Lume using the `install.sh` script above). 
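Putting these options together, here is a minimal sketch of an ephemeral run that uses an alternate host port (assuming port 8006 is already taken on your machine):

```bash
# Map host port 8007 to the VM's VNC interface instead of the default 8006
docker run -it --rm \
  --name lumier-vm \
  -p 8007:8006 \
  -e VM_NAME=lumier-vm \
  -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
  -e CPU_CORES=4 \
  -e RAM_SIZE=8192 \
  trycua/lumier:latest
```

The VM is then reachable at http://localhost:8007 instead of http://localhost:8006.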
``` -------------------------------------------------------------------------------- /libs/typescript/agent/src/types.ts: -------------------------------------------------------------------------------- ```typescript 1 | // #region Request 2 | export type ConnectionType = 'http' | 'https' | 'peer'; 3 | export interface AgentClientOptions { 4 | timeout?: number; 5 | retries?: number; 6 | /** Optional CUA API key to send as X-API-Key header for HTTP requests */ 7 | apiKey?: string; 8 | } 9 | // Request types matching the Python proxy API 10 | export interface AgentRequest { 11 | model: string; 12 | input: string | AgentMessage[]; 13 | agent_kwargs?: { 14 | save_trajectory?: boolean; 15 | verbosity?: number; 16 | [key: string]: any; 17 | }; 18 | computer_kwargs?: { 19 | os_type?: string; 20 | provider_type?: string; 21 | [key: string]: any; 22 | }; 23 | /** 24 | * Optional per-request environment variable overrides. 25 | * Keys and values are strings and will be forwarded to the backend proxy. 26 | */ 27 | env?: Record<string, string>; 28 | } 29 | // #endregion 30 | 31 | 32 | // #region Response 33 | // Response types 34 | export interface AgentResponse { 35 | output: AgentMessage[]; 36 | usage: Usage; 37 | status: 'completed' | 'failed'; 38 | error?: string; 39 | } 40 | // Usage information 41 | export interface Usage { 42 | prompt_tokens: number; 43 | completion_tokens: number; 44 | total_tokens: number; 45 | response_cost: number; 46 | } 47 | // #endregion 48 | 49 | 50 | 51 | // #region Messages 52 | // Agent message types - can be one of several different message types 53 | export type AgentMessage = 54 | | UserMessage 55 | | AssistantMessage 56 | | ReasoningMessage 57 | | ComputerCallMessage 58 | | ComputerCallOutputMessage 59 | | FunctionCallMessage 60 | | FunctionCallOutputMessage; 61 | // Input message 62 | export interface UserMessage { 63 | type?: 'message'; 64 | role: 'user' | 'system' | 'developer'; 65 | content: string | InputContent[]; 66 | } 67 | // Output message 68 | export interface AssistantMessage { 69 | type: 'message'; 70 | role: 'assistant'; 71 | content: OutputContent[]; 72 | } 73 | // Output reasoning/thinking message 74 | export interface ReasoningMessage { 75 | type: 'reasoning'; 76 | summary: SummaryContent[]; 77 | } 78 | // Output computer action call 79 | export interface ComputerCallMessage { 80 | type: 'computer_call'; 81 | call_id: string; 82 | status: 'completed' | 'failed' | 'pending'; 83 | action: ComputerAction; 84 | } 85 | // Output computer action result (always a screenshot) 86 | export interface ComputerCallOutputMessage { 87 | type: 'computer_call_output'; 88 | call_id: string; 89 | output: ComputerResultContent; 90 | } 91 | // Output function call 92 | export interface FunctionCallMessage { 93 | type: 'function_call'; 94 | call_id: string; 95 | status: 'completed' | 'failed' | 'pending'; 96 | name: string; 97 | arguments: string; // JSON dict of kwargs 98 | } 99 | // Output function call result (always text) 100 | export interface FunctionCallOutputMessage { 101 | type: 'function_call_output'; 102 | call_id: string; 103 | output: string; 104 | } 105 | // #endregion 106 | 107 | 108 | 109 | // #region Message Content 110 | export interface InputContent { 111 | type: 'input_image' | 'input_text'; 112 | text?: string; 113 | image_url?: string; 114 | } 115 | export interface OutputContent { 116 | type: 'output_text'; 117 | text: string; 118 | } 119 | export interface SummaryContent { 120 | type: 'summary_text'; 121 | text: string; 122 | } 123 | export 
interface ComputerResultContent { 124 | type: 'computer_screenshot' | 'input_image'; 125 | image_url: string; 126 | } 127 | // #endregion 128 | 129 | 130 | 131 | // #region Actions 132 | export type ComputerAction = 133 | | ComputerActionOpenAI 134 | | ComputerActionAnthropic; 135 | // OpenAI Computer Actions 136 | export type ComputerActionOpenAI = 137 | | ClickAction 138 | | DoubleClickAction 139 | | DragAction 140 | | KeyPressAction 141 | | MoveAction 142 | | ScreenshotAction 143 | | ScrollAction 144 | | TypeAction 145 | | WaitAction; 146 | export interface ClickAction { 147 | type: 'click'; 148 | button: 'left' | 'right' | 'wheel' | 'back' | 'forward'; 149 | x: number; 150 | y: number; 151 | } 152 | export interface DoubleClickAction { 153 | type: 'double_click'; 154 | button?: 'left' | 'right' | 'wheel' | 'back' | 'forward'; 155 | x: number; 156 | y: number; 157 | } 158 | export interface DragAction { 159 | type: 'drag'; 160 | button?: 'left' | 'right' | 'wheel' | 'back' | 'forward'; 161 | path: Array<[number, number]>; 162 | } 163 | export interface KeyPressAction { 164 | type: 'keypress'; 165 | keys: string[]; 166 | } 167 | export interface MoveAction { 168 | type: 'move'; 169 | x: number; 170 | y: number; 171 | } 172 | export interface ScreenshotAction { 173 | type: 'screenshot'; 174 | } 175 | export interface ScrollAction { 176 | type: 'scroll'; 177 | scroll_x: number; 178 | scroll_y: number; 179 | x: number; 180 | y: number; 181 | } 182 | export interface TypeAction { 183 | type: 'type'; 184 | text: string; 185 | } 186 | export interface WaitAction { 187 | type: 'wait'; 188 | } 189 | // Anthropic Computer Actions 190 | export type ComputerActionAnthropic = 191 | | LeftMouseDownAction 192 | | LeftMouseUpAction; 193 | export interface LeftMouseDownAction { 194 | type: 'left_mouse_down'; 195 | x: number; 196 | y: number; 197 | } 198 | export interface LeftMouseUpAction { 199 | type: 'left_mouse_up'; 200 | x: number; 201 | y: number; 202 | } 203 | // #endregion ``` -------------------------------------------------------------------------------- /libs/python/agent/example.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Example usage of the agent library with docstring-based tool definitions. 
3 | """ 4 | 5 | import asyncio 6 | import logging 7 | 8 | from agent import ComputerAgent 9 | from computer import Computer 10 | from computer.helpers import sandboxed 11 | 12 | @sandboxed() 13 | def read_file(location: str) -> str: 14 | """Read contents of a file 15 | 16 | Parameters 17 | ---------- 18 | location : str 19 | Path to the file to read 20 | 21 | Returns 22 | ------- 23 | str 24 | Contents of the file or error message 25 | """ 26 | try: 27 | with open(location, 'r') as f: 28 | return f.read() 29 | except Exception as e: 30 | return f"Error reading file: {str(e)}" 31 | 32 | def save_note(content: str, filename: str = "note.txt") -> str: 33 | """Save content to a note file 34 | 35 | Parameters 36 | ---------- 37 | content : str 38 | Content to save to the file 39 | filename : str, optional 40 | Name of the file to save to (default is "note.txt") 41 | 42 | Returns 43 | ------- 44 | str 45 | Success or error message 46 | """ 47 | try: 48 | with open(filename, 'w') as f: 49 | f.write(content) 50 | return f"Saved note to {filename}" 51 | except Exception as e: 52 | return f"Error saving note: {str(e)}" 53 | 54 | def calculate(a: int, b: int) -> int: 55 | """Calculate the sum of two integers 56 | 57 | Parameters 58 | ---------- 59 | a : int 60 | First integer 61 | b : int 62 | Second integer 63 | 64 | Returns 65 | ------- 66 | int 67 | Sum of the two integers 68 | """ 69 | return a + b 70 | 71 | async def main(): 72 | """Example usage of ComputerAgent with different models""" 73 | 74 | # Example 1: Using Claude with computer and custom tools 75 | print("=== Example 1: Claude with Computer ===") 76 | 77 | import os 78 | import dotenv 79 | import json 80 | dotenv.load_dotenv() 81 | 82 | assert os.getenv("CUA_CONTAINER_NAME") is not None, "CUA_CONTAINER_NAME is not set" 83 | assert os.getenv("CUA_API_KEY") is not None, "CUA_API_KEY is not set" 84 | 85 | async with Computer( 86 | os_type="linux", 87 | provider_type="cloud", 88 | name=os.getenv("CUA_CONTAINER_NAME") or "", 89 | api_key=os.getenv("CUA_API_KEY") or "" 90 | ) as computer: 91 | agent = ComputerAgent( 92 | # Supported models: 93 | 94 | # == OpenAI CUA (computer-use-preview) == 95 | model="openai/computer-use-preview", 96 | 97 | # == Anthropic CUA (Claude > 3.5) == 98 | # model="anthropic/claude-opus-4-20250514", 99 | # model="anthropic/claude-sonnet-4-20250514", 100 | # model="anthropic/claude-3-7-sonnet-20250219", 101 | # model="anthropic/claude-3-5-sonnet-20241022", 102 | 103 | # == UI-TARS == 104 | # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", 105 | # TODO: add local mlx provider 106 | # model="mlx-community/UI-TARS-1.5-7B-6bit", 107 | # model="ollama_chat/0000/ui-tars-1.5-7b", 108 | 109 | # == Omniparser + Any LLM == 110 | # model="omniparser+..." 
111 | # model="omniparser+anthropic/claude-opus-4-20250514", 112 | 113 | tools=[computer], 114 | only_n_most_recent_images=3, 115 | verbosity=logging.INFO, 116 | trajectory_dir="trajectories", 117 | use_prompt_caching=True, 118 | max_trajectory_budget={ "max_budget": 1.0, "raise_error": True, "reset_after_each_run": False }, 119 | ) 120 | 121 | history = [] 122 | while True: 123 | user_input = input("> ") 124 | history.append({"role": "user", "content": user_input}) 125 | 126 | # Non-streaming usage 127 | async for result in agent.run(history, stream=False): 128 | history += result["output"] 129 | 130 | # # Print output 131 | # for item in result["output"]: 132 | # if item["type"] == "message": 133 | # print(item["content"][0]["text"]) 134 | # elif item["type"] == "computer_call": 135 | # action = item["action"] 136 | # action_type = action["type"] 137 | # action_args = {k: v for k, v in action.items() if k != "type"} 138 | # print(f"{action_type}({action_args})") 139 | # elif item["type"] == "function_call": 140 | # action = item["name"] 141 | # action_args = item["arguments"] 142 | # print(f"{action}({action_args})") 143 | # elif item["type"] == "function_call_output": 144 | # print("===>", item["output"]) 145 | 146 | if __name__ == "__main__": 147 | asyncio.run(main()) ``` -------------------------------------------------------------------------------- /blog/trajectory-viewer.md: -------------------------------------------------------------------------------- ```markdown 1 | # Trajectory Viewer for Cua 2 | 3 | *Published on May 13, 2025 by Dillon DuPont* 4 | 5 | Don’t forget to check out [Part 1: Building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [Part 2: Using the Agent framework](build-your-own-operator-on-macos-2) for setting up your Cua environment and basic tips and tricks! 6 | 7 | ## Introduction 8 | 9 | Okay, so you’ve gotten your environment up and also tested a few agent runs. You’ll likely have encountered cases where your agent was successful at doing some tasks but also places where it got stuck or outright failed. 10 | Now what? 11 | If you’ve ever wondered exactly what your computer agent is doing and why it sometimes doesn’t do what you expected, then the Trajectory Viewer for Cua is here to help! Whether you’re a seasoned developer or someone who just wants to dive in and see results, this tool makes it easy to explore every step your agent takes on your screen. 12 | Plus, if you want to start thinking about generating data to train your own agentic model (we’ll cover training in an upcoming blog, so look forward to it), then our Trajectory Viewer might be for you. 13 | 14 | ## So, what’s a “trajectory”? 15 | 16 | Think of a trajectory as a detailed video recording of your agent’s journey: 17 | 18 | - **Observations**: What did the agent see (the exact screen content) at each point in time? 19 | - **Actions**: What clicks, keystrokes, or commands did it perform in response? 20 | - **Decisions**: Which options did it choose, and why? 21 | Especially for longer and more complex tasks, your agent will make multiple steps, take multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong. 22 | 23 | ## So, what’s Cua’s Trajectory Viewer and why use it? 24 | 25 | The Trajectory Player for Cua is a GUI tool that helps you explore saved trajectories generated from your Cua computer agent runs. 
This tool provides a powerful way to: 26 | 27 | - **Debug your agents**: See exactly what your agent saw to reproduce bugs 28 | - **Analyze failure cases**: Identify the moment when your agent went off-script 29 | - **Collect training data**: Export your trajectories for your own processing, training, and more! 30 | 31 | The viewer allows you to see exactly what your agent observed and how it interacted with the computer all through your browser. 32 | 33 | ## Opening Trajectory Viewer in 3 Simple Steps 34 | 35 | 1. **Visit**: Open your browser and go to [https://www.trycua.com/trajectory-viewer](https://www.trycua.com/trajectory-viewer). 36 | 2. **Upload**: Drag and drop a trajectories folder or click Select Folder. 37 | 3. **Explore**: View your agent’s trajectories! All data stays in your browser unless you give permission otherwise. 38 | 39 |  40 | 41 | ## Recording a Trajectory 42 | 43 | ### Using the Gradio UI 44 | 45 | The simplest way to create agent trajectories is through the [Cua Agent Gradio UI](https://www.trycua.com/docs/quickstart-ui) by checking the "Save Trajectory" option. 46 | 47 | ### Using the ComputerAgent API 48 | 49 | Trajectories are saved by default when using the ComputerAgent API: 50 | 51 | ```python 52 | agent.run("book a flight for me") 53 | ``` 54 | 55 | You can explicitly control trajectory saving with the `save_trajectory` parameter: 56 | 57 | ```python 58 | from cua import ComputerAgent 59 | 60 | agent = ComputerAgent(save_trajectory=True) 61 | agent.run("search for hotels in Boston") 62 | ``` 63 | 64 | Each trajectory folder is saved in a `trajectories` directory with a timestamp format, for example: `trajectories/20250501_222749` 65 | 66 | ## Exploring and Analyzing Trajectories 67 | 68 | Our Trajectory Viewer is designed to allow for thorough analysis and debugging in a friendly way. Once loaded, the viewer presents: 69 | 70 | - **Timeline Slider**: Jump to any step in the session 71 | - **Screen Preview**: See exactly what the agent saw 72 | - **Action Details**: Review clicks, keypresses, and API calls 73 | - **Logs & Metadata**: Inspect debug logs or performance stats 74 | 75 | Use these features to: 76 | 77 | - Step through each action and observation; understand your agent’s decision-making 78 | - Understand why and where your agent failed 79 | - Collect insights for improving your instructions, prompts, tasks, agent, etc. 80 | 81 | The trajectory viewer provides a visual interface for stepping through each action your agent took, making it easy to see what your agent “sees”. 82 | 83 | ## Getting Started 84 | 85 | Ready to see your agent in action? Head over to the Trajectory Viewer and load up your first session. Debug smarter, train faster, and stay in control (all within your browser). 86 | 87 | Happy tinkering and Cua on! 88 | 89 | Have questions or want to share feedback? Join our community on Discord or open an issue on GitHub. 90 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Composed Agents 3 | description: Combine grounding models with any LLM for computer-use capabilities 4 | --- 5 | 6 | Composed agents combine the best of both worlds: specialized grounding models for precise click prediction and powerful LLMs for task planning and reasoning. 
7 | 8 | Use the format `"grounding_model+planning_model"` to create a composed agent with any vision-enabled LiteLLM-compatible model. 9 | 10 | ## How Composed Agents Work 11 | 12 | 1. **Planning Phase**: The planning model (LLM) analyzes the task and decides what actions to take (e.g., `click("find the login button")`, `type("username")`) 13 | 2. **Grounding Phase**: The grounding model converts element descriptions to precise coordinates 14 | 3. **Execution**: Actions are performed using the predicted coordinates 15 | 16 | ## Supported Grounding Models 17 | 18 | Any model that supports `predict_click()` can be used as the grounding component. See the full list on [Grounding Models](./grounding-models). 19 | 20 | - OpenCUA: `huggingface-local/xlangai/OpenCUA-{7B,32B}` 21 | - GTA1 family: `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}` 22 | - Holo 1.5 family: `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}` 23 | - InternVL 3.5 family: `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` 24 | - UI‑TARS 1.5: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` (also supports full CU) 25 | - OmniParser (OCR): `omniparser` (requires combination with a LiteLLM vision model) 26 | - Moondream3: `moondream3` (requires combination with a LiteLLM vision/text model) 27 | 28 | ## Supported Planning Models 29 | 30 | Any vision-enabled LiteLLM-compatible model can be used as the planning component: 31 | 32 | - Any All‑in‑one CUA (planning-capable). See [All‑in‑one CUAs](./computer-use-agents). 33 | - Any VLM via LiteLLM providers: `anthropic/*`, `openai/*`, `openrouter/*`, `gemini/*`, `vertex_ai/*`, `huggingface-local/*`, `mlx/*`, etc. 34 | - Examples: 35 | - **Anthropic**: `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-opus-4-1-20250805` 36 | - **OpenAI**: `openai/gpt-5`, `openai/gpt-o3`, `openai/gpt-4o` 37 | - **Google**: `gemini/gemini-1.5-pro`, `vertex_ai/gemini-pro-vision` 38 | - **Local models**: Any Hugging Face vision-language model 39 | 40 | ## Usage Examples 41 | 42 | ### GTA1 + GPT-5 43 | 44 | Use Google's Gemini for planning with specialized grounding: 45 | 46 | ```python 47 | agent = ComputerAgent( 48 | "huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5", 49 | tools=[computer] 50 | ) 51 | 52 | async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): 53 | pass 54 | ``` 55 | 56 | ### GTA1 + Claude 3.5 Sonnet 57 | 58 | Combine state-of-the-art grounding with powerful reasoning: 59 | 60 | ```python 61 | agent = ComputerAgent( 62 | "huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022", 63 | tools=[computer] 64 | ) 65 | 66 | async for _ in agent.run("Open Firefox, navigate to github.com, and search for 'computer-use'"): 67 | pass 68 | # Success! 🎉 69 | # - Claude 3.5 Sonnet plans the sequence of actions 70 | # - GTA1-7B provides precise click coordinates for each UI element 71 | ``` 72 | 73 | ### UI-TARS + GPT-4o 74 | 75 | Combine two different vision models for enhanced capabilities: 76 | 77 | ```python 78 | agent = ComputerAgent( 79 | "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B+openai/gpt-4o", 80 | tools=[computer] 81 | ) 82 | 83 | async for _ in agent.run("Help me fill out this form with my personal information"): 84 | pass 85 | ``` 86 | 87 | ### Moondream3 + GPT-4o 88 | 89 | Use the built-in Moondream3 grounding with any planning model. Moondream3 will detect UI elements on the latest screenshot, label them, and provide a user message listing detected element names. 
90 | 91 | ```python 92 | from agent import ComputerAgent 93 | from computer import computer 94 | 95 | agent = ComputerAgent( 96 | "moondream3+openai/gpt-4o", 97 | tools=[computer] 98 | ) 99 | 100 | async for _ in agent.run("Close the settings window, then open the Downloads folder"): 101 | pass 102 | ``` 103 | 104 | ## Benefits of Composed Agents 105 | 106 | - **Specialized Grounding**: Use models optimized for click prediction accuracy 107 | - **Flexible Planning**: Choose any LLM for task reasoning and planning 108 | - **Cost Optimization**: Use smaller grounding models with larger planning models only when needed 109 | - **Performance**: Leverage the strengths of different model architectures 110 | 111 | ## Capabilities 112 | 113 | Composed agents support both capabilities: 114 | 115 | ```python 116 | agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022") 117 | 118 | # Full computer-use agent capabilities 119 | async for _ in agent.run("Complete this online form"): 120 | pass 121 | 122 | # Direct click prediction (uses grounding model only) 123 | coords = agent.predict_click("find the submit button") 124 | ``` 125 | 126 | --- 127 | 128 | For more information on individual model capabilities, see [Computer-Use Agents](./computer-use-agents) and [Grounding Models](./grounding-models). 129 | ``` -------------------------------------------------------------------------------- /blog/composite-agents.md: -------------------------------------------------------------------------------- ```markdown 1 | # Announcing Cua Agent framework 0.4 and Composite Agents 2 | 3 | *Published on August 26, 2025 by Dillon DuPont* 4 | 5 | <img src="./assets/composite-agents.png" alt="Composite Agents"> 6 | 7 | So you want to build an agent that can use a computer. Great! You've probably discovered that there are now dozens of different AI models that claim they can click GUI buttons and fill out forms. Less great: actually getting them to work together is like trying to coordinate a group project where everyone speaks a different language and has invented seventeen different ways to say "click here". 8 | 9 | Here's the thing about new GUI models: they're all special snowflakes. One model wants you to feed it images and expects coordinates back as percentages from 0 to 1. Another wants absolute pixel coordinates. A third model has invented its own numeral system with `<|loc095|><|loc821|>` tokens inside tool calls. Some models output Python code that calls `pyautogui.click(x, y)`. Others will start hallucinating coordinates if you forget to format all previous messages within a very specific GUI system prompt. 10 | 11 | This is the kind of problem that makes you wonder if we're building the future of computing or just recreating the Tower of Babel with more GPUs. 12 | 13 | ## What we fixed 14 | 15 | Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language. 16 | 17 | Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-3-5-sonnet-20241022"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to. 
18 | 19 | ```python 20 | # This works the same whether you're using Anthropic, OpenAI, or that new model you found on Hugging Face 21 | agent = ComputerAgent( 22 | model="anthropic/claude-3-5-sonnet-20241022", # or any other supported model 23 | tools=[computer] 24 | ) 25 | ``` 26 | 27 | The output format is consistent across all providers (OpenAI, Anthropic, Vertex, Hugging Face, OpenRouter, etc.). No more writing different parsers for each model's creative interpretation of how to represent a mouse click. 28 | 29 | ## Composite Agents: Two Brains Are Better Than One 30 | 31 | Here's where it gets interesting. We realized that you don't actually need one model to be good at everything. Some models are excellent at understanding what's on the screen—they can reliably identify buttons and text fields and figure out where to click. Other models are great at planning and reasoning but might be a bit fuzzy on the exact pixel coordinates. 32 | 33 | So we let you combine them with a `+` sign: 34 | 35 | ```python 36 | agent = ComputerAgent( 37 | # specify the grounding model first, then the planning model 38 | model="huggingface-local/HelloKKMe/GTA1-7B+huggingface-local/OpenGVLab/InternVL3_5-8B", 39 | tools=[computer] 40 | ) 41 | ``` 42 | 43 | This creates a composite agent where one model (the "grounding" model) handles the visual understanding and precise UI interactions, while the other (the "planning" model) handles the high-level reasoning and task orchestration. It's like having a pilot and a navigator, except they're both AI models and they're trying to help you star a GitHub repository. 44 | 45 | You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model: 46 | 47 | ```python 48 | agent = ComputerAgent( 49 | model="huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-4o", 50 | tools=[computer] 51 | ) 52 | ``` 53 | 54 | ## Example notebook 55 | 56 | For a full, ready-to-run demo (install deps, local computer using Docker, and a composed agent example), see the notebook: 57 | 58 | - https://github.com/trycua/cua/blob/models/opencua/notebooks/composite_agents_docker_nb.ipynb 59 | 60 | ## What's next 61 | 62 | We're building integration with HUD evals, allowing us to curate and benchmark model combinations. This will help us identify which composite agent pairs work best for different types of tasks, and provide you with tested recommendations rather than just throwing model names at the wall to see what sticks. 63 | 64 | If you try out version 0.4.x, we'd love to hear how it goes. Join us on Discord to share your results and let us know what model combinations work best for your projects. 65 | 66 | 67 | --- 68 | 69 | ## Links 70 | 71 | * **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) 72 | * **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai) 73 | 74 | Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build. 
``` -------------------------------------------------------------------------------- /docs/content/docs/computer-sdk/computers.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Cua Computers 3 | description: Understanding cua computer types and connection methods 4 | --- 5 | 6 | <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb" target="_blank">Jupyter Notebook</a> and <a href="https://github.com/trycua/cua/tree/main/examples/computer-example-ts" target="_blank">NodeJS project</a> are available for this documentation.</Callout> 7 | 8 | Before we can automate apps using AI, we need to first connect to a Computer Server to give the AI a safe environment to execute workflows in. 9 | 10 | Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop. 11 | 12 | ## Cloud Sandbox 13 | 14 | **Easiest & safest way to get started - works on any host OS** 15 | 16 | This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/). 17 | 18 | <Tabs items={['Python', 'TypeScript']}> 19 | <Tab value="Python"> 20 | ```python 21 | from computer import Computer 22 | 23 | computer = Computer( 24 | os_type="linux", 25 | provider_type="cloud", 26 | name="your-sandbox-name", 27 | api_key="your-api-key" 28 | ) 29 | 30 | await computer.run() # Connect to the sandbox 31 | ``` 32 | 33 | </Tab> 34 | <Tab value="TypeScript"> 35 | ```typescript 36 | import { Computer, OSType } from '@trycua/computer'; 37 | 38 | const computer = new Computer({ 39 | osType: OSType.LINUX, 40 | name: "your-sandbox-name", 41 | apiKey: "your-api-key" 42 | }); 43 | 44 | await computer.run(); // Connect to the sandbox 45 | ``` 46 | 47 | </Tab> 48 | </Tabs> 49 | 50 | ## Linux on Docker 51 | 52 | **Run Linux desktop locally on macOS, Windows, or Linux hosts** 53 | 54 | Cua provides two Docker images for running Linux desktops: 55 | 56 | <Tabs items={['XFCE (Lightweight)', 'KASM (Full-Featured)']}> 57 | <Tab value="XFCE (Lightweight)"> 58 | 59 | **Recommended for most use cases** - lightweight XFCE desktop with Firefox 60 | 61 | 1. Install Docker Desktop or Docker Engine 62 | 63 | 2. Pull the CUA XFCE image 64 | 65 | ```bash 66 | docker pull --platform=linux/amd64 trycua/cua-xfce:latest 67 | ``` 68 | 69 | 3. Connect with Computer 70 | 71 | ```python 72 | from computer import Computer 73 | 74 | computer = Computer( 75 | os_type="linux", 76 | provider_type="docker", 77 | image="trycua/cua-xfce:latest", 78 | name="my-xfce-container" 79 | ) 80 | 81 | await computer.run() # Launch & connect to Docker sandbox 82 | ``` 83 | 84 | </Tab> 85 | <Tab value="KASM (Full-Featured)"> 86 | 87 | **Full-featured Ubuntu desktop** with additional applications 88 | 89 | 1. Install Docker Desktop or Docker Engine 90 | 91 | 2. Build or pull the CUA KASM image 92 | 93 | ```bash 94 | # Option 1: Pull from Docker Hub 95 | docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest 96 | 97 | # Option 2: Build locally 98 | cd libs/kasm 99 | docker build -t cua-ubuntu:latest . 100 | ``` 101 | 102 | 3. 
103 | 
104 | ```python
105 | from computer import Computer
106 | 
107 | computer = Computer(
108 |     os_type="linux",
109 |     provider_type="docker",
110 |     image="trycua/cua-ubuntu:latest",
111 |     name="my-kasm-container"
112 | )
113 | 
114 | await computer.run() # Launch & connect to Docker sandbox
115 | ```
116 | 
117 | </Tab>
118 | </Tabs>
119 | 
120 | ## Windows Sandbox
121 | 
122 | **Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11**
123 | 
124 | 1. Enable Windows Sandbox
125 | 2. Install the pywinsandbox dependency
126 | 
127 | ```bash
128 | pip install -U git+https://github.com/karkason/pywinsandbox.git
129 | ```
130 | 
131 | 3. Connect with Computer
132 | 
133 | ```python
134 | from computer import Computer
135 | 
136 | computer = Computer(
137 |     os_type="windows",
138 |     provider_type="winsandbox",
139 |     ephemeral=True # Windows Sandbox is always ephemeral
140 | )
141 | 
142 | await computer.run() # Launch & connect to Windows Sandbox
143 | ```
144 | 
145 | ## macOS VM
146 | 
147 | **macOS hosts only - requires Lume CLI**
148 | 
149 | 1. Install the Lume CLI
150 | 
151 | ```bash
152 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
153 | ```
154 | 
155 | 2. Start a local cua macOS VM
156 | 
157 | ```bash
158 | lume run macos-sequoia-cua:latest
159 | ```
160 | 
161 | 3. Connect with Computer
162 | 
163 | ```python
164 | from computer import Computer
165 | 
166 | computer = Computer(
167 |     os_type="macos",
168 |     provider_type="lume",
169 |     name="macos-sequoia-cua:latest"
170 | )
171 | 
172 | await computer.run() # Launch & connect to the sandbox
173 | ```
174 | 
175 | ## Your host desktop
176 | 
177 | You can also have agents control your desktop directly by running the Computer Server without any containerization layer. Beware that AI models may perform risky actions.
178 | 179 | ```bash 180 | pip install cua-computer-server 181 | python -m computer_server 182 | ``` 183 | 184 | Connect with: 185 | 186 | <Tabs items={['Python']}> 187 | <Tab value="Python"> 188 | ```python 189 | 190 | computer = Computer(use_host_computer_server=True) 191 | await computer.run() # Connect to the host desktop 192 | 193 | ``` 194 | 195 | </Tab> 196 | </Tabs> 197 | ``` -------------------------------------------------------------------------------- /libs/lumier/src/bin/entry.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | 3 | # Configure SSH to prevent known hosts warnings 4 | export SSHPASS_PROMPT= 5 | export SSH_ASKPASS=/bin/echo 6 | # Set SSH quiet mode via the SSHPASS environment variable 7 | export SSHPASS_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -q" 8 | 9 | # We'll enable strict error checking AFTER initialization 10 | # to prevent premature exits 11 | 12 | # Source configuration files 13 | CONFIG_DIR="/run/config" 14 | LIB_DIR="/run/lib" 15 | 16 | # Source constants if available 17 | if [ -f "${CONFIG_DIR}/constants.sh" ]; then 18 | source "${CONFIG_DIR}/constants.sh" 19 | fi 20 | 21 | # Import utilities 22 | for lib in "${LIB_DIR}"/*.sh; do 23 | if [ -f "$lib" ]; then 24 | source "$lib" 25 | fi 26 | done 27 | 28 | # Set VM_NAME to env or fallback to container name (from --name) 29 | if [ -z "${VM_NAME:-}" ]; then 30 | VM_NAME="$(cat /etc/hostname)" 31 | export VM_NAME 32 | fi 33 | 34 | # Set HOST_STORAGE_PATH to a lume ephemeral storage if not set 35 | if [ -z "${HOST_STORAGE_PATH:-}" ]; then 36 | HOST_STORAGE_PATH="ephemeral" 37 | 38 | # Tell user that ephemeral storage is being used 39 | echo "Using ephemeral storage. VM state will be lost when macOS cleans up temporary files." 40 | 41 | export HOST_STORAGE_PATH 42 | fi 43 | 44 | # Only check and report mountpoints in debug mode 45 | if [ "${LUMIER_DEBUG:-0}" == "1" ]; then 46 | if mountpoint -q /storage; then 47 | echo "/storage is mounted" 48 | fi 49 | if mountpoint -q /shared; then 50 | echo "/shared is mounted" 51 | fi 52 | # if mountpoint -q /data; then 53 | # echo "/data is mounted" 54 | # fi 55 | fi 56 | 57 | # Check if we're running as PID 1 (important for Docker signal handling) 58 | if [ $$ -ne 1 ]; then 59 | echo "Warning: This script is not running as PID 1 (current PID: $$)." 60 | echo "Docker signal handling may not work properly when stopped from Docker Desktop." 61 | fi 62 | 63 | # Log startup info 64 | echo "Lumier VM is starting..." 65 | 66 | # Cleanup function to ensure VM and noVNC proxy shutdown on container stop 67 | # Counter for signal handling 68 | SIGNAL_COUNT=0 69 | 70 | cleanup() { 71 | local signal_name=$1 72 | set +e # Don't exit on error in cleanup 73 | 74 | # Increment signal counter 75 | SIGNAL_COUNT=$((SIGNAL_COUNT + 1)) 76 | 77 | # If this is the first signal, try graceful shutdown 78 | if [ $SIGNAL_COUNT -eq 1 ]; then 79 | echo "[cleanup] Caught $signal_name signal, shutting down..." 80 | 81 | # Check if we're in the middle of an image pull 82 | if [[ "$PULL_IN_PROGRESS" == "1" ]]; then 83 | echo "[cleanup] Interrupted during image pull, skipping VM stop." 84 | else 85 | echo "[cleanup] Stopping VM..." 
86 | stop_vm true 87 | fi 88 | 89 | # Attempt to clean up ephemeral storage if it's in the /private/tmp directory 90 | if [[ "$HOST_STORAGE_PATH" == "ephemeral" ]]; then 91 | # First check if VM actually exists 92 | VM_INFO=$(lume_get "$VM_NAME" "$HOST_STORAGE_PATH" "json" "false") 93 | 94 | # Only try VM deletion if VM exists and not in the middle of a pull 95 | if [[ "$PULL_IN_PROGRESS" != "1" && $VM_INFO != *"Virtual machine not found"* ]]; then 96 | echo "[cleanup] Cleaning up VM..." 97 | lume_delete "$VM_NAME" "$HOST_STORAGE_PATH" > /dev/null 2>&1 98 | fi 99 | fi 100 | else 101 | # For multiple signals, force an immediate exit 102 | echo "got $SIGNAL_COUNT SIGTERM/SIGINTs, forcefully exiting" 103 | fi 104 | 105 | # If we've received multiple signals, just exit immediately 106 | if [ $SIGNAL_COUNT -ge 3 ]; then 107 | exit 1 108 | fi 109 | 110 | # Exit with success for the first signal 111 | if [ $SIGNAL_COUNT -eq 1 ]; then 112 | exit 0 113 | fi 114 | } 115 | # Ensure we catch all typical container termination signals 116 | trap 'cleanup SIGTERM' SIGTERM 117 | trap 'cleanup SIGINT' SIGINT 118 | trap 'cleanup SIGHUP' SIGHUP 119 | 120 | # Now enable strict error handling after initialization 121 | set -euo pipefail 122 | 123 | # Start the VM with error handling 124 | if ! start_vm; then 125 | echo "ERROR: Failed to start VM!" >&2 126 | exit 1 127 | fi 128 | 129 | # Start noVNC for VNC access 130 | NOVNC_PID="" 131 | if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then 132 | # Only show this in debug mode 133 | if [ "${LUMIER_DEBUG:-0}" == "1" ]; then 134 | echo "Starting noVNC proxy with optimized color settings..." 135 | fi 136 | ${NOVNC_PATH}/utils/novnc_proxy --vnc host.docker.internal:${VNC_PORT} --listen 8006 --web ${NOVNC_PATH} > /dev/null 2>&1 & 137 | NOVNC_PID=$! 138 | disown $NOVNC_PID 139 | echo "noVNC interface available at: http://localhost:8006/vnc.html?password=${VNC_PASSWORD}&autoconnect=true (replace PORT with the port you forwarded to 8006)" 140 | fi 141 | 142 | echo "Lumier is running. Press Ctrl+C to stop." 143 | 144 | # Instead of tail -f /dev/null, use a wait loop that can be interrupted by signals 145 | while true; do 146 | # Sleep in small increments to make signal handling more responsive 147 | sleep 1 & 148 | wait $! 149 | # Break the loop if we've received a signal 150 | if [ $SIGNAL_COUNT -gt 0 ]; then 151 | break 152 | fi 153 | done ``` -------------------------------------------------------------------------------- /libs/lume/src/Server/Requests.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | import Virtualization 4 | 5 | struct RunVMRequest: Codable { 6 | let noDisplay: Bool? 7 | let sharedDirectories: [SharedDirectoryRequest]? 8 | let recoveryMode: Bool? 9 | let storage: String? 10 | 11 | struct SharedDirectoryRequest: Codable { 12 | let hostPath: String 13 | let readOnly: Bool? 
14 | } 15 | 16 | func parse() throws -> [SharedDirectory] { 17 | guard let sharedDirectories = sharedDirectories else { return [] } 18 | 19 | return try sharedDirectories.map { dir -> SharedDirectory in 20 | // Validate that the host path exists and is a directory 21 | var isDirectory: ObjCBool = false 22 | guard FileManager.default.fileExists(atPath: dir.hostPath, isDirectory: &isDirectory), 23 | isDirectory.boolValue 24 | else { 25 | throw ValidationError( 26 | "Host path does not exist or is not a directory: \(dir.hostPath)") 27 | } 28 | 29 | return SharedDirectory( 30 | hostPath: dir.hostPath, 31 | tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag, 32 | readOnly: dir.readOnly ?? false 33 | ) 34 | } 35 | } 36 | } 37 | 38 | struct PullRequest: Codable { 39 | let image: String 40 | let name: String? 41 | var registry: String 42 | var organization: String 43 | let storage: String? 44 | 45 | enum CodingKeys: String, CodingKey { 46 | case image, name, registry, organization, storage 47 | } 48 | 49 | init(from decoder: Decoder) throws { 50 | let container = try decoder.container(keyedBy: CodingKeys.self) 51 | image = try container.decode(String.self, forKey: .image) 52 | name = try container.decodeIfPresent(String.self, forKey: .name) 53 | registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io" 54 | organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua" 55 | storage = try container.decodeIfPresent(String.self, forKey: .storage) 56 | } 57 | } 58 | 59 | struct CreateVMRequest: Codable { 60 | let name: String 61 | let os: String 62 | let cpu: Int 63 | let memory: String 64 | let diskSize: String 65 | let display: String 66 | let ipsw: String? 67 | let storage: String? 68 | 69 | func parse() throws -> (memory: UInt64, diskSize: UInt64) { 70 | return ( 71 | memory: try parseSize(memory), 72 | diskSize: try parseSize(diskSize) 73 | ) 74 | } 75 | } 76 | 77 | struct SetVMRequest: Codable { 78 | let cpu: Int? 79 | let memory: String? 80 | let diskSize: String? 81 | let display: String? 82 | let storage: String? 83 | 84 | func parse() throws -> (memory: UInt64?, diskSize: UInt64?, display: VMDisplayResolution?) { 85 | return ( 86 | memory: try memory.map { try parseSize($0) }, 87 | diskSize: try diskSize.map { try parseSize($0) }, 88 | display: try display.map { 89 | guard let resolution = VMDisplayResolution(string: $0) else { 90 | throw ValidationError( 91 | "Invalid display resolution format: \($0). Expected format: WIDTHxHEIGHT") 92 | } 93 | return resolution 94 | } 95 | ) 96 | } 97 | } 98 | 99 | struct CloneRequest: Codable { 100 | let name: String 101 | let newName: String 102 | let sourceLocation: String? 103 | let destLocation: String? 104 | } 105 | 106 | struct PushRequest: Codable { 107 | let name: String // Name of the local VM 108 | let imageName: String // Base name for the image in the registry 109 | let tags: [String] // List of tags to push 110 | var registry: String // Registry URL 111 | var organization: String // Organization/user in the registry 112 | let storage: String? // Optional VM storage location or direct path 113 | var chunkSizeMb: Int // Chunk size 114 | // dryRun and reassemble are less common for API, default to false? 
115 | // verbose is usually handled by server logging 116 | 117 | enum CodingKeys: String, CodingKey { 118 | case name, imageName, tags, registry, organization, storage, chunkSizeMb 119 | } 120 | 121 | // Provide default values for optional fields during decoding 122 | init(from decoder: Decoder) throws { 123 | let container = try decoder.container(keyedBy: CodingKeys.self) 124 | name = try container.decode(String.self, forKey: .name) 125 | imageName = try container.decode(String.self, forKey: .imageName) 126 | tags = try container.decode([String].self, forKey: .tags) 127 | registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io" 128 | organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua" 129 | storage = try container.decodeIfPresent(String.self, forKey: .storage) 130 | chunkSizeMb = try container.decodeIfPresent(Int.self, forKey: .chunkSizeMb) ?? 512 131 | } 132 | } 133 | ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/contrib.md: -------------------------------------------------------------------------------- ```markdown 1 | # Contributing Reference Agent Implementations 2 | 3 | This guide explains how to add your own reference agent implementations to the benchmark system. 4 | 5 | ## Adding Reference Agent Implementations 6 | 7 | ### 1. Implement the ModelProtocol 8 | 9 | Create a new file in `models/` directory implementing the `ModelProtocol`: 10 | 11 | ```python 12 | from models.base import ModelProtocol 13 | from typing import Optional, Tuple 14 | from PIL import Image 15 | 16 | class YourModelName(ModelProtocol): 17 | def __init__(self, model_path: str): 18 | self.model_path = model_path 19 | self._model = None 20 | 21 | @property 22 | def model_name(self) -> str: 23 | return self.model_path 24 | 25 | async def load_model(self) -> None: 26 | """Load the model into memory.""" 27 | # Your model loading logic here 28 | pass 29 | 30 | async def unload_model(self) -> None: 31 | """Unload the model from memory.""" 32 | # Your model cleanup logic here 33 | pass 34 | 35 | async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: 36 | """ 37 | Predict click coordinates for the given image and instruction. 38 | 39 | Args: 40 | image: PIL Image to analyze 41 | instruction: Text instruction describing what to click 42 | 43 | Returns: 44 | Tuple of (x, y) coordinates or None if prediction fails 45 | """ 46 | # Your prediction logic here 47 | return (x, y) # Return predicted coordinates 48 | ``` 49 | 50 | ### 2. Register Your Model 51 | 52 | Add your model to the `get_available_models()` function in `utils.py`: 53 | 54 | ```python 55 | def get_available_models() -> List[Union[str, ModelProtocol]]: 56 | models = [ 57 | # Computer Agent SDK providers 58 | "huggingface-local/HelloKKMe/GTA1-7B", 59 | 60 | # Reference implementations 61 | GTA1Model("HelloKKMe/GTA1-7B"), 62 | YourModelName("path/to/your/model"), # Add your model here 63 | ] 64 | return models 65 | ``` 66 | 67 | ### 3. Test Your Implementation 68 | 69 | Before submitting, test your model with the interactive tool: 70 | 71 | ```bash 72 | python interactive.py 73 | ``` 74 | 75 | This will help you verify that your model loads correctly and produces reasonable predictions. 76 | 77 | ## Example: Adding a New Model 78 | 79 | Here's a complete example of adding a hypothetical "MyVisionModel": 80 | 81 | 1. 
**Create `models/my_vision_model.py`:** 82 | ```python 83 | import torch 84 | from transformers import AutoModel, AutoProcessor 85 | from models.base import ModelProtocol 86 | from typing import Optional, Tuple 87 | from PIL import Image 88 | 89 | class MyVisionModel(ModelProtocol): 90 | def __init__(self, model_path: str): 91 | self.model_path = model_path 92 | self.model = None 93 | self.processor = None 94 | 95 | @property 96 | def model_name(self) -> str: 97 | return f"MyVisionModel({self.model_path})" 98 | 99 | async def load_model(self) -> None: 100 | """Load the model and processor.""" 101 | self.processor = AutoProcessor.from_pretrained(self.model_path) 102 | self.model = AutoModel.from_pretrained( 103 | self.model_path, 104 | torch_dtype=torch.float16, 105 | device_map="auto" 106 | ) 107 | 108 | async def unload_model(self) -> None: 109 | """Clean up model resources.""" 110 | del self.model 111 | del self.processor 112 | self.model = None 113 | self.processor = None 114 | torch.cuda.empty_cache() 115 | 116 | async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: 117 | """Predict click coordinates.""" 118 | try: 119 | # Preprocess inputs 120 | inputs = self.processor( 121 | text=instruction, 122 | images=image, 123 | return_tensors="pt" 124 | ) 125 | 126 | # Run inference 127 | with torch.no_grad(): 128 | outputs = self.model(**inputs) 129 | 130 | # Extract coordinates (model-specific logic) 131 | x, y = self._extract_coordinates(outputs) 132 | return (int(x), int(y)) 133 | 134 | except Exception as e: 135 | print(f"Prediction failed: {e}") 136 | return None 137 | 138 | def _extract_coordinates(self, outputs): 139 | """Extract x, y coordinates from model outputs.""" 140 | # Your model-specific coordinate extraction logic 141 | pass 142 | ``` 143 | 144 | 2. **Update `models/__init__.py`:** 145 | ```python 146 | from .gta1 import GTA1Model 147 | from .my_vision_model import MyVisionModel 148 | 149 | __all__ = ["GTA1Model", "MyVisionModel"] 150 | ``` 151 | 152 | 3. **Update `utils.py`:** 153 | ```python 154 | from models import GTA1Model, MyVisionModel 155 | 156 | def get_available_models() -> List[Union[str, ModelProtocol]]: 157 | models = [ 158 | "huggingface-local/HelloKKMe/GTA1-7B", 159 | GTA1Model("HelloKKMe/GTA1-7B"), 160 | MyVisionModel("my-org/my-vision-model"), # Add here 161 | ] 162 | return models 163 | ``` 164 | ``` -------------------------------------------------------------------------------- /libs/lume/src/FileSystem/VMConfig.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | import Virtualization 4 | 5 | /// Represents a shared directory configuration 6 | struct SharedDirectory: Codable { 7 | let hostPath: String 8 | let tag: String 9 | let readOnly: Bool 10 | 11 | var string: String { 12 | return "\(hostPath):\(tag):\(readOnly ? "ro" : "rw")" 13 | } 14 | } 15 | 16 | // MARK: - VMConfig 17 | struct VMConfig: Codable { 18 | 19 | // MARK: - Properties 20 | let os: String 21 | private var _cpuCount: Int? 22 | private var _memorySize: UInt64? 23 | private var _diskSize: UInt64? 24 | private var _macAddress: String? 25 | private var _display: VMDisplayResolution 26 | private var _hardwareModel: Data? 27 | private var _machineIdentifier: Data? 28 | 29 | // MARK: - Initialization 30 | init( 31 | os: String, 32 | cpuCount: Int? = nil, 33 | memorySize: UInt64? = nil, 34 | diskSize: UInt64? = nil, 35 | macAddress: String? 
= nil, 36 | display: String, 37 | hardwareModel: Data? = nil, 38 | machineIdentifier: Data? = nil 39 | ) throws { 40 | self.os = os 41 | self._cpuCount = cpuCount 42 | self._memorySize = memorySize 43 | self._diskSize = diskSize 44 | self._macAddress = macAddress 45 | self._display = VMDisplayResolution(string: display) ?? VMDisplayResolution(string: "1024x768")! 46 | self._hardwareModel = hardwareModel 47 | self._machineIdentifier = machineIdentifier 48 | } 49 | 50 | var display: VMDisplayResolution { 51 | get { _display } 52 | set { _display = newValue } 53 | } 54 | 55 | var cpuCount: Int? { 56 | get { _cpuCount } 57 | set { _cpuCount = newValue } 58 | } 59 | 60 | var memorySize: UInt64? { 61 | get { _memorySize } 62 | set { _memorySize = newValue } 63 | } 64 | 65 | var diskSize: UInt64? { 66 | get { _diskSize } 67 | set { _diskSize = newValue } 68 | } 69 | 70 | var hardwareModel: Data? { 71 | get { _hardwareModel } 72 | set { _hardwareModel = newValue } 73 | } 74 | 75 | var machineIdentifier: Data? { 76 | get { _machineIdentifier } 77 | set { _machineIdentifier = newValue } 78 | } 79 | 80 | var macAddress: String? { 81 | get { _macAddress } 82 | set { _macAddress = newValue } 83 | } 84 | 85 | mutating func setCpuCount(_ count: Int) { 86 | _cpuCount = count 87 | } 88 | 89 | mutating func setMemorySize(_ size: UInt64) { 90 | _memorySize = size 91 | } 92 | 93 | mutating func setDiskSize(_ size: UInt64) { 94 | _diskSize = size 95 | } 96 | 97 | mutating func setHardwareModel(_ hardwareModel: Data) { 98 | _hardwareModel = hardwareModel 99 | } 100 | 101 | mutating func setMachineIdentifier(_ machineIdentifier: Data) { 102 | _machineIdentifier = machineIdentifier 103 | } 104 | 105 | mutating func setMacAddress(_ newMacAddress: String) { 106 | self._macAddress = newMacAddress 107 | } 108 | 109 | mutating func setDisplay(_ newDisplay: VMDisplayResolution) { 110 | self._display = newDisplay 111 | } 112 | 113 | // MARK: - Codable 114 | enum CodingKeys: String, CodingKey { 115 | case _cpuCount = "cpuCount" 116 | case _memorySize = "memorySize" 117 | case _diskSize = "diskSize" 118 | case macAddress 119 | case display 120 | case _hardwareModel = "hardwareModel" 121 | case _machineIdentifier = "machineIdentifier" 122 | case os 123 | } 124 | 125 | init(from decoder: Decoder) throws { 126 | let container = try decoder.container(keyedBy: CodingKeys.self) 127 | 128 | os = try container.decode(String.self, forKey: .os) 129 | _cpuCount = try container.decodeIfPresent(Int.self, forKey: ._cpuCount) 130 | _memorySize = try container.decodeIfPresent(UInt64.self, forKey: ._memorySize) 131 | _diskSize = try container.decodeIfPresent(UInt64.self, forKey: ._diskSize) 132 | _macAddress = try container.decodeIfPresent(String.self, forKey: .macAddress) 133 | _display = VMDisplayResolution(string: try container.decode(String.self, forKey: .display))! 
134 | _hardwareModel = try container.decodeIfPresent(Data.self, forKey: ._hardwareModel) 135 | _machineIdentifier = try container.decodeIfPresent(Data.self, forKey: ._machineIdentifier) 136 | } 137 | 138 | func encode(to encoder: Encoder) throws { 139 | var container = encoder.container(keyedBy: CodingKeys.self) 140 | 141 | try container.encodeIfPresent(os, forKey: .os) 142 | try container.encodeIfPresent(_cpuCount, forKey: ._cpuCount) 143 | try container.encodeIfPresent(_memorySize, forKey: ._memorySize) 144 | try container.encodeIfPresent(_diskSize, forKey: ._diskSize) 145 | try container.encodeIfPresent(_macAddress, forKey: .macAddress) 146 | try container.encode(display.string, forKey: .display) 147 | try container.encodeIfPresent(_hardwareModel, forKey: ._hardwareModel) 148 | try container.encodeIfPresent(_machineIdentifier, forKey: ._machineIdentifier) 149 | } 150 | } 151 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/callbacks/base.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Base callback handler interface for ComputerAgent preprocessing and postprocessing hooks. 3 | """ 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import List, Dict, Any, Optional, Union 7 | 8 | 9 | class AsyncCallbackHandler(ABC): 10 | """ 11 | Base class for async callback handlers that can preprocess messages before 12 | the agent loop and postprocess output after the agent loop. 13 | """ 14 | 15 | async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: 16 | """Called at the start of an agent run loop.""" 17 | pass 18 | 19 | async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: 20 | """Called at the end of an agent run loop.""" 21 | pass 22 | 23 | async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool: 24 | """Called during agent run loop to determine if execution should continue. 25 | 26 | Args: 27 | kwargs: Run arguments 28 | old_items: Original messages 29 | new_items: New messages generated during run 30 | 31 | Returns: 32 | True to continue execution, False to stop 33 | """ 34 | return True 35 | 36 | async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 37 | """ 38 | Called before messages are sent to the agent loop. 39 | 40 | Args: 41 | messages: List of message dictionaries to preprocess 42 | 43 | Returns: 44 | List of preprocessed message dictionaries 45 | """ 46 | return messages 47 | 48 | async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 49 | """ 50 | Called after the agent loop returns output. 51 | 52 | Args: 53 | output: List of output message dictionaries to postprocess 54 | 55 | Returns: 56 | List of postprocessed output dictionaries 57 | """ 58 | return output 59 | 60 | async def on_computer_call_start(self, item: Dict[str, Any]) -> None: 61 | """ 62 | Called when a computer call is about to start. 63 | 64 | Args: 65 | item: The computer call item dictionary 66 | """ 67 | pass 68 | 69 | async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: 70 | """ 71 | Called when a computer call has completed. 
72 | 73 | Args: 74 | item: The computer call item dictionary 75 | result: The result of the computer call 76 | """ 77 | pass 78 | 79 | async def on_function_call_start(self, item: Dict[str, Any]) -> None: 80 | """ 81 | Called when a function call is about to start. 82 | 83 | Args: 84 | item: The function call item dictionary 85 | """ 86 | pass 87 | 88 | async def on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: 89 | """ 90 | Called when a function call has completed. 91 | 92 | Args: 93 | item: The function call item dictionary 94 | result: The result of the function call 95 | """ 96 | pass 97 | 98 | async def on_text(self, item: Dict[str, Any]) -> None: 99 | """ 100 | Called when a text message is encountered. 101 | 102 | Args: 103 | item: The message item dictionary 104 | """ 105 | pass 106 | 107 | async def on_api_start(self, kwargs: Dict[str, Any]) -> None: 108 | """ 109 | Called when an API call is about to start. 110 | 111 | Args: 112 | kwargs: The kwargs being passed to the API call 113 | """ 114 | pass 115 | 116 | async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None: 117 | """ 118 | Called when an API call has completed. 119 | 120 | Args: 121 | kwargs: The kwargs that were passed to the API call 122 | result: The result of the API call 123 | """ 124 | pass 125 | 126 | async def on_usage(self, usage: Dict[str, Any]) -> None: 127 | """ 128 | Called when usage information is received. 129 | 130 | Args: 131 | usage: The usage information 132 | """ 133 | pass 134 | 135 | async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None: 136 | """ 137 | Called when a screenshot is taken. 138 | 139 | Args: 140 | screenshot: The screenshot image 141 | name: The name of the screenshot 142 | """ 143 | pass 144 | 145 | async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None: 146 | """ 147 | Called when responses are received. 
148 | 149 | Args: 150 | kwargs: The kwargs being passed to the agent loop 151 | responses: The responses received 152 | """ 153 | pass ``` -------------------------------------------------------------------------------- /examples/agent_examples.py: -------------------------------------------------------------------------------- ```python 1 | """Example demonstrating the ComputerAgent capabilities with the Omni provider.""" 2 | 3 | import asyncio 4 | import logging 5 | import traceback 6 | import signal 7 | 8 | from computer import Computer, VMProviderType 9 | 10 | # Import the unified agent class and types 11 | from agent import ComputerAgent 12 | 13 | # Import utility functions 14 | from utils import load_dotenv_files, handle_sigint 15 | 16 | # Set up logging 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | async def run_agent_example(): 22 | """Run example of using the ComputerAgent with different models.""" 23 | print("\n=== Example: ComputerAgent with different models ===") 24 | 25 | try: 26 | # Create a local macOS computer 27 | computer = Computer( 28 | os_type="macos", 29 | verbosity=logging.DEBUG, 30 | ) 31 | 32 | # Create a remote Linux computer with Cua 33 | # computer = Computer( 34 | # os_type="linux", 35 | # api_key=os.getenv("CUA_API_KEY"), 36 | # name=os.getenv("CUA_CONTAINER_NAME"), 37 | # provider_type=VMProviderType.CLOUD, 38 | # ) 39 | 40 | # Create ComputerAgent with new API 41 | agent = ComputerAgent( 42 | # Supported models: 43 | 44 | # == OpenAI CUA (computer-use-preview) == 45 | model="openai/computer-use-preview", 46 | 47 | # == Anthropic CUA (Claude > 3.5) == 48 | # model="anthropic/claude-opus-4-20250514", 49 | # model="anthropic/claude-sonnet-4-20250514", 50 | # model="anthropic/claude-3-7-sonnet-20250219", 51 | # model="anthropic/claude-3-5-sonnet-20241022", 52 | 53 | # == UI-TARS == 54 | # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", 55 | # model="mlx/mlx-community/UI-TARS-1.5-7B-6bit", 56 | # model="ollama_chat/0000/ui-tars-1.5-7b", 57 | 58 | # == Omniparser + Any LLM == 59 | # model="omniparser+anthropic/claude-opus-4-20250514", 60 | # model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M", 61 | 62 | tools=[computer], 63 | only_n_most_recent_images=3, 64 | verbosity=logging.DEBUG, 65 | trajectory_dir="trajectories", 66 | use_prompt_caching=True, 67 | max_trajectory_budget=1.0, 68 | ) 69 | 70 | # Example tasks to demonstrate the agent 71 | tasks = [ 72 | "Look for a repository named trycua/cua on GitHub.", 73 | "Check the open issues, open the most recent one and read it.", 74 | "Clone the repository in users/lume/projects if it doesn't exist yet.", 75 | "Open the repository with an app named Cursor (on the dock, black background and white cube icon).", 76 | "From Cursor, open Composer if not already open.", 77 | "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.", 78 | ] 79 | 80 | # Use message-based conversation history 81 | history = [] 82 | 83 | for i, task in enumerate(tasks): 84 | print(f"\nExecuting task {i+1}/{len(tasks)}: {task}") 85 | 86 | # Add user message to history 87 | history.append({"role": "user", "content": task}) 88 | 89 | # Run agent with conversation history 90 | async for result in agent.run(history, stream=False): 91 | # Add agent outputs to history 92 | history += result.get("output", []) 93 | 94 | # Print output for debugging 95 | for item in result.get("output", []): 96 | if item.get("type") == "message": 97 | content = 
item.get("content", []) 98 | for content_part in content: 99 | if content_part.get("text"): 100 | print(f"Agent: {content_part.get('text')}") 101 | elif item.get("type") == "computer_call": 102 | action = item.get("action", {}) 103 | action_type = action.get("type", "") 104 | print(f"Computer Action: {action_type}({action})") 105 | elif item.get("type") == "computer_call_output": 106 | print("Computer Output: [Screenshot/Result]") 107 | 108 | print(f"✅ Task {i+1}/{len(tasks)} completed: {task}") 109 | 110 | except Exception as e: 111 | logger.error(f"Error in run_agent_example: {e}") 112 | traceback.print_exc() 113 | raise 114 | 115 | 116 | def main(): 117 | """Run the Anthropic agent example.""" 118 | try: 119 | load_dotenv_files() 120 | 121 | # Register signal handler for graceful exit 122 | signal.signal(signal.SIGINT, handle_sigint) 123 | 124 | asyncio.run(run_agent_example()) 125 | except Exception as e: 126 | print(f"Error running example: {e}") 127 | traceback.print_exc() 128 | 129 | 130 | if __name__ == "__main__": 131 | main() 132 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Virtualization/DarwinImageLoader.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import Virtualization 3 | 4 | /// Handles loading and validation of macOS restore images (IPSW files). 5 | /// Provides functionality to: 6 | /// - Fetch the latest supported macOS restore image URL 7 | /// - Load and validate image requirements for VM creation 8 | /// - Extract hardware model and auxiliary storage configuration 9 | protocol ImageLoader: Sendable { 10 | typealias ImageRequirements = DarwinImageLoader.ImageRequirements 11 | func fetchLatestSupportedURL() async throws -> URL 12 | func loadImageRequirements(from url: URL) async throws -> ImageRequirements 13 | func downloadLatestImage() async throws -> Path 14 | } 15 | 16 | final class DarwinImageLoader: NSObject, ImageLoader, @unchecked Sendable, URLSessionDownloadDelegate { 17 | struct ImageRequirements: Sendable { 18 | let hardwareModel: Data 19 | let minimumSupportedCPUCount: Int 20 | let minimumSupportedMemorySize: UInt64 21 | } 22 | 23 | enum ImageError: Error { 24 | case invalidImage 25 | case unsupportedConfiguration 26 | case downloadFailed 27 | } 28 | 29 | private var lastLoggedProgress: Double = 0.0 30 | private var progressLogger = ProgressLogger() 31 | private var completionHandler: ((URL?, Error?) -> Void)? 
32 | 33 | func fetchLatestSupportedURL() async throws -> URL { 34 | try await withCheckedThrowingContinuation { continuation in 35 | VZMacOSRestoreImage.fetchLatestSupported { result in 36 | switch result { 37 | case .success(let image): 38 | continuation.resume(returning: image.url) 39 | case .failure(let error): 40 | continuation.resume(throwing: error) 41 | } 42 | } 43 | } 44 | } 45 | 46 | func loadImageRequirements(from url: URL) async throws -> ImageRequirements { 47 | let image = try await VZMacOSRestoreImage.image(from: url) 48 | guard let requirements = image.mostFeaturefulSupportedConfiguration else { 49 | throw ImageError.unsupportedConfiguration 50 | } 51 | 52 | return ImageRequirements( 53 | hardwareModel: requirements.hardwareModel.dataRepresentation, 54 | minimumSupportedCPUCount: requirements.minimumSupportedCPUCount, 55 | minimumSupportedMemorySize: requirements.minimumSupportedMemorySize 56 | ) 57 | } 58 | 59 | func downloadLatestImage() async throws -> Path { 60 | let url = try await fetchLatestSupportedURL() 61 | let tempDir = FileManager.default.temporaryDirectory 62 | let downloadPath = tempDir.appendingPathComponent("latest.ipsw") 63 | 64 | // Reset progress logger state 65 | progressLogger = ProgressLogger(threshold: 0.01) 66 | 67 | // Create a continuation to wait for download completion 68 | return try await withCheckedThrowingContinuation { continuation in 69 | let session = URLSession(configuration: .default, delegate: self, delegateQueue: nil) 70 | let task = session.downloadTask(with: url) 71 | 72 | // Use the delegate method to handle completion 73 | self.completionHandler = { location, error in 74 | if let error = error { 75 | continuation.resume(throwing: error) 76 | return 77 | } 78 | 79 | do { 80 | // Remove existing file if it exists 81 | if FileManager.default.fileExists(atPath: downloadPath.path) { 82 | try FileManager.default.removeItem(at: downloadPath) 83 | } 84 | 85 | try FileManager.default.moveItem(at: location!, to: downloadPath) 86 | Logger.info("Download completed and moved to: \(downloadPath.path)") 87 | continuation.resume(returning: Path(downloadPath.path)) 88 | } catch { 89 | continuation.resume(throwing: error) 90 | } 91 | } 92 | 93 | task.resume() 94 | } 95 | } 96 | 97 | func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didWriteData bytesWritten: Int64, totalBytesWritten: Int64, totalBytesExpectedToWrite: Int64) { 98 | let progress = Double(totalBytesWritten) / Double(totalBytesExpectedToWrite) 99 | progressLogger.logProgress(current: progress, context: "Downloading IPSW") 100 | } 101 | 102 | func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didFinishDownloadingTo location: URL) { 103 | // Call the stored completion handler 104 | completionHandler?(location, nil) 105 | } 106 | 107 | func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) 
{ 108 | // Call the stored completion handler with an error if it occurred 109 | if let error = error { 110 | completionHandler?(nil, error) 111 | } 112 | } 113 | } ``` -------------------------------------------------------------------------------- /.github/workflows/pypi-publish-computer.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Publish Computer Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - "computer-v*" 7 | workflow_dispatch: 8 | inputs: 9 | version: 10 | description: "Version to publish (without v prefix)" 11 | required: true 12 | default: "0.1.0" 13 | workflow_call: 14 | inputs: 15 | version: 16 | description: "Version to publish" 17 | required: true 18 | type: string 19 | 20 | # Adding permissions at workflow level 21 | permissions: 22 | contents: write 23 | 24 | jobs: 25 | prepare: 26 | runs-on: macos-latest 27 | outputs: 28 | version: ${{ steps.get-version.outputs.version }} 29 | core_version: ${{ steps.update-deps.outputs.core_version }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | 33 | - name: Determine version 34 | id: get-version 35 | run: | 36 | if [ "${{ github.event_name }}" == "push" ]; then 37 | # Extract version from tag (for package-specific tags) 38 | if [[ "${{ github.ref }}" =~ ^refs/tags/computer-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then 39 | VERSION=${BASH_REMATCH[1]} 40 | else 41 | echo "Invalid tag format for computer" 42 | exit 1 43 | fi 44 | elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then 45 | # Use version from workflow dispatch 46 | VERSION=${{ github.event.inputs.version }} 47 | else 48 | # Use version from workflow_call 49 | VERSION=${{ inputs.version }} 50 | fi 51 | echo "VERSION=$VERSION" 52 | echo "version=$VERSION" >> $GITHUB_OUTPUT 53 | 54 | - name: Set up Python 55 | uses: actions/setup-python@v4 56 | with: 57 | python-version: "3.11" 58 | 59 | - name: Update dependencies to latest versions 60 | id: update-deps 61 | run: | 62 | cd libs/python/computer 63 | # Install required package for PyPI API access 64 | pip install requests 65 | 66 | # Create a more robust Python script for PyPI version checking 67 | cat > get_latest_versions.py << 'EOF' 68 | import requests 69 | import json 70 | import sys 71 | 72 | def get_package_version(package_name, fallback="0.1.0"): 73 | try: 74 | response = requests.get(f'https://pypi.org/pypi/{package_name}/json') 75 | print(f"API Response Status for {package_name}: {response.status_code}", file=sys.stderr) 76 | 77 | if response.status_code != 200: 78 | print(f"API request failed for {package_name}, using fallback version", file=sys.stderr) 79 | return fallback 80 | 81 | data = json.loads(response.text) 82 | 83 | if 'info' not in data: 84 | print(f"Missing 'info' key in API response for {package_name}, using fallback version", file=sys.stderr) 85 | return fallback 86 | 87 | return data['info']['version'] 88 | except Exception as e: 89 | print(f"Error fetching version for {package_name}: {str(e)}", file=sys.stderr) 90 | return fallback 91 | 92 | # Get latest versions 93 | print(get_package_version('cua-core')) 94 | EOF 95 | 96 | # Execute the script to get the versions 97 | VERSIONS=($(python get_latest_versions.py)) 98 | LATEST_CORE=${VERSIONS[0]} 99 | 100 | echo "Latest cua-core version: $LATEST_CORE" 101 | 102 | # Output the versions for the next job 103 | echo "core_version=$LATEST_CORE" >> $GITHUB_OUTPUT 104 | 105 | # Determine major version for version constraint 106 | CORE_MAJOR=$(echo $LATEST_CORE | cut -d. 
-f1) 107 | NEXT_CORE_MAJOR=$((CORE_MAJOR + 1)) 108 | 109 | # Update dependencies in pyproject.toml 110 | if [[ "$OSTYPE" == "darwin"* ]]; then 111 | # macOS version of sed needs an empty string for -i 112 | sed -i '' "s/\"cua-core>=.*,<.*\"/\"cua-core>=$LATEST_CORE,<$NEXT_CORE_MAJOR.0.0\"/" pyproject.toml 113 | else 114 | # Linux version 115 | sed -i "s/\"cua-core>=.*,<.*\"/\"cua-core>=$LATEST_CORE,<$NEXT_CORE_MAJOR.0.0\"/" pyproject.toml 116 | fi 117 | 118 | # Display the updated dependencies 119 | echo "Updated dependencies in pyproject.toml:" 120 | grep -E "cua-core" pyproject.toml 121 | 122 | publish: 123 | needs: prepare 124 | uses: ./.github/workflows/pypi-reusable-publish.yml 125 | with: 126 | package_name: "computer" 127 | package_dir: "libs/python/computer" 128 | version: ${{ needs.prepare.outputs.version }} 129 | is_lume_package: false 130 | base_package_name: "cua-computer" 131 | secrets: 132 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 133 | 134 | set-env-variables: 135 | needs: [prepare, publish] 136 | runs-on: macos-latest 137 | steps: 138 | - name: Set environment variables for use in other jobs 139 | run: | 140 | echo "CORE_VERSION=${{ needs.prepare.outputs.core_version }}" >> $GITHUB_ENV 141 | ``` -------------------------------------------------------------------------------- /libs/xfce/Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | # CUA Docker XFCE Container 2 | # Vanilla XFCE desktop with noVNC and computer-server 3 | 4 | FROM ubuntu:22.04 5 | 6 | # Avoid prompts from apt 7 | ENV DEBIAN_FRONTEND=noninteractive 8 | 9 | # Set environment variables 10 | ENV HOME=/home/cua 11 | ENV DISPLAY=:1 12 | ENV VNC_PORT=5901 13 | ENV NOVNC_PORT=6901 14 | ENV API_PORT=8000 15 | ENV VNC_RESOLUTION=1024x768 16 | ENV VNC_COL_DEPTH=24 17 | 18 | # Install system dependencies first (including sudo) 19 | RUN apt-get update && apt-get install -y \ 20 | # System utilities 21 | sudo \ 22 | # Desktop environment 23 | xfce4 \ 24 | xfce4-terminal \ 25 | dbus-x11 \ 26 | # VNC server 27 | tigervnc-standalone-server \ 28 | tigervnc-common \ 29 | # noVNC dependencies 30 | python3 \ 31 | python3-pip \ 32 | python3-numpy \ 33 | git \ 34 | net-tools \ 35 | netcat \ 36 | supervisor \ 37 | # Computer-server dependencies 38 | python3-tk \ 39 | python3-dev \ 40 | gnome-screenshot \ 41 | wmctrl \ 42 | ffmpeg \ 43 | socat \ 44 | xclip \ 45 | # Browser 46 | wget \ 47 | software-properties-common \ 48 | # Build tools 49 | build-essential \ 50 | libncursesw5-dev \ 51 | libssl-dev \ 52 | libsqlite3-dev \ 53 | tk-dev \ 54 | libgdbm-dev \ 55 | libc6-dev \ 56 | libbz2-dev \ 57 | libffi-dev \ 58 | zlib1g-dev \ 59 | && rm -rf /var/lib/apt/lists/* 60 | 61 | # Remove screensavers and power manager to avoid popups and lock screens 62 | RUN apt-get remove -y \ 63 | xfce4-power-manager \ 64 | xfce4-power-manager-data \ 65 | xfce4-power-manager-plugins \ 66 | xfce4-screensaver \ 67 | light-locker \ 68 | xscreensaver \ 69 | xscreensaver-data || true 70 | 71 | # Create user after sudo is installed 72 | RUN useradd -m -s /bin/bash -G sudo cua && \ 73 | echo "cua:cua" | chpasswd && \ 74 | echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers 75 | 76 | # Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues 77 | RUN apt-get update && \ 78 | add-apt-repository -y ppa:mozillateam/ppa && \ 79 | echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ 80 | apt-get update && \ 81 | apt-get 
install -y firefox && \ 82 | echo 'pref("datareporting.policy.firstRunURL", "");\npref("datareporting.policy.dataSubmissionEnabled", false);\npref("datareporting.healthreport.service.enabled", false);\npref("datareporting.healthreport.uploadEnabled", false);\npref("trailhead.firstrun.branches", "nofirstrun-empty");\npref("browser.aboutwelcome.enabled", false);' > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \ 83 | update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \ 84 | update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \ 85 | rm -rf /var/lib/apt/lists/* 86 | 87 | # Install noVNC 88 | RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ 89 | git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ 90 | ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html 91 | 92 | # Pre-create cache directory with correct ownership before pip install 93 | RUN mkdir -p /home/cua/.cache && \ 94 | chown -R cua:cua /home/cua/.cache 95 | 96 | # Install computer-server 97 | RUN pip3 install cua-computer-server 98 | 99 | # Fix any cache files created by pip 100 | RUN chown -R cua:cua /home/cua/.cache 101 | 102 | # Copy startup scripts 103 | COPY src/supervisor/ /etc/supervisor/conf.d/ 104 | COPY src/scripts/ /usr/local/bin/ 105 | 106 | # Make scripts executable 107 | RUN chmod +x /usr/local/bin/*.sh 108 | 109 | # Setup VNC 110 | USER cua 111 | WORKDIR /home/cua 112 | 113 | # Create VNC directory (no password needed with SecurityTypes None) 114 | RUN mkdir -p $HOME/.vnc 115 | 116 | # Configure XFCE for first start 117 | RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart 118 | 119 | # Copy XFCE config to disable browser launching and welcome screens 120 | COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc 121 | COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml 122 | COPY --chown=cua:cua src/xfce-config/xfce4-power-manager.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-power-manager.xml 123 | 124 | # Disable autostart for screensaver, lock screen, and power manager 125 | RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \ 126 | echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-screensaver.desktop && \ 127 | echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/light-locker.desktop && \ 128 | echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-power-manager.desktop && \ 129 | chown -R cua:cua $HOME/.config 130 | 131 | # Create storage and shared directories, and Firefox cache directory 132 | RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \ 133 | chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla $HOME/.vnc 134 | 135 | USER root 136 | 137 | # Expose ports 138 | EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT 139 | 140 | # Start services via supervisor 141 | CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] 142 | ``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/cli.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Command-line interface for the Computer API server. 
3 | """ 4 | 5 | import argparse 6 | import asyncio 7 | import logging 8 | import os 9 | import sys 10 | import threading 11 | from typing import List, Optional 12 | 13 | from .server import Server 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace: 19 | """Parse command-line arguments.""" 20 | parser = argparse.ArgumentParser(description="Start the Computer API server") 21 | parser.add_argument( 22 | "--host", default="0.0.0.0", help="Host to bind the server to (default: 0.0.0.0)" 23 | ) 24 | parser.add_argument( 25 | "--port", type=int, default=8000, help="Port to bind the server to (default: 8000)" 26 | ) 27 | parser.add_argument( 28 | "--log-level", 29 | choices=["debug", "info", "warning", "error", "critical"], 30 | default="info", 31 | help="Logging level (default: info)", 32 | ) 33 | parser.add_argument( 34 | "--ssl-keyfile", 35 | type=str, 36 | help="Path to SSL private key file (enables HTTPS)", 37 | ) 38 | parser.add_argument( 39 | "--ssl-certfile", 40 | type=str, 41 | help="Path to SSL certificate file (enables HTTPS)", 42 | ) 43 | parser.add_argument( 44 | "--watchdog", 45 | action="store_true", 46 | help="Enable watchdog monitoring (automatically enabled if CONTAINER_NAME env var is set)", 47 | ) 48 | parser.add_argument( 49 | "--watchdog-interval", 50 | type=int, 51 | default=30, 52 | help="Watchdog ping interval in seconds (default: 30)", 53 | ) 54 | parser.add_argument( 55 | "--no-restart", 56 | action="store_true", 57 | help="Disable automatic server restart in watchdog", 58 | ) 59 | 60 | return parser.parse_args(args) 61 | 62 | 63 | def main() -> None: 64 | """Main entry point for the CLI.""" 65 | args = parse_args() 66 | 67 | # Configure logging 68 | logging.basicConfig( 69 | level=getattr(logging, args.log_level.upper()), 70 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 71 | ) 72 | 73 | # Check if watchdog should be enabled 74 | container_name = os.environ.get("CONTAINER_NAME") 75 | enable_watchdog = (args.watchdog or bool(container_name)) and not sys.platform.startswith("win") 76 | 77 | if container_name: 78 | logger.info(f"Container environment detected (CONTAINER_NAME={container_name}), enabling watchdog") 79 | elif args.watchdog: 80 | logger.info("Watchdog explicitly enabled via --watchdog flag") 81 | 82 | # Start watchdog if enabled 83 | if enable_watchdog: 84 | logger.info(f"Starting watchdog monitoring with {args.watchdog_interval}s interval") 85 | 86 | def run_watchdog_thread(): 87 | """Run watchdog in a separate thread.""" 88 | loop = asyncio.new_event_loop() 89 | asyncio.set_event_loop(loop) 90 | try: 91 | # Create CLI args dict for watchdog 92 | cli_args = { 93 | 'host': args.host, 94 | 'port': args.port, 95 | 'log_level': args.log_level, 96 | 'ssl_keyfile': args.ssl_keyfile, 97 | 'ssl_certfile': args.ssl_certfile 98 | } 99 | 100 | # Create watchdog with restart settings 101 | from .watchdog import Watchdog 102 | watchdog = Watchdog( 103 | cli_args=cli_args, 104 | ping_interval=args.watchdog_interval 105 | ) 106 | watchdog.restart_enabled = not args.no_restart 107 | 108 | loop.run_until_complete(watchdog.start_monitoring()) 109 | except Exception as e: 110 | logger.error(f"Watchdog error: {e}") 111 | finally: 112 | loop.close() 113 | 114 | # Start watchdog in background thread 115 | watchdog_thread = threading.Thread( 116 | target=run_watchdog_thread, 117 | daemon=True, 118 | name="watchdog" 119 | ) 120 | watchdog_thread.start() 121 | 122 | # Create and start 
the server 123 | logger.info(f"Starting CUA Computer API server on {args.host}:{args.port}...") 124 | 125 | # Handle SSL configuration 126 | ssl_args = {} 127 | if args.ssl_keyfile and args.ssl_certfile: 128 | ssl_args = { 129 | "ssl_keyfile": args.ssl_keyfile, 130 | "ssl_certfile": args.ssl_certfile, 131 | } 132 | logger.info("HTTPS mode enabled with SSL certificates") 133 | elif args.ssl_keyfile or args.ssl_certfile: 134 | logger.warning("Both --ssl-keyfile and --ssl-certfile are required for HTTPS. Running in HTTP mode.") 135 | else: 136 | logger.info("HTTP mode (no SSL certificates provided)") 137 | 138 | server = Server(host=args.host, port=args.port, log_level=args.log_level, **ssl_args) 139 | 140 | try: 141 | server.start() 142 | except KeyboardInterrupt: 143 | logger.info("Server stopped by user") 144 | sys.exit(0) 145 | except Exception as e: 146 | logger.error(f"Error starting server: {e}") 147 | sys.exit(1) 148 | 149 | 150 | if __name__ == "__main__": 151 | main() 152 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/computers/cua.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Computer handler implementation for OpenAI computer-use-preview protocol. 3 | """ 4 | 5 | import base64 6 | from typing import Dict, List, Any, Literal, Union, Optional 7 | from .base import AsyncComputerHandler 8 | from computer import Computer 9 | 10 | class cuaComputerHandler(AsyncComputerHandler): 11 | """Computer handler that implements the Computer protocol using the computer interface.""" 12 | 13 | def __init__(self, cua_computer: Computer): 14 | """Initialize with a computer interface (from tool schema).""" 15 | self.cua_computer = cua_computer 16 | self.interface = None 17 | 18 | async def _initialize(self): 19 | if hasattr(self.cua_computer, '_initialized') and not self.cua_computer._initialized: 20 | await self.cua_computer.run() 21 | self.interface = self.cua_computer.interface 22 | 23 | # ==== Computer-Use-Preview Action Space ==== 24 | 25 | async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: 26 | """Get the current environment type.""" 27 | # TODO: detect actual environment 28 | return "linux" 29 | 30 | async def get_dimensions(self) -> tuple[int, int]: 31 | """Get screen dimensions as (width, height).""" 32 | assert self.interface is not None 33 | screen_size = await self.interface.get_screen_size() 34 | return screen_size["width"], screen_size["height"] 35 | 36 | async def screenshot(self) -> str: 37 | """Take a screenshot and return as base64 string.""" 38 | assert self.interface is not None 39 | screenshot_bytes = await self.interface.screenshot() 40 | return base64.b64encode(screenshot_bytes).decode('utf-8') 41 | 42 | async def click(self, x: int, y: int, button: str = "left") -> None: 43 | """Click at coordinates with specified button.""" 44 | assert self.interface is not None 45 | if button == "left": 46 | await self.interface.left_click(x, y) 47 | elif button == "right": 48 | await self.interface.right_click(x, y) 49 | else: 50 | # Default to left click for unknown buttons 51 | await self.interface.left_click(x, y) 52 | 53 | async def double_click(self, x: int, y: int) -> None: 54 | """Double click at coordinates.""" 55 | assert self.interface is not None 56 | await self.interface.double_click(x, y) 57 | 58 | async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: 59 | """Scroll at coordinates with specified 
scroll amounts.""" 60 | assert self.interface is not None 61 | await self.interface.move_cursor(x, y) 62 | await self.interface.scroll(scroll_x, scroll_y) 63 | 64 | async def type(self, text: str) -> None: 65 | """Type text.""" 66 | assert self.interface is not None 67 | await self.interface.type_text(text) 68 | 69 | async def wait(self, ms: int = 1000) -> None: 70 | """Wait for specified milliseconds.""" 71 | assert self.interface is not None 72 | import asyncio 73 | await asyncio.sleep(ms / 1000.0) 74 | 75 | async def move(self, x: int, y: int) -> None: 76 | """Move cursor to coordinates.""" 77 | assert self.interface is not None 78 | await self.interface.move_cursor(x, y) 79 | 80 | async def keypress(self, keys: Union[List[str], str]) -> None: 81 | """Press key combination.""" 82 | assert self.interface is not None 83 | if isinstance(keys, str): 84 | keys = keys.replace("-", "+").split("+") 85 | if len(keys) == 1: 86 | await self.interface.press_key(keys[0]) 87 | else: 88 | # Handle key combinations 89 | await self.interface.hotkey(*keys) 90 | 91 | async def drag(self, path: List[Dict[str, int]]) -> None: 92 | """Drag along specified path.""" 93 | assert self.interface is not None 94 | if not path: 95 | return 96 | 97 | # Start drag from first point 98 | start = path[0] 99 | await self.interface.mouse_down(start["x"], start["y"]) 100 | 101 | # Move through path 102 | for point in path[1:]: 103 | await self.interface.move_cursor(point["x"], point["y"]) 104 | 105 | # End drag at last point 106 | end = path[-1] 107 | await self.interface.mouse_up(end["x"], end["y"]) 108 | 109 | async def get_current_url(self) -> str: 110 | """Get current URL (for browser environments).""" 111 | # This would need to be implemented based on the specific browser interface 112 | # For now, return empty string 113 | return "" 114 | 115 | # ==== Anthropic Computer Action Space ==== 116 | async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: 117 | """Left mouse down at coordinates.""" 118 | assert self.interface is not None 119 | await self.interface.mouse_down(x, y, button="left") 120 | 121 | async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: 122 | """Left mouse up at coordinates.""" 123 | assert self.interface is not None 124 | await self.interface.mouse_up(x, y, button="left") ``` -------------------------------------------------------------------------------- /examples/computer_examples_windows.py: -------------------------------------------------------------------------------- ```python 1 | import os 2 | import asyncio 3 | from pathlib import Path 4 | import sys 5 | import traceback 6 | 7 | # Load environment variables from .env file 8 | project_root = Path(__file__).parent.parent 9 | env_file = project_root / ".env" 10 | print(f"Loading environment from: {env_file}") 11 | from computer.helpers import sandboxed 12 | from dotenv import load_dotenv 13 | 14 | load_dotenv(env_file) 15 | 16 | # Add paths to sys.path if needed 17 | pythonpath = os.environ.get("PYTHONPATH", "") 18 | for path in pythonpath.split(":"): 19 | if path and path not in sys.path: 20 | sys.path.insert(0, path) # Insert at beginning to prioritize 21 | print(f"Added to sys.path: {path}") 22 | 23 | from computer.computer import Computer 24 | from computer.providers.base import VMProviderType 25 | from computer.logger import LogLevel 26 | 27 | # ANSI color codes 28 | RED = '\033[91m' 29 | RESET = '\033[0m' 30 | 31 | async def main(): 32 | try: 33 | print("\n=== Using 
direct initialization ===") 34 | 35 | # Create a remote Windows computer with Cua 36 | computer = Computer( 37 | os_type="windows", 38 | api_key=os.getenv("CUA_API_KEY"), 39 | name=os.getenv("CONTAINER_NAME") or "", 40 | provider_type=VMProviderType.CLOUD, 41 | ) 42 | 43 | try: 44 | # Run the computer with default parameters 45 | await computer.run() 46 | 47 | # Create output directory if it doesn't exist 48 | output_dir = Path("./output") 49 | output_dir.mkdir(exist_ok=True) 50 | 51 | # Keyboard Actions Examples 52 | print("\n=== Keyboard Actions ===") 53 | await computer.interface.type_text("Hello, World!") 54 | await computer.interface.press_key("enter") 55 | 56 | # Mouse Actions Examples 57 | print("\n=== Mouse Actions ===") 58 | await computer.interface.move_cursor(100, 100) 59 | await computer.interface.left_click() 60 | await computer.interface.double_click(400, 400) 61 | await computer.interface.right_click(300, 300) 62 | 63 | print("\n=== RPC ===") 64 | await computer.venv_install("demo_venv", ["mss"]) 65 | 66 | @sandboxed("demo_venv") 67 | def greet_and_print(name): 68 | from mss import mss 69 | import os 70 | # get username 71 | username = os.getlogin() 72 | print(f"Hello from inside the container, {name}!") 73 | print("Username:", username) 74 | print("Screens:", mss().monitors) 75 | 76 | # take a screenshot 77 | with mss() as sct: 78 | filename = sct.shot(mon=-1, output='C:/Users/azureuser/Desktop/fullscreen.png') 79 | print(filename) 80 | 81 | return {"greeted": name, "username": username} 82 | 83 | # Call with args and kwargs 84 | result = await greet_and_print("John Doe") 85 | print("Result from sandboxed function:", result) 86 | 87 | # Command Actions Examples 88 | print("\n=== Command Actions ===") 89 | result = await computer.interface.run_command("notepad") 90 | print("Result from command:", result) 91 | 92 | screenshot = await computer.interface.screenshot() 93 | screenshot_path = output_dir / "screenshot.png" 94 | with open(screenshot_path, "wb") as f: 95 | f.write(screenshot) 96 | print(f"Screenshot saved to: {screenshot_path.absolute()}") 97 | 98 | # Clipboard Actions Examples 99 | print("\n=== Clipboard Actions ===") 100 | await computer.interface.set_clipboard("Test clipboard") 101 | content = await computer.interface.copy_to_clipboard() 102 | print(f"Clipboard content: {content}") 103 | 104 | 105 | # Simple REPL Loop 106 | print("\n=== Command REPL ===") 107 | print("Enter commands to run on the remote computer.") 108 | print("Type 'exit' or 'quit' to leave the REPL.\n") 109 | 110 | while True: 111 | try: 112 | # Get command from user 113 | command = input("command> ").strip() 114 | 115 | # Check for exit commands 116 | if command.lower() in ['exit', 'quit', '']: 117 | if command.lower() in ['exit', 'quit']: 118 | print("Exiting REPL...") 119 | break 120 | 121 | # Run the command 122 | result = await computer.interface.run_command(command) 123 | 124 | print(result.stdout) 125 | if result.stderr: 126 | print(f"{RED}{result.stderr}{RESET}") 127 | except KeyboardInterrupt: 128 | print("\nExiting REPL...") 129 | break 130 | except Exception as e: 131 | print(f"{RED}Error running command: {e}{RESET}") 132 | 133 | 134 | finally: 135 | # Important to clean up resources 136 | # await computer.stop() 137 | pass 138 | except Exception as e: 139 | print(f"Error in main: {e}") 140 | traceback.print_exc() 141 | 142 | 143 | if __name__ == "__main__": 144 | asyncio.run(main()) 145 | ``` -------------------------------------------------------------------------------- 
/libs/lume/src/VNC/VNCService.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import Dynamic 3 | import Virtualization 4 | 5 | /// Protocol defining the interface for VNC server operations 6 | @MainActor 7 | protocol VNCService { 8 | var url: String? { get } 9 | func start(port: Int, virtualMachine: Any?) async throws 10 | func stop() 11 | func openClient(url: String) async throws 12 | } 13 | 14 | /// Default implementation of VNCService 15 | @MainActor 16 | final class DefaultVNCService: VNCService { 17 | private var vncServer: Any? 18 | private let vmDirectory: VMDirectory 19 | 20 | init(vmDirectory: VMDirectory) { 21 | self.vmDirectory = vmDirectory 22 | } 23 | 24 | var url: String? { 25 | get { 26 | return try? vmDirectory.loadSession().url 27 | } 28 | } 29 | 30 | func start(port: Int, virtualMachine: Any?) async throws { 31 | let password = Array(PassphraseGenerator().prefix(4)).joined(separator: "-") 32 | let securityConfiguration = Dynamic._VZVNCAuthenticationSecurityConfiguration(password: password) 33 | 34 | // Create VNC server with specified port 35 | let server = Dynamic._VZVNCServer(port: port, queue: DispatchQueue.main, 36 | securityConfiguration: securityConfiguration) 37 | 38 | if let vm = virtualMachine as? VZVirtualMachine { 39 | server.virtualMachine = vm 40 | } 41 | server.start() 42 | 43 | vncServer = server 44 | 45 | // Wait for port to be assigned (both for auto-assign and specific port) 46 | var attempts = 0 47 | let maxAttempts = 20 // 1 second total wait time 48 | while true { 49 | if let assignedPort: UInt16 = server.port.asUInt16 { 50 | // If we got a non-zero port, check if it matches our request 51 | if assignedPort != 0 { 52 | // For specific port requests, verify we got the requested port 53 | if port != 0 && Int(assignedPort) != port { 54 | throw VMError.vncPortBindingFailed(requested: port, actual: Int(assignedPort)) 55 | } 56 | 57 | // Get the local IP address for the URL - prefer IPv4 58 | let hostIP = try getLocalIPAddress() ?? "127.0.0.1" 59 | let url = "vnc://:\(password)@127.0.0.1:\(assignedPort)" // Use localhost for local connections 60 | let externalUrl = "vnc://:\(password)@\(hostIP):\(assignedPort)" // External URL for remote connections 61 | 62 | Logger.info("VNC server started", metadata: [ 63 | "local": url, 64 | "external": externalUrl 65 | ]) 66 | 67 | // Save session information with local URL for the client 68 | let session = VNCSession(url: url) 69 | try vmDirectory.saveSession(session) 70 | break 71 | } 72 | } 73 | 74 | attempts += 1 75 | if attempts >= maxAttempts { 76 | // If we've timed out and we requested a specific port, it likely means binding failed 77 | vncServer = nil 78 | if port != 0 { 79 | throw VMError.vncPortBindingFailed(requested: port, actual: -1) 80 | } 81 | throw VMError.internalError("Timeout waiting for VNC server to start") 82 | } 83 | try await Task.sleep(nanoseconds: 50_000_000) // 50ms delay between checks 84 | } 85 | } 86 | 87 | // Modified to prefer IPv4 addresses 88 | private func getLocalIPAddress() throws -> String? { 89 | var address: String? 90 | 91 | var ifaddr: UnsafeMutablePointer<ifaddrs>? 
92 | guard getifaddrs(&ifaddr) == 0 else { 93 | return nil 94 | } 95 | defer { freeifaddrs(ifaddr) } 96 | 97 | var ptr = ifaddr 98 | while ptr != nil { 99 | defer { ptr = ptr?.pointee.ifa_next } 100 | 101 | let interface = ptr?.pointee 102 | let family = interface?.ifa_addr.pointee.sa_family 103 | 104 | // Only look for IPv4 addresses 105 | if family == UInt8(AF_INET) { 106 | let name = String(cString: (interface?.ifa_name)!) 107 | if name == "en0" { // Primary interface 108 | var hostname = [CChar](repeating: 0, count: Int(NI_MAXHOST)) 109 | getnameinfo(interface?.ifa_addr, 110 | socklen_t((interface?.ifa_addr.pointee.sa_len)!), 111 | &hostname, 112 | socklen_t(hostname.count), 113 | nil, 114 | 0, 115 | NI_NUMERICHOST) 116 | address = String(cString: hostname, encoding: .utf8) 117 | break 118 | } 119 | } 120 | } 121 | 122 | return address 123 | } 124 | 125 | func stop() { 126 | if let server = vncServer as? Dynamic { 127 | server.stop() 128 | } 129 | vncServer = nil 130 | vmDirectory.clearSession() 131 | } 132 | 133 | func openClient(url: String) async throws { 134 | let processRunner = DefaultProcessRunner() 135 | try processRunner.run(executable: "/usr/bin/open", arguments: [url]) 136 | } 137 | } ``` -------------------------------------------------------------------------------- /libs/typescript/agent/examples/playground-example.html: -------------------------------------------------------------------------------- ```html 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="UTF-8"> 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 | <title>CUA Agent Playground Example</title> 7 | </head> 8 | <body> 9 | <h1>CUA Agent Playground Example</h1> 10 | 11 | <div> 12 | <h2>Configuration</h2> 13 | <label for="url">Agent URL:</label><br> 14 | <input type="text" id="url" placeholder="https://localhost:8000 or peer://peer-id" value="https://localhost:8000" style="width: 400px;"><br><br> 15 | 16 | <label for="model">Model:</label><br> 17 | <input type="text" id="model" placeholder="anthropic/claude-opus-4-1-20250805" value="anthropic/claude-opus-4-1-20250805" style="width: 400px;"><br><br> 18 | </div> 19 | 20 | <div> 21 | <h2>Chat</h2> 22 | <label for="message">Message:</label><br> 23 | <input type="text" id="message" placeholder="Enter your message here..." 
style="width: 400px;"><br><br> 24 | 25 | <button onclick="sendMessage()">Send Message</button> 26 | <!-- <button onclick="checkHealth()">Check Health</button> --> 27 | <button onclick="clearOutput()">Clear Output</button><br><br> 28 | 29 | <label for="output">Output:</label><br> 30 | <textarea id="output" rows="20" cols="80" readonly></textarea> 31 | </div> 32 | 33 | <script src="https://unpkg.com/[email protected]/dist/peerjs.min.js"></script> 34 | <script type="module"> 35 | // Import the AgentClient from the built library 36 | import AgentClient from '/dist/index.js'; 37 | 38 | let client = null; 39 | 40 | // Make functions available globally 41 | window.sendMessage = sendMessage; 42 | window.checkHealth = checkHealth; 43 | window.clearOutput = clearOutput; 44 | 45 | function log(message) { 46 | const output = document.getElementById('output'); 47 | const timestamp = new Date().toLocaleTimeString(); 48 | output.value += `[${timestamp}] ${message}\n`; 49 | output.scrollTop = output.scrollHeight; 50 | } 51 | 52 | function getClient() { 53 | const url = document.getElementById('url').value.trim(); 54 | if (!url) { 55 | log('ERROR: Please enter a URL'); 56 | return null; 57 | } 58 | 59 | // Create new client if URL changed or client doesn't exist 60 | if (!client || client.url !== url) { 61 | try { 62 | client = new AgentClient(url); 63 | client.url = url; // Store URL for comparison 64 | log(`Created new client for: ${url}`); 65 | } catch (error) { 66 | log(`ERROR creating client: ${error.message}`); 67 | return null; 68 | } 69 | } 70 | 71 | return client; 72 | } 73 | 74 | async function sendMessage() { 75 | const messageInput = document.getElementById('message'); 76 | const modelInput = document.getElementById('model'); 77 | 78 | const message = messageInput.value.trim(); 79 | const model = modelInput.value.trim(); 80 | 81 | if (!message) { 82 | log('ERROR: Please enter a message'); 83 | return; 84 | } 85 | 86 | if (!model) { 87 | log('ERROR: Please enter a model'); 88 | return; 89 | } 90 | 91 | const agentClient = getClient(); 92 | if (!agentClient) return; 93 | 94 | try { 95 | log(`Sending message: "${message}"`); 96 | log(`Using model: ${model}`); 97 | 98 | const request = { 99 | model: model, 100 | input: message 101 | }; 102 | 103 | log('Sending request...'); 104 | const response = await agentClient.responses.create(request); 105 | 106 | log('Response received:'); 107 | log(JSON.stringify(response, null, 2)); 108 | 109 | // Clear the message input 110 | messageInput.value = ''; 111 | 112 | } catch (error) { 113 | log(`ERROR: ${error.message}`); 114 | } 115 | } 116 | 117 | async function checkHealth() { 118 | const agentClient = getClient(); 119 | if (!agentClient) return; 120 | 121 | try { 122 | log('Checking health...'); 123 | const health = await agentClient.health(); 124 | log(`Health status: ${health.status}`); 125 | } catch (error) { 126 | log(`ERROR checking health: ${error.message}`); 127 | } 128 | } 129 | 130 | function clearOutput() { 131 | document.getElementById('output').value = ''; 132 | } 133 | 134 | // Allow sending message with Enter key 135 | document.getElementById('message').addEventListener('keypress', function(e) { 136 | if (e.key === 'Enter') { 137 | sendMessage(); 138 | } 139 | }); 140 | 141 | // Log initial message 142 | log('CUA Agent Client Browser Example loaded'); 143 | log('Enter a URL (HTTP/HTTPS or peer://) and model, then send a message'); 144 | </script> 145 | </body> 146 | </html> 147 | ``` 
-------------------------------------------------------------------------------- /docs/src/assets/logo-black.svg: -------------------------------------------------------------------------------- ``` 1 | <?xml version="1.0" standalone="no"?> 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN" 3 | "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"> 4 | <svg version="1.0" xmlns="http://www.w3.org/2000/svg" 5 | width="1000.000000pt" height="1000.000000pt" viewBox="0 0 1000.000000 1000.000000" 6 | preserveAspectRatio="xMidYMid meet"> 7 | 8 | <g transform="translate(0.000000,1000.000000) scale(0.100000,-0.100000)" 9 | fill="#000000" stroke="none"> 10 | <path d="M4934 9086 c-40 -14 -62 -33 -80 -69 -22 -42 -21 -994 1 -1037 38 11 | -73 174 -101 243 -50 19 14 43 42 53 62 18 35 19 65 19 510 0 471 0 473 -23 12 | 513 -38 69 -133 101 -213 71z"/> 13 | <path d="M3702 8472 c-52 -28 -82 -81 -82 -147 0 -67 8 -80 125 -210 44 -49 14 | 107 -121 139 -160 165 -196 233 -268 278 -291 58 -29 66 -30 124 -2 67 31 104 15 | 86 104 154 0 60 -14 82 -149 235 -42 47 -95 108 -117 135 -23 27 -52 61 -65 16 | 75 -13 14 -57 65 -98 112 -41 47 -89 93 -107 102 -42 20 -111 19 -152 -3z"/> 17 | <path d="M6145 8472 c-29 -18 -136 -133 -235 -252 -53 -64 -190 -222 -230 18 | -265 -37 -41 -70 -108 -70 -142 0 -16 10 -49 23 -73 17 -36 33 -51 79 -73 57 19 | -29 57 -29 107 -12 44 14 63 31 149 128 54 62 122 141 151 177 30 36 57 67 60 20 | 70 12 10 157 175 179 204 33 43 31 150 -2 188 -56 64 -151 86 -211 50z"/> 21 | <path d="M2245 7400 c-188 -14 -374 -75 -585 -191 -222 -123 -464 -366 -577 22 | -579 -13 -25 -28 -52 -33 -60 -74 -123 -137 -348 -161 -580 -10 -106 1 -310 23 | 22 -384 5 -17 9 -44 9 -60 0 -72 116 -366 181 -458 11 -14 19 -29 19 -33 0 24 | -33 296 -355 326 -355 7 0 14 -4 16 -10 5 -17 139 -99 243 -150 106 -52 216 25 | -91 303 -109 98 -20 92 -7 92 -215 0 -176 26 -472 50 -571 5 -22 12 -56 15 26 | -75 8 -44 31 -129 56 -201 10 -31 19 -62 19 -69 0 -8 8 -32 19 -54 10 -23 30 27 | -70 45 -106 76 -182 189 -363 319 -515 296 -344 701 -603 1162 -743 216 -66 28 | 521 -126 730 -143 335 -27 467 -31 653 -19 103 6 237 15 297 19 120 8 282 32 29 | 415 62 47 10 98 19 113 19 16 0 37 5 48 11 11 5 48 16 82 24 34 7 85 21 112 30 | 31 104 36 161 58 201 76 22 10 43 18 47 18 12 0 185 85 263 131 44 25 116 71 31 | 159 100 43 30 87 61 99 68 107 74 344 310 444 444 40 53 72 98 72 101 0 2 17 32 | 31 38 63 68 104 202 390 202 431 0 10 4 22 9 28 12 12 53 168 80 304 30 149 33 | 43 293 48 538 l5 214 33 14 c18 7 53 16 77 20 23 4 48 10 53 14 6 4 28 13 50 34 | 19 91 27 214 86 318 152 224 141 416 353 524 580 98 206 129 320 153 562 19 35 | 189 -20 467 -92 657 -144 382 -420 674 -811 859 -48 22 -93 41 -101 41 -7 0 36 | -35 8 -62 19 -27 10 -92 29 -144 41 -84 20 -119 23 -325 22 -212 0 -238 -2 37 | -330 -25 -55 -14 -131 -37 -170 -52 -38 -15 -84 -32 -101 -39 -18 -6 -38 -16 38 | -45 -22 -8 -6 -27 -18 -44 -26 -79 -40 -121 -67 -205 -134 -69 -54 -225 -212 39 | -255 -257 -21 -32 -26 -33 -84 -6 -25 12 -64 29 -86 40 -183 84 -514 183 -705 40 | 209 -41 6 -91 15 -110 20 -50 13 -318 30 -470 30 -159 0 -363 -16 -450 -35 41 | -36 -8 -87 -17 -115 -20 -48 -7 -178 -36 -240 -55 -84 -26 -222 -71 -240 -79 42 | -11 -4 -47 -19 -80 -31 -77 -30 -162 -66 -198 -85 -32 -17 -67 -20 -67 -6 0 43 | 16 -211 230 -274 279 -96 74 -124 92 -237 149 -204 102 -346 139 -569 146 -85 44 | 2 -200 1 -255 -3z m396 -331 c163 -33 302 -93 433 -184 97 -68 232 -206 299 45 | -307 32 -48 70 -94 85 -104 38 -25 155 -24 185 3 28 24 183 99 302 146 180 70 46 | 201 77 214 77 8 0 39 8 70 19 77 26 221 57 376 82 111 17 173 20 418 20 159 
0 47 | 305 -5 325 -10 21 -5 71 -14 112 -21 178 -28 372 -81 590 -161 65 -24 225 48 | -102 279 -137 48 -30 63 -34 118 -34 78 1 105 20 179 131 65 97 213 245 301 49 | 303 74 48 228 128 248 128 6 0 25 6 41 14 61 30 229 56 359 56 202 0 365 -39 50 | 550 -131 285 -142 521 -410 616 -699 108 -331 69 -692 -109 -995 -79 -134 51 | -217 -274 -366 -369 -63 -40 -221 -116 -242 -116 -8 0 -28 -7 -44 -15 -16 -8 52 | -55 -19 -87 -24 -230 -37 -274 -55 -306 -124 -15 -30 -16 -58 -7 -238 18 -382 53 | -25 -716 -128 -994 -63 -171 -182 -380 -298 -523 -59 -74 -186 -204 -244 -251 54 | -25 -20 -54 -44 -65 -54 -26 -24 -178 -128 -235 -161 -25 -14 -88 -46 -140 55 | -72 -52 -25 -106 -51 -120 -58 -34 -18 -216 -80 -315 -107 -114 -31 -197 -48 56 | -410 -85 -126 -21 -452 -46 -625 -48 -376 -3 -837 62 -1105 155 -16 6 -50 17 57 | -75 24 -72 21 -256 98 -320 135 -8 5 -40 21 -70 36 -63 31 -172 103 -277 181 58 | -199 148 -392 374 -504 588 -118 228 -190 479 -220 775 -11 113 -7 483 7 597 59 | 5 42 2 62 -15 96 -37 77 -60 86 -318 127 -29 4 -67 15 -84 24 -18 9 -41 16 60 | -52 16 -10 0 -36 8 -56 18 -20 10 -58 30 -86 43 -139 67 -301 202 -395 329 61 | -150 203 -229 445 -230 705 0 331 117 613 355 850 175 176 364 280 615 339 96 62 | 22 103 23 243 25 95 1 154 -4 228 -20z"/> 63 | <path d="M3464 5185 c-17 -8 -43 -28 -58 -45 l-26 -32 0 -265 c0 -249 1 -268 64 | 20 -298 38 -62 51 -65 244 -65 l175 0 36 34 37 35 -4 283 c-4 378 13 353 -253 65 | 362 -108 4 -147 2 -171 -9z"/> 66 | <path d="M6174 5171 c-12 -5 -31 -22 -43 -37 -22 -28 -22 -32 -19 -309 l3 67 | -281 25 -31 25 -32 189 0 188 -1 41 40 40 40 -5 253 c-6 260 -10 288 -53 342 68 | -15 18 -29 20 -193 22 -97 1 -187 -2 -198 -6z"/> 69 | <path d="M4935 5079 c-199 -25 -341 -112 -454 -278 -49 -71 -134 -238 -151 70 | -296 -7 -22 -21 -59 -31 -83 -11 -23 -19 -50 -19 -60 0 -9 -7 -37 -15 -60 -9 71 | -24 -20 -69 -25 -100 -5 -32 -16 -93 -25 -137 -12 -59 -16 -144 -17 -325 -1 72 | -238 0 -247 25 -321 63 -188 164 -313 318 -394 86 -45 137 -61 274 -85 236 73 | -42 492 -10 651 81 238 137 348 357 348 699 0 89 -21 335 -34 390 -6 25 -15 74 | 70 -20 100 -5 30 -15 71 -21 90 -6 19 -15 51 -19 70 -24 100 -107 282 -186 75 | 406 -59 94 -167 193 -265 242 -46 23 -93 42 -104 42 -12 0 -25 4 -30 9 -15 13 76 | -132 19 -200 10z"/> 77 | </g> 78 | </svg> 79 | ``` -------------------------------------------------------------------------------- /docs/src/assets/logo-white.svg: -------------------------------------------------------------------------------- ``` 1 | <?xml version="1.0" standalone="no"?> 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN" 3 | "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"> 4 | <svg version="1.0" xmlns="http://www.w3.org/2000/svg" 5 | width="1000.000000pt" height="1000.000000pt" viewBox="0 0 1000.000000 1000.000000" 6 | preserveAspectRatio="xMidYMid meet"> 7 | 8 | <g transform="translate(0.000000,1000.000000) scale(0.100000,-0.100000)" 9 | fill="#ffffff" stroke="none"> 10 | <path d="M4934 9086 c-40 -14 -62 -33 -80 -69 -22 -42 -21 -994 1 -1037 38 11 | -73 174 -101 243 -50 19 14 43 42 53 62 18 35 19 65 19 510 0 471 0 473 -23 12 | 513 -38 69 -133 101 -213 71z"/> 13 | <path d="M3702 8472 c-52 -28 -82 -81 -82 -147 0 -67 8 -80 125 -210 44 -49 14 | 107 -121 139 -160 165 -196 233 -268 278 -291 58 -29 66 -30 124 -2 67 31 104 15 | 86 104 154 0 60 -14 82 -149 235 -42 47 -95 108 -117 135 -23 27 -52 61 -65 16 | 75 -13 14 -57 65 -98 112 -41 47 -89 93 -107 102 -42 20 -111 19 -152 -3z"/> 17 | <path d="M6145 8472 c-29 -18 -136 -133 -235 -252 -53 -64 -190 -222 -230 18 | -265 -37 -41 -70 -108 -70 -142 0 -16 10 -49 
23 -73 17 -36 33 -51 79 -73 57 19 | -29 57 -29 107 -12 44 14 63 31 149 128 54 62 122 141 151 177 30 36 57 67 60 20 | 70 12 10 157 175 179 204 33 43 31 150 -2 188 -56 64 -151 86 -211 50z"/> 21 | <path d="M2245 7400 c-188 -14 -374 -75 -585 -191 -222 -123 -464 -366 -577 22 | -579 -13 -25 -28 -52 -33 -60 -74 -123 -137 -348 -161 -580 -10 -106 1 -310 23 | 22 -384 5 -17 9 -44 9 -60 0 -72 116 -366 181 -458 11 -14 19 -29 19 -33 0 24 | -33 296 -355 326 -355 7 0 14 -4 16 -10 5 -17 139 -99 243 -150 106 -52 216 25 | -91 303 -109 98 -20 92 -7 92 -215 0 -176 26 -472 50 -571 5 -22 12 -56 15 26 | -75 8 -44 31 -129 56 -201 10 -31 19 -62 19 -69 0 -8 8 -32 19 -54 10 -23 30 27 | -70 45 -106 76 -182 189 -363 319 -515 296 -344 701 -603 1162 -743 216 -66 28 | 521 -126 730 -143 335 -27 467 -31 653 -19 103 6 237 15 297 19 120 8 282 32 29 | 415 62 47 10 98 19 113 19 16 0 37 5 48 11 11 5 48 16 82 24 34 7 85 21 112 30 | 31 104 36 161 58 201 76 22 10 43 18 47 18 12 0 185 85 263 131 44 25 116 71 31 | 159 100 43 30 87 61 99 68 107 74 344 310 444 444 40 53 72 98 72 101 0 2 17 32 | 31 38 63 68 104 202 390 202 431 0 10 4 22 9 28 12 12 53 168 80 304 30 149 33 | 43 293 48 538 l5 214 33 14 c18 7 53 16 77 20 23 4 48 10 53 14 6 4 28 13 50 34 | 19 91 27 214 86 318 152 224 141 416 353 524 580 98 206 129 320 153 562 19 35 | 189 -20 467 -92 657 -144 382 -420 674 -811 859 -48 22 -93 41 -101 41 -7 0 36 | -35 8 -62 19 -27 10 -92 29 -144 41 -84 20 -119 23 -325 22 -212 0 -238 -2 37 | -330 -25 -55 -14 -131 -37 -170 -52 -38 -15 -84 -32 -101 -39 -18 -6 -38 -16 38 | -45 -22 -8 -6 -27 -18 -44 -26 -79 -40 -121 -67 -205 -134 -69 -54 -225 -212 39 | -255 -257 -21 -32 -26 -33 -84 -6 -25 12 -64 29 -86 40 -183 84 -514 183 -705 40 | 209 -41 6 -91 15 -110 20 -50 13 -318 30 -470 30 -159 0 -363 -16 -450 -35 41 | -36 -8 -87 -17 -115 -20 -48 -7 -178 -36 -240 -55 -84 -26 -222 -71 -240 -79 42 | -11 -4 -47 -19 -80 -31 -77 -30 -162 -66 -198 -85 -32 -17 -67 -20 -67 -6 0 43 | 16 -211 230 -274 279 -96 74 -124 92 -237 149 -204 102 -346 139 -569 146 -85 44 | 2 -200 1 -255 -3z m396 -331 c163 -33 302 -93 433 -184 97 -68 232 -206 299 45 | -307 32 -48 70 -94 85 -104 38 -25 155 -24 185 3 28 24 183 99 302 146 180 70 46 | 201 77 214 77 8 0 39 8 70 19 77 26 221 57 376 82 111 17 173 20 418 20 159 0 47 | 305 -5 325 -10 21 -5 71 -14 112 -21 178 -28 372 -81 590 -161 65 -24 225 48 | -102 279 -137 48 -30 63 -34 118 -34 78 1 105 20 179 131 65 97 213 245 301 49 | 303 74 48 228 128 248 128 6 0 25 6 41 14 61 30 229 56 359 56 202 0 365 -39 50 | 550 -131 285 -142 521 -410 616 -699 108 -331 69 -692 -109 -995 -79 -134 51 | -217 -274 -366 -369 -63 -40 -221 -116 -242 -116 -8 0 -28 -7 -44 -15 -16 -8 52 | -55 -19 -87 -24 -230 -37 -274 -55 -306 -124 -15 -30 -16 -58 -7 -238 18 -382 53 | -25 -716 -128 -994 -63 -171 -182 -380 -298 -523 -59 -74 -186 -204 -244 -251 54 | -25 -20 -54 -44 -65 -54 -26 -24 -178 -128 -235 -161 -25 -14 -88 -46 -140 55 | -72 -52 -25 -106 -51 -120 -58 -34 -18 -216 -80 -315 -107 -114 -31 -197 -48 56 | -410 -85 -126 -21 -452 -46 -625 -48 -376 -3 -837 62 -1105 155 -16 6 -50 17 57 | -75 24 -72 21 -256 98 -320 135 -8 5 -40 21 -70 36 -63 31 -172 103 -277 181 58 | -199 148 -392 374 -504 588 -118 228 -190 479 -220 775 -11 113 -7 483 7 597 59 | 5 42 2 62 -15 96 -37 77 -60 86 -318 127 -29 4 -67 15 -84 24 -18 9 -41 16 60 | -52 16 -10 0 -36 8 -56 18 -20 10 -58 30 -86 43 -139 67 -301 202 -395 329 61 | -150 203 -229 445 -230 705 0 331 117 613 355 850 175 176 364 280 615 339 96 62 | 22 103 23 243 25 95 1 154 -4 228 -20z"/> 63 | <path d="M3464 5185 c-17 -8 -43 -28 -58 -45 l-26 -32 0 
-265 c0 -249 1 -268 64 | 20 -298 38 -62 51 -65 244 -65 l175 0 36 34 37 35 -4 283 c-4 378 13 353 -253 65 | 362 -108 4 -147 2 -171 -9z"/> 66 | <path d="M6174 5171 c-12 -5 -31 -22 -43 -37 -22 -28 -22 -32 -19 -309 l3 67 | -281 25 -31 25 -32 189 0 188 -1 41 40 40 40 -5 253 c-6 260 -10 288 -53 342 68 | -15 18 -29 20 -193 22 -97 1 -187 -2 -198 -6z"/> 69 | <path d="M4935 5079 c-199 -25 -341 -112 -454 -278 -49 -71 -134 -238 -151 70 | -296 -7 -22 -21 -59 -31 -83 -11 -23 -19 -50 -19 -60 0 -9 -7 -37 -15 -60 -9 71 | -24 -20 -69 -25 -100 -5 -32 -16 -93 -25 -137 -12 -59 -16 -144 -17 -325 -1 72 | -238 0 -247 25 -321 63 -188 164 -313 318 -394 86 -45 137 -61 274 -85 236 73 | -42 492 -10 651 81 238 137 348 357 348 699 0 89 -21 335 -34 390 -6 25 -15 74 | 70 -20 100 -5 30 -15 71 -21 90 -6 19 -15 51 -19 70 -24 100 -107 282 -186 75 | 406 -59 94 -167 193 -265 242 -46 23 -93 42 -104 42 -12 0 -25 4 -30 9 -15 13 76 | -132 19 -200 10z"/> 77 | </g> 78 | </svg> 79 | ```