This is page 2 of 16. Use http://codebase.md/trycua/cua?page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ └── py.code-workspace ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── windows-sandbox.md ├── CONTRIBUTING.md 
├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ 
│ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── 
agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift │ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── 
DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ 
├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ 
│ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ 
│ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── 
xfce4-session.xml ├── LICENSE.md ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/client-integrations.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Client Integrations --- ## Claude Desktop Integration To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`): For more information on MCP with Claude Desktop, see the [official MCP User Guide](https://modelcontextprotocol.io/quickstart/user). ## Cursor Integration To use with Cursor, add an MCP configuration file in one of these locations: - **Project-specific**: Create `.cursor/mcp.json` in your project directory - **Global**: Create `~/.cursor/mcp.json` in your home directory After configuration, you can simply tell Cursor's Agent to perform computer tasks by explicitly mentioning the CUA agent, such as "Use the computer control tools to open Safari." 
For more information on MCP with Cursor, see the [official Cursor MCP documentation](https://docs.cursor.com/context/model-context-protocol). ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/windows.test.ts: -------------------------------------------------------------------------------- ```typescript import { describe, expect, it } from 'vitest'; import { MacOSComputerInterface } from '../../src/interface/macos.ts'; import { WindowsComputerInterface } from '../../src/interface/windows.ts'; describe('WindowsComputerInterface', () => { const testParams = { ipAddress: '192.0.2.1', // TEST-NET-1 address (RFC 5737) - guaranteed not to be routable username: 'testuser', password: 'testpass', apiKey: 'test-api-key', vmName: 'test-vm', }; describe('Inheritance', () => { it('should extend MacOSComputerInterface', () => { const windowsInterface = new WindowsComputerInterface( testParams.ipAddress, testParams.username, testParams.password, testParams.apiKey, testParams.vmName ); expect(windowsInterface).toBeInstanceOf(MacOSComputerInterface); expect(windowsInterface).toBeInstanceOf(WindowsComputerInterface); }); }); }); ``` -------------------------------------------------------------------------------- /libs/lume/src/VM/VMFactory.swift: -------------------------------------------------------------------------------- ```swift import Foundation import Virtualization enum VMType: String { case darwin = "macOS" case linux = "linux" } protocol VMFactory { @MainActor func createVM( vmDirContext: VMDirContext, imageLoader: ImageLoader? ) throws -> VM } class DefaultVMFactory: VMFactory { @MainActor func createVM( vmDirContext: VMDirContext, imageLoader: ImageLoader? 
) throws -> VM { let osType = vmDirContext.config.os.lowercased() switch osType { case "macos", "darwin": guard let imageLoader = imageLoader else { throw VMError.internalError("ImageLoader required for macOS VM") } return DarwinVM(vmDirContext: vmDirContext, imageLoader: imageLoader) case "linux": return LinuxVM(vmDirContext: vmDirContext) default: throw VMError.unsupportedOS(osType) } } } ``` -------------------------------------------------------------------------------- /libs/typescript/core/tests/telemetry.test.ts: -------------------------------------------------------------------------------- ```typescript import { beforeEach, describe, expect, it } from 'vitest'; import { Telemetry } from '../src/'; describe('Telemetry', () => { let telemetry: Telemetry; beforeEach(() => { process.env.CUA_TELEMETRY = ''; process.env.CUA_TELEMETRY_DISABLED = ''; telemetry = new Telemetry(); }); describe('telemetry.enabled', () => { it('should return false when CUA_TELEMETRY is off', () => { process.env.CUA_TELEMETRY = 'off'; telemetry = new Telemetry(); expect(telemetry.enabled).toBe(false); }); it('should return true when CUA_TELEMETRY is not set', () => { process.env.CUA_TELEMETRY = ''; telemetry = new Telemetry(); expect(telemetry.enabled).toBe(true); }); it('should return false if CUA_TELEMETRY_DISABLED is 1', () => { process.env.CUA_TELEMETRY_DISABLED = '1'; telemetry = new Telemetry(); expect(telemetry.enabled).toBe(false); }); }); }); ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Computer description: Reference for the current version of the Computer library. 
pypi: cua-computer npm: '@trycua/computer' github: - https://github.com/trycua/cua/tree/main/libs/python/computer - https://github.com/trycua/cua/tree/main/libs/typescript/computer --- The Computer library provides a Computer class for controlling and automating containers running the Computer Server. ## Connecting to Computers See the [Cua Computers](../computer-sdk/computers) documentation for how to connect to different computer types (cloud, local, or host desktop). ## Computer Commands See the [Commands](../computer-sdk/commands) documentation for all supported commands and interface methods (Shell, Mouse, Keyboard, File System, etc.). ## Sandboxed Python Functions See the [Sandboxed Python](../computer-sdk/sandboxed-python) documentation for running Python functions securely in isolated environments on a remote Cua Computer. ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Benchmarks description: Computer Agent SDK benchmarks for agentic GUI tasks --- The benchmark system evaluates models on GUI grounding tasks, specifically agent loop success rate and click prediction accuracy. 
It supports both: - **Computer Agent SDK providers** (using model strings like `"huggingface-local/HelloKKMe/GTA1-7B"`) - **Reference agent implementations** (custom model classes implementing the `ModelProtocol`) ## Available Benchmarks - **[ScreenSpot-v2](./benchmarks/screenspot-v2)** - Standard resolution GUI grounding - **[ScreenSpot-Pro](./benchmarks/screenspot-pro)** - High-resolution GUI grounding - **[Interactive Testing](./benchmarks/interactive)** - Real-time testing and visualization ## Quick Start ```bash # Clone the benchmark repository git clone https://github.com/trycua/cua cd libs/python/agent/benchmarks # Install dependencies pip install "cua-agent[all]" # Run a benchmark python ss-v2.py ``` ``` -------------------------------------------------------------------------------- /libs/lume/src/Server/Responses.swift: -------------------------------------------------------------------------------- ```swift import Foundation struct APIError: Codable { let message: String } // Helper struct to encode mixed-type dictionaries struct AnyEncodable: Encodable { private let value: Encodable init(_ value: Encodable) { self.value = value } func encode(to encoder: Encoder) throws { try value.encode(to: encoder) } } extension HTTPResponse { static func json<T: Encodable>(_ value: T) throws -> HTTPResponse { let data = try JSONEncoder().encode(value) return HTTPResponse( statusCode: .ok, headers: ["Content-Type": "application/json"], body: data ) } static func badRequest(message: String) -> HTTPResponse { let error = APIError(message: message) return try! HTTPResponse( statusCode: .badRequest, headers: ["Content-Type": "application/json"], body: JSONEncoder().encode(error) ) } } ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/models/base.py: -------------------------------------------------------------------------------- ```python """ Base protocol for benchmark models. 
""" from typing import Protocol, Optional, Tuple from PIL import Image class ModelProtocol(Protocol): """Protocol for benchmark models that can predict click coordinates.""" @property def model_name(self) -> str: """Return the name of the model.""" ... async def load_model(self) -> None: """Load the model into memory.""" ... async def unload_model(self) -> None: """Unload the model from memory.""" ... async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: """ Predict click coordinates for the given image and instruction. Args: image: PIL Image to analyze instruction: Text instruction describing what to click Returns: Tuple of (x, y) coordinates or None if prediction fails """ ... ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/List.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct List: AsyncParsableCommand { static let configuration: CommandConfiguration = CommandConfiguration( commandName: "ls", abstract: "List virtual machines" ) @Option(name: [.long, .customShort("f")], help: "Output format (json|text)") var format: FormatOption = .text @Option(name: .long, help: "Filter by storage location name") var storage: String? 
init() { } @MainActor func run() async throws { let manager = LumeController() let vms = try manager.list(storage: self.storage) if vms.isEmpty && self.format == .text { if let storageName = self.storage { print("No virtual machines found in storage '\(storageName)'") } else { print("No virtual machines found") } } else { try VMDetailsPrinter.printStatus(vms, format: self.format) } } } ``` -------------------------------------------------------------------------------- /libs/lume/tests/Mocks/MockVNCService.swift: -------------------------------------------------------------------------------- ```swift import Foundation @testable import lume @MainActor final class MockVNCService: VNCService { private(set) var url: String? private(set) var isRunning = false private(set) var clientOpenCount = 0 private var _attachedVM: Any? private let vmDirectory: VMDirectory init(vmDirectory: VMDirectory) { self.vmDirectory = vmDirectory } nonisolated var attachedVM: String? { get async { await Task { @MainActor in _attachedVM as? String }.value } } func start(port: Int, virtualMachine: Any?) async throws { isRunning = true url = "vnc://localhost:\(port)" _attachedVM = virtualMachine } func stop() { isRunning = false url = nil _attachedVM = nil } func openClient(url: String) async throws { guard isRunning else { throw VMError.vncNotConfigured } clientOpenCount += 1 } } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/docker-compose.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Docker Compose --- You can also use Docker Compose to run Lumier with a simple configuration file. 
Create a `docker-compose.yml` file with the following content: ```yaml version: '3' services: lumier: image: trycua/lumier:latest container_name: lumier-vm restart: unless-stopped ports: - "8006:8006" # Port for VNC access volumes: - ./storage:/storage # VM persistent storage - ./shared:/shared # Shared folder accessible in the VM environment: - VM_NAME=lumier-vm - VERSION=ghcr.io/trycua/macos-sequoia-cua:latest - CPU_CORES=4 - RAM_SIZE=8192 - HOST_STORAGE_PATH=${PWD}/storage - HOST_SHARED_PATH=${PWD}/shared stop_signal: SIGINT stop_grace_period: 2m ``` Then run Lumier using: ```bash # First create the required directories mkdir -p storage shared # Start the container docker-compose up -d # View the logs docker-compose logs -f # Stop the container when done docker-compose down ``` ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-model-providers/local-models.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Running Models Locally --- You can run open-source LLMs and vision models on your own machine using cua, without relying on cloud APIs. This is ideal for development, privacy, or running on air-gapped systems. ## Hugging Face (transformers) Use the `huggingface-local/` prefix to run any Hugging Face model locally via the `transformers` library. This supports most text and vision models from the Hugging Face Hub. **Example:** ```python model = "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" ``` ## MLX (Apple Silicon) Use the `mlx/` prefix to run models using the `mlx-vlm` library, optimized for Apple Silicon (M1/M2/M3). This allows fast, local inference for many open-source models. **Example:** ```python model = "mlx/mlx-community/UI-TARS-1.5-7B-6bit" ``` ## Ollama Use the `ollama_chat/` prefix to run models using the `ollama` library. This allows fast, local inference for many open-source models. 
**Example:** ```python model = "omniparser+ollama_chat/llama3.2:latest" ``` ``` -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- ```json { "name": "docs", "version": "0.0.0", "private": true, "scripts": { "build": "next build", "dev": "next dev --turbo -p 8090", "start": "next start -p 8090", "postinstall": "fumadocs-mdx" }, "dependencies": { "fumadocs-core": "15.5.1", "fumadocs-mdx": "11.6.7", "fumadocs-ui": "15.5.1", "lucide-react": "^0.525.0", "mermaid": "^11.8.1", "next": "15.3.3", "next-themes": "^0.4.6", "react": "^19.1.0", "react-dom": "^19.1.0", "remark": "^15.0.1", "remark-gfm": "^4.0.1", "remark-mdx": "^3.1.0", "tailwind-merge": "^3.3.1", "zod": "^3.25.76" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.8", "@types/mdx": "^2.0.13", "@types/node": "22.15.28", "@types/react": "^19.1.6", "@types/react-dom": "^19.1.5", "postcss": "^8.5.4", "prettier": "^3.6.2", "tailwindcss": "^4.1.8", "typescript": "^5.8.3" }, "pnpm": { "onlyBuiltDependencies": [ "@tailwindcss/oxide", "esbuild", "sharp" ] } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Main.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation @main struct Lume: AsyncParsableCommand { static var configuration: CommandConfiguration { CommandConfiguration( commandName: "lume", abstract: "A lightweight CLI and local API server to build, run and manage macOS VMs.", version: Version.current, subcommands: CommandRegistry.allCommands, helpNames: .long ) } } // MARK: - Version Management extension Lume { enum Version { static let current: String = "0.1.0" } } // MARK: - Command Execution extension Lume { public static func main() async { do { try await executeCommand() } catch { exit(withError: error) } } private static func executeCommand() 
async throws { var command = try parseAsRoot() if var asyncCommand = command as? AsyncParsableCommand { try await asyncCommand.run() } else { try command.run() } } } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/human_tool/__main__.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Human-in-the-Loop Completion Server and UI This module combines the FastAPI server for handling completion requests with a Gradio UI for human interaction. """ import gradio as gr from fastapi import FastAPI from .server import app as fastapi_app from .ui import create_ui # Create the Gradio demo gradio_demo = create_ui() # Mount Gradio on FastAPI CUSTOM_PATH = "/gradio" app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH) # Add a redirect from root to Gradio UI @fastapi_app.get("/") async def redirect_to_ui(): """Redirect root to Gradio UI.""" return { "message": "Human Completion Server is running", "ui_url": "/gradio", "api_docs": "/docs" } if __name__ == "__main__": import uvicorn print("🚀 Starting Human-in-the-Loop Completion Server...") print("📊 API Server: http://localhost:8002") print("🎨 Gradio UI: http://localhost:8002/gradio") print("📚 API Docs: http://localhost:8002/docs") uvicorn.run(app, host="0.0.0.0", port=8002) ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/types.py: -------------------------------------------------------------------------------- ```python """ Type definitions for agent """ from typing import Dict, List, Any, Optional, Callable, Protocol, Literal from pydantic import BaseModel import re from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam from collections.abc import Iterable # Agent input types Messages = str | ResponseInputParam | List[Dict[str, Any]] Tools = Optional[Iterable[ToolParam]] # Agent output types AgentResponse = 
ResponsesAPIResponse AgentCapability = Literal["step", "click"] # Exception types class ToolError(RuntimeError): """Base exception for tool-related errors""" pass class IllegalArgumentError(ToolError): """Exception raised when function arguments are invalid""" pass # Agent config registration class AgentConfigInfo(BaseModel): """Information about a registered agent config""" agent_class: type models_regex: str priority: int = 0 def matches_model(self, model: str) -> bool: """Check if this agent config matches the given model""" return bool(re.match(self.models_regex, model)) ``` -------------------------------------------------------------------------------- /libs/python/pylume/pylume/exceptions.py: -------------------------------------------------------------------------------- ```python from typing import Optional class LumeError(Exception): """Base exception for all PyLume errors.""" pass class LumeServerError(LumeError): """Raised when there's an error with the PyLume server.""" def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None): self.status_code = status_code self.response_text = response_text super().__init__(message) class LumeConnectionError(LumeError): """Raised when there's an error connecting to the PyLume server.""" pass class LumeTimeoutError(LumeError): """Raised when a request to the PyLume server times out.""" pass class LumeNotFoundError(LumeError): """Raised when a requested resource is not found.""" pass class LumeConfigError(LumeError): """Raised when there's an error with the configuration.""" pass class LumeVMError(LumeError): """Raised when there's an error with a VM operation.""" pass class LumeImageError(LumeError): """Raised when there's an error with an image operation.""" pass ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/custom-tools.mdx: 
-------------------------------------------------------------------------------- ```markdown --- title: Custom Tools slug: custom-tools --- The Agent SDK supports defining custom Python tools that run securely in sandboxed environments on remote Cua Computers. This enables safe execution of user-defined functions, isolation of dependencies, and robust automation workflows. ## Custom Tools Define a custom tool for an agent: ```python def calculate(a: int, b: int) -> int: """Calculate the sum of two integers""" return a + b # Use with agent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer, calculate] ) ``` ## Sandboxed Tools Define a sandboxed tool: ```python from computer.helpers import sandboxed @sandboxed() def read_file(location: str) -> str: """Read contents of a file""" with open(location, 'r') as f: return f.read() ``` You can then register this as a tool for your agent: ```python from agent import ComputerAgent from computer import Computer computer = Computer(...) 
agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer, read_file], ) ``` ``` -------------------------------------------------------------------------------- /libs/python/core/pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend" [project] name = "cua-core" version = "0.1.8" description = "Core functionality for Cua including telemetry and shared utilities" readme = "README.md" authors = [ { name = "TryCua", email = "[email protected]" } ] dependencies = [ "pydantic>=2.0.0", "httpx>=0.24.0", "posthog>=3.20.0" ] requires-python = ">=3.11" [tool.pdm] distribution = true [tool.pdm.build] includes = ["core/"] source-includes = ["tests/", "README.md", "LICENSE"] [tool.black] line-length = 100 target-version = ["py311"] [tool.ruff] line-length = 100 target-version = "py311" select = ["E", "F", "B", "I"] fix = true [tool.ruff.format] docstring-code-format = true [tool.mypy] strict = true python_version = "3.11" ignore_missing_imports = true disallow_untyped_defs = true check_untyped_defs = true warn_return_any = true show_error_codes = true warn_unused_ignores = false [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] python_files = "test_*.py" [dependency-groups] dev = [ "pytest>=8.3.5", ] ``` -------------------------------------------------------------------------------- /libs/xfce/src/xfce-config/xfce4-power-manager.xml: -------------------------------------------------------------------------------- ``` <?xml version="1.0" encoding="UTF-8"?> <channel name="xfce4-power-manager" version="1.0"> <property name="xfce4-power-manager" type="empty"> <property name="dpms-enabled" type="bool" value="false"/> <property name="dpms-on-ac-sleep" type="uint" value="0"/> <property name="dpms-on-ac-off" type="uint" value="0"/> <property name="dpms-on-battery-sleep" type="uint" value="0"/> <property 
name="dpms-on-battery-off" type="uint" value="0"/> <property name="blank-on-ac" type="int" value="0"/> <property name="blank-on-battery" type="int" value="0"/> <property name="lock-screen-suspend-hibernate" type="bool" value="false"/> <property name="logind-handle-lid-switch" type="bool" value="false"/> <property name="brightness-on-ac" type="uint" value="9"/> <property name="brightness-on-battery" type="uint" value="9"/> <property name="inactivity-on-ac" type="uint" value="0"/> <property name="inactivity-on-battery" type="uint" value="0"/> <property name="inactivity-sleep-mode-on-battery" type="uint" value="1"/> </property> </channel> ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Delete.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Delete: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Delete a virtual machine" ) @Argument(help: "Name of the virtual machine to delete", completion: .custom(completeVMName)) var name: String @Flag(name: .long, help: "Force deletion without confirmation") var force = false @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() {} @MainActor func run() async throws { if !force { print( "Are you sure you want to delete the virtual machine '\(name)'? 
[y/N] ", terminator: "") guard let response = readLine()?.lowercased(), response == "y" || response == "yes" else { print("Deletion cancelled") return } } let vmController = LumeController() try await vmController.delete(name: name, storage: storage) } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Pull.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Pull: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Pull a macOS image from GitHub Container Registry" ) @Argument(help: "Image to pull (format: name:tag)") var image: String @Argument( help: "Name for the VM (defaults to image name without tag)", transform: { Optional($0) }) var name: String? @Option(help: "Github Container Registry to pull from. Defaults to ghcr.io") var registry: String = "ghcr.io" @Option(help: "Organization to pull from. Defaults to trycua") var organization: String = "trycua" @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? 
init() {} @MainActor func run() async throws { let controller = LumeController() try await controller.pullImage( image: image, name: name, registry: registry, organization: organization, storage: storage ) } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/Path.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Path: CustomStringConvertible, ExpressibleByArgument { let url: URL init(_ path: String) { url = URL(filePath: NSString(string: path).expandingTildeInPath).standardizedFileURL } init(_ url: URL) { self.url = url } init(argument: String) { self.init(argument) } func file(_ path: String) -> Path { return Path(url.appendingPathComponent(path, isDirectory: false)) } func directory(_ path: String) -> Path { return Path(url.appendingPathComponent(path, isDirectory: true)) } func exists() -> Bool { return FileManager.default.fileExists(atPath: url.standardizedFileURL.path(percentEncoded: false)) } func writable() -> Bool { return FileManager.default.isWritableFile(atPath: url.standardizedFileURL.path(percentEncoded: false)) } var name: String { return url.lastPathComponent } var path: String { return url.standardizedFileURL.path(percentEncoded: false) } var description: String { return url.path() } } ``` -------------------------------------------------------------------------------- /libs/typescript/agent/tests/client.test.ts: -------------------------------------------------------------------------------- ```typescript import { describe, it, expect } from 'vitest'; import AgentClient from '../src/index.js'; describe('AgentClient', () => { it('should create client with HTTP URL', () => { const client = new AgentClient('https://localhost:8000'); expect(client).toBeDefined(); expect(client.responses).toBeDefined(); expect(typeof client.responses.create).toBe('function'); }); it('should create client with peer URL', 
() => { const client = new AgentClient('peer://test-peer-id'); expect(client).toBeDefined(); expect(client.responses).toBeDefined(); expect(typeof client.responses.create).toBe('function'); }); it('should throw error for invalid URL', () => { expect(() => { new AgentClient('invalid://url'); }).toThrow('Invalid URL format'); }); it('should have health method', async () => { const client = new AgentClient('https://localhost:8000'); expect(typeof client.health).toBe('function'); }); it('should have disconnect method', async () => { const client = new AgentClient('https://localhost:8000'); expect(typeof client.disconnect).toBe('function'); }); }); ``` -------------------------------------------------------------------------------- /examples/docker_examples.py: -------------------------------------------------------------------------------- ```python import asyncio from computer.providers.factory import VMProviderFactory from computer import Computer, VMProviderType import os async def main(): # # Create docker provider # provider = VMProviderFactory.create_provider( # provider_type="docker", # image="cua-ubuntu:latest", # Your CUA Ubuntu image # port=8080, # vnc_port=6901 # ) # # Run a container # async with provider: # vm_info = await provider.run_vm( # image="cua-ubuntu:latest", # name="my-cua-container", # run_opts={ # "memory": "4GB", # "cpu": 2, # "vnc_port": 6901, # "api_port": 8080 # } # ) # print(vm_info) computer = Computer( os_type="linux", provider_type=VMProviderType.DOCKER, name="my-cua-container", image="cua-ubuntu:latest", ) await computer.run() screenshot = await computer.interface.screenshot() with open("screenshot_docker.png", "wb") as f: f.write(screenshot) if __name__ == "__main__": asyncio.run(main()) ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/__init__.py: -------------------------------------------------------------------------------- ```python """CUA Computer Interface for 
cross-platform computer control.""" import logging import sys __version__ = "0.1.0" # Initialize logging logger = logging.getLogger("computer") # Initialize telemetry when the package is imported try: # Import from core telemetry from core.telemetry import ( is_telemetry_enabled, record_event, ) # Check if telemetry is enabled if is_telemetry_enabled(): logger.info("Telemetry is enabled") # Record package initialization record_event( "module_init", { "module": "computer", "version": __version__, "python_version": sys.version, }, ) else: logger.info("Telemetry is disabled") except ImportError as e: # Telemetry not available logger.warning(f"Telemetry not available: {e}") except Exception as e: # Other issues with telemetry logger.warning(f"Error initializing telemetry: {e}") # Core components from .computer import Computer # Provider components from .providers.base import VMProviderType __all__ = ["Computer", "VMProviderType"] ``` -------------------------------------------------------------------------------- /libs/python/pylume/pylume/__init__.py: -------------------------------------------------------------------------------- ```python """ PyLume Python SDK - A client library for managing macOS VMs with PyLume. 
Example: >>> from pylume import PyLume, VMConfig >>> client = PyLume() >>> config = VMConfig(name="my-vm", cpu=4, memory="8GB", disk_size="64GB") >>> client.create_vm(config) >>> client.run_vm("my-vm") """ # Import exceptions then all models from .exceptions import ( LumeConfigError, LumeConnectionError, LumeError, LumeImageError, LumeNotFoundError, LumeServerError, LumeTimeoutError, LumeVMError, ) from .models import ( CloneSpec, ImageInfo, ImageList, ImageRef, SharedDirectory, VMConfig, VMRunOpts, VMStatus, VMUpdateOpts, ) # Import main class last to avoid circular imports from .pylume import PyLume __version__ = "0.2.2" __all__ = [ "PyLume", "VMConfig", "VMStatus", "VMRunOpts", "VMUpdateOpts", "ImageRef", "CloneSpec", "SharedDirectory", "ImageList", "ImageInfo", "LumeError", "LumeServerError", "LumeConnectionError", "LumeTimeoutError", "LumeNotFoundError", "LumeConfigError", "LumeVMError", "LumeImageError", ] ``` -------------------------------------------------------------------------------- /docs/content/docs/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Home icon: House --- import { Monitor, Code, BookOpen } from 'lucide-react'; # Welcome! Cua is a framework for automating Windows, Mac, and Linux apps powered by computer-using agents (CUAs). Cua makes every stage of computer-using agent development simple: - **Development**: Use any LLM provider with liteLLM. The agent SDK makes multiple agent loop providers, trajectory tracing, caching, and budget management easy - **Containerization**: cua offers Docker containers pre-installed with everything needed for AI-powered RPA - **Deployment**: cua cloud gives you a production-ready cloud environment for your assistants <div className="grid grid-cols-1 md:grid-cols-2 gap-6 mt-8"> <Card icon={<Monitor />} href="/quickstart-ui" title="Quickstart (UI)"> Try the cua Agent UI in your browser—no coding required. 
</Card> <Card icon={<Code />} href="/quickstart-devs" title="Quickstart (Developers)"> Build with Python—full SDK and agent code examples. </Card> </div> <div className="grid grid-cols-1 gap-6 mt-6"> <Card icon={<BookOpen />} href="/libraries/agent" title="API Reference"> Explore the agent SDK and APIs </Card> </div> ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/models.py: -------------------------------------------------------------------------------- ```python """Models for computer configuration.""" from dataclasses import dataclass from typing import Optional, Any, Dict # Import base provider interface from .providers.base import BaseVMProvider @dataclass class Display: """Display configuration.""" width: int height: int @dataclass class Image: """VM image configuration.""" image: str tag: str name: str @dataclass class Computer: """Computer configuration.""" image: str tag: str name: str display: Display memory: str cpu: str vm_provider: Optional[BaseVMProvider] = None # @property # Remove the property decorator async def get_ip(self) -> Optional[str]: """Get the IP address of the VM.""" if not self.vm_provider: return None vm = await self.vm_provider.get_vm(self.name) # Handle both object attribute and dictionary access for ip_address if vm: if isinstance(vm, dict): return vm.get("ip_address") else: # Access as attribute for object-based return values return getattr(vm, "ip_address", None) return None ``` -------------------------------------------------------------------------------- /docs/src/assets/discord-black.svg: -------------------------------------------------------------------------------- ``` <?xml version="1.0" encoding="UTF-8"?><svg id="Discord-Logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 126.644 96"><path fill="currentColor" id="Discord-Symbol-Black" 
d="M81.15,0c-1.2376,2.1973-2.3489,4.4704-3.3591,6.794-9.5975-1.4396-19.3718-1.4396-28.9945,0-.985-2.3236-2.1216-4.5967-3.3591-6.794-9.0166,1.5407-17.8059,4.2431-26.1405,8.0568C2.779,32.5304-1.6914,56.3725.5312,79.8863c9.6732,7.1476,20.5083,12.603,32.0505,16.0884,2.6014-3.4854,4.8998-7.1981,6.8698-11.0623-3.738-1.3891-7.3497-3.1318-10.8098-5.1523.9092-.6567,1.7932-1.3386,2.6519-1.9953,20.281,9.547,43.7696,9.547,64.0758,0,.8587.7072,1.7427,1.3891,2.6519,1.9953-3.4601,2.0457-7.0718,3.7632-10.835,5.1776,1.97,3.8642,4.2683,7.5769,6.8698,11.0623,11.5419-3.4854,22.3769-8.9156,32.0509-16.0631,2.626-27.2771-4.496-50.9172-18.817-71.8548C98.9811,4.2684,90.1918,1.5659,81.1752.0505l-.0252-.0505ZM42.2802,65.4144c-6.2383,0-11.4159-5.6575-11.4159-12.6535s4.9755-12.6788,11.3907-12.6788,11.5169,5.708,11.4159,12.6788c-.101,6.9708-5.026,12.6535-11.3907,12.6535ZM84.3576,65.4144c-6.2637,0-11.3907-5.6575-11.3907-12.6535s4.9755-12.6788,11.3907-12.6788,11.4917,5.708,11.3906,12.6788c-.101,6.9708-5.026,12.6535-11.3906,12.6535Z"/></svg> ``` -------------------------------------------------------------------------------- /libs/lume/tests/Mocks/MockVM.swift: -------------------------------------------------------------------------------- ```swift import Foundation @testable import lume @MainActor class MockVM: VM { private var mockIsRunning = false override func getOSType() -> String { return "mock-os" } override func setup( ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String ) async throws { // Mock setup implementation vmDirContext.config.setCpuCount(cpuCount) vmDirContext.config.setMemorySize(memorySize) vmDirContext.config.setDiskSize(diskSize) vmDirContext.config.setMacAddress("00:11:22:33:44:55") try vmDirContext.saveConfig() } override func run( noDisplay: Bool, sharedDirectories: [SharedDirectory], mount: Path?, vncPort: Int = 0, recoveryMode: Bool = false, usbMassStoragePaths: [Path]? 
= nil ) async throws { mockIsRunning = true try await super.run( noDisplay: noDisplay, sharedDirectories: sharedDirectories, mount: mount, vncPort: vncPort, recoveryMode: recoveryMode, usbMassStoragePaths: usbMassStoragePaths ) } override func stop() async throws { mockIsRunning = false try await super.stop() } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Set.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Set: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Set new values for CPU, memory, and disk size of a virtual machine" ) @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) var name: String @Option(help: "New number of CPU cores") var cpu: Int? @Option(help: "New memory size, e.g., 8192MB or 8GB.", transform: { try parseSize($0) }) var memory: UInt64? @Option(help: "New disk size, e.g., 20480MB or 20GB.", transform: { try parseSize($0) }) var diskSize: UInt64? @Option(help: "New display resolution in format WIDTHxHEIGHT.") var display: VMDisplayResolution? @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? 
init() { } @MainActor func run() async throws { let vmController = LumeController() try vmController.updateSettings( name: name, cpu: cpu, memory: memory, diskSize: diskSize, display: display?.string, storage: storage ) } } ``` -------------------------------------------------------------------------------- /libs/lume/src/ContainerRegistry/ImagesPrinter.swift: -------------------------------------------------------------------------------- ```swift import Foundation struct ImagesPrinter { private struct Column: Sendable { let header: String let width: Int let getValue: @Sendable (String) -> String } private static let columns: [Column] = [ Column(header: "name", width: 28) { $0.split(separator: ":").first.map(String.init) ?? $0 }, Column(header: "image_id", width: 16) { $0.split(separator: ":").last.map(String.init) ?? "-" } ] static func print(images: [String]) { if images.isEmpty { Swift.print("No images found") return } printHeader() images.sorted().forEach(printImage) } private static func printHeader() { let paddedHeaders = columns.map { $0.header.paddedToWidth($0.width) } Swift.print(paddedHeaders.joined()) } private static func printImage(_ image: String) { let paddedColumns = columns.map { column in column.getValue(image).paddedToWidth(column.width) } Swift.print(paddedColumns.joined()) } } private extension String { func paddedToWidth(_ width: Int) -> String { padding(toLength: width, withPad: " ", startingAt: 0) } } ``` -------------------------------------------------------------------------------- /docs/src/assets/discord-white.svg: -------------------------------------------------------------------------------- ``` <?xml version="1.0" encoding="UTF-8"?><svg id="Discord-Logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 126.644 96"><defs><style>.cls-1{fill:#fff;}</style></defs><path id="Discord-Symbol-White" class="cls-1" 
d="M81.15,0c-1.2376,2.1973-2.3489,4.4704-3.3591,6.794-9.5975-1.4396-19.3718-1.4396-28.9945,0-.985-2.3236-2.1216-4.5967-3.3591-6.794-9.0166,1.5407-17.8059,4.2431-26.1405,8.0568C2.779,32.5304-1.6914,56.3725.5312,79.8863c9.6732,7.1476,20.5083,12.603,32.0505,16.0884,2.6014-3.4854,4.8998-7.1981,6.8698-11.0623-3.738-1.3891-7.3497-3.1318-10.8098-5.1523.9092-.6567,1.7932-1.3386,2.6519-1.9953,20.281,9.547,43.7696,9.547,64.0758,0,.8587.7072,1.7427,1.3891,2.6519,1.9953-3.4601,2.0457-7.0718,3.7632-10.835,5.1776,1.97,3.8642,4.2683,7.5769,6.8698,11.0623,11.5419-3.4854,22.3769-8.9156,32.0509-16.0631,2.626-27.2771-4.496-50.9172-18.817-71.8548C98.9811,4.2684,90.1918,1.5659,81.1752.0505l-.0252-.0505ZM42.2802,65.4144c-6.2383,0-11.4159-5.6575-11.4159-12.6535s4.9755-12.6788,11.3907-12.6788,11.5169,5.708,11.4159,12.6788c-.101,6.9708-5.026,12.6535-11.3907,12.6535ZM84.3576,65.4144c-6.2637,0-11.3907-5.6575-11.3907-12.6535s4.9755-12.6788,11.3907-12.6788,11.4917,5.708,11.3906,12.6788c-.101,6.9708-5.026,12.6535-11.3906,12.6535Z"/></svg> ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/building-lumier.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Building Lumier --- If you want to customize the Lumier container or build it from source, you can follow these steps: ```bash # 1. Navigate to the Lumier directory cd libs/lumier # 2. Build the Docker image locally docker build -t lumier-custom:latest . # 3. Run your custom build docker run -it --rm \ --name lumier-vm \ -p 8006:8006 \ -e VM_NAME=lumier-vm \ -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ -e CPU_CORES=4 \ -e RAM_SIZE=8192 \ lumier-custom:latest ``` ### Customization Options The Dockerfile provides several customization points: 1. **Base image**: The container uses Debian Bullseye Slim as the base. You can modify this if needed. 2. 
**Installed packages**: You can add or remove packages in the apt-get install list. 3. **Hooks**: Check the `/run/hooks/` directory for scripts that run at specific points during VM lifecycle. 4. **Configuration**: Review `/run/config/constants.sh` for default settings. After making your modifications, you can build and push your custom image to your own Docker Hub repository: ```bash # Build with a custom tag docker build -t yourusername/lumier:custom . # Push to Docker Hub (after docker login) docker push yourusername/lumier:custom ``` ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/__init__.py: -------------------------------------------------------------------------------- ```python """ agent - Decorator-based Computer Use Agent with liteLLM integration """ import logging import sys from .decorators import register_agent from .agent import ComputerAgent from .types import Messages, AgentResponse # Import loops to register them from . 
import loops __all__ = [ "register_agent", "ComputerAgent", "Messages", "AgentResponse" ] __version__ = "0.4.0" logger = logging.getLogger(__name__) # Initialize telemetry when the package is imported try: # Import from core telemetry for basic functions from core.telemetry import ( is_telemetry_enabled, record_event, ) # Check if telemetry is enabled if is_telemetry_enabled(): logger.info("Telemetry is enabled") # Record package initialization record_event( "module_init", { "module": "agent", "version": __version__, "python_version": sys.version, }, ) else: logger.info("Telemetry is disabled") except ImportError as e: # Telemetry not available logger.warning(f"Telemetry not available: {e}") except Exception as e: # Other issues with telemetry logger.warning(f"Error initializing telemetry: {e}") ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Lumier description: Reference for the current version of the Lumier library. github: - https://github.com/trycua/cua/tree/main/libs/lumier --- **Lumier** is an interface for running macOS virtual machines with minimal setup. It uses Docker as a packaging system to deliver a pre-configured environment that connects to the `lume` virtualization service running on your host machine. With Lumier, you get: - A ready-to-use macOS or Linux virtual machine in minutes - Browser-based VNC access to your VM - Easy file sharing between your host and VM - Simple configuration through environment variables ## How It Works <Callout title="Note"> We're using Docker primarily as a convenient delivery mechanism, not as an isolation layer. Unlike traditional Docker containers, Lumier leverages the Apple Virtualization Framework (Apple Vz) through the `lume` CLI to create true virtual machines. </Callout> Here's what's happening behind the scenes: 1. 
The Docker container provides a consistent environment to run the Lumier interface 2. Lumier connects to the Lume service running on your host Mac 3. Lume uses Apple's Virtualization Framework to create a true macOS virtual machine 4. The VM runs with hardware acceleration using your Mac's native virtualization capabilities ``` -------------------------------------------------------------------------------- /libs/typescript/agent/package.json: -------------------------------------------------------------------------------- ```json { "name": "@trycua/agent", "version": "0.1.0", "packageManager": "[email protected]", "description": "TypeScript SDK for CUA agent interaction", "type": "module", "license": "MIT", "homepage": "https://github.com/trycua/cua/tree/main/libs/typescript/agent", "bugs": { "url": "https://github.com/trycua/cua/issues" }, "repository": { "type": "git", "url": "git+https://github.com/trycua/cua.git" }, "author": "cua", "files": [ "dist" ], "main": "./dist/index.js", "module": "./dist/index.js", "types": "./dist/index.d.ts", "exports": { ".": "./dist/index.js", "./package.json": "./package.json" }, "publishConfig": { "access": "public" }, "scripts": { "lint": "biome lint .", "lint:fix": "biome lint --fix .", "build": "tsdown", "dev": "tsdown --watch", "test": "vitest", "typecheck": "tsc --noEmit", "release": "bumpp && pnpm publish", "prepublishOnly": "pnpm run build" }, "dependencies": { "@trycua/core": "^0.1.2", "peerjs": "^1.5.4", "pino": "^9.7.0" }, "devDependencies": { "@biomejs/biome": "^1.9.4", "@types/node": "^22.15.17", "bumpp": "^10.1.0", "happy-dom": "^17.4.7", "tsdown": "^0.14.1", "typescript": "^5.7.2", "vitest": "^2.1.8" } } ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/index.test.ts: -------------------------------------------------------------------------------- ```typescript import { describe, expect, it } from 'vitest'; import * as InterfaceExports 
from '../../src/interface/index.ts'; describe('Interface Module Exports', () => { it('should export InterfaceFactory', () => { expect(InterfaceExports.InterfaceFactory).toBeDefined(); expect( InterfaceExports.InterfaceFactory.createInterfaceForOS ).toBeDefined(); }); it('should export BaseComputerInterface', () => { expect(InterfaceExports.BaseComputerInterface).toBeDefined(); }); it('should export MacOSComputerInterface', () => { expect(InterfaceExports.MacOSComputerInterface).toBeDefined(); }); it('should export LinuxComputerInterface', () => { expect(InterfaceExports.LinuxComputerInterface).toBeDefined(); }); it('should export WindowsComputerInterface', () => { expect(InterfaceExports.WindowsComputerInterface).toBeDefined(); }); it('should export all expected interfaces', () => { const expectedExports = [ 'InterfaceFactory', 'BaseComputerInterface', 'MacOSComputerInterface', 'LinuxComputerInterface', 'WindowsComputerInterface', ]; const actualExports = Object.keys(InterfaceExports); for (const exportName of expectedExports) { expect(actualExports).toContain(exportName); } }); }); ``` -------------------------------------------------------------------------------- /libs/lume/Package.swift: -------------------------------------------------------------------------------- ```swift // swift-tools-version: 6.0 // The swift-tools-version declares the minimum version of Swift required to build this package. import PackageDescription let package = Package( name: "lume", platforms: [ .macOS(.v14) ], dependencies: [ .package(url: "https://github.com/apple/swift-argument-parser", from: "1.3.1"), .package(url: "https://github.com/apple/swift-format.git", branch: ("release/5.10")), .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.2.0")), .package(url: "https://github.com/mhdhejazi/Dynamic", branch: "master") ], targets: [ // Targets are the basic building blocks of a package, defining a module or a test suite. 
// Targets can depend on other targets in this package and products from dependencies. .executableTarget( name: "lume", dependencies: [ .product(name: "ArgumentParser", package: "swift-argument-parser"), .product(name: "Atomics", package: "swift-atomics"), .product(name: "Dynamic", package: "Dynamic") ], path: "src"), .testTarget( name: "lumeTests", dependencies: [ "lume" ], path: "tests") ] ) ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/installation.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Installation --- Install the package from PyPI: ```bash pip install cua-mcp-server ``` This will install: - The MCP server - CUA agent and computer dependencies - An executable `cua-mcp-server` script in your PATH ## Easy Setup Script If you want to simplify installation, you can use this one-liner to download and run the installation script: ```bash curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/python/mcp-server/scripts/install_mcp_server.sh | bash ``` This script will: - Create the ~/.cua directory if it doesn't exist - Generate a startup script at ~/.cua/start_mcp_server.sh - Make the script executable - The startup script automatically manages Python virtual environments and installs/updates the cua-mcp-server package You can then use the script in your MCP configuration like this: ```json { "mcpServers": { "cua-agent": { "command": "/bin/bash", "args": ["~/.cua/start_mcp_server.sh"], "env": { "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022" } } } } ``` ### Troubleshooting If you get a `/bin/bash: ~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, try changing the path to the script to be absolute instead of relative. 
To see the logs: ``` tail -n 20 -f ~/Library/Logs/Claude/mcp*.log ``` ``` -------------------------------------------------------------------------------- /.github/workflows/npm-publish-core.yml: -------------------------------------------------------------------------------- ```yaml name: Publish @trycua/core to npm on: push: branches: main jobs: publish: permissions: id-token: write contents: read runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Use Node.js 24.x uses: actions/setup-node@v4 with: node-version: "24.x" registry-url: "https://registry.npmjs.org" - name: Setup pnpm 10 uses: pnpm/action-setup@v4 with: version: 10 - name: Check if version changed id: check-version uses: EndBug/version-check@v2 with: file-name: libs/typescript/core/package.json diff-search: true - name: Install dependencies if: steps.check-version.outputs.changed == 'true' working-directory: ./libs/typescript/core run: pnpm install --frozen-lockfile - name: Build package if: steps.check-version.outputs.changed == 'true' working-directory: ./libs/typescript/core run: pnpm run build --if-present - name: Publish to npm if: steps.check-version.outputs.changed == 'true' working-directory: ./libs/typescript/core run: pnpm publish --access public --no-git-checks env: NPM_CONFIG_PROVENANCE: true NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} ``` -------------------------------------------------------------------------------- /docs/src/components/mermaid.tsx: -------------------------------------------------------------------------------- ```typescript 'use client'; import { useEffect, useId, useRef, useState } from 'react'; import { useTheme } from 'next-themes'; export function Mermaid({ chart }: { chart: string }) { const id = useId(); const [svg, setSvg] = useState(''); const containerRef = useRef<HTMLDivElement>(null); const currentChartRef = useRef<string>(null); const { resolvedTheme } = useTheme(); useEffect(() => { if (currentChartRef.current === chart || 
!containerRef.current) return; const container = containerRef.current; currentChartRef.current = chart; async function renderChart() { const { default: mermaid } = await import('mermaid'); try { // configure mermaid mermaid.initialize({ startOnLoad: false, securityLevel: 'loose', fontFamily: 'inherit', themeCSS: 'margin: 1.5rem auto 0;', theme: resolvedTheme === 'dark' ? 'dark' : 'default', }); const { svg, bindFunctions } = await mermaid.render( id, chart.replaceAll('\\n', '\n'), ); bindFunctions?.(container); setSvg(svg); } catch (error) { console.error('Error while rendering mermaid', error); } } void renderChart(); }, [chart, id, resolvedTheme]); return <div ref={containerRef} dangerouslySetInnerHTML={{ __html: svg }} />; } ``` -------------------------------------------------------------------------------- /.github/workflows/npm-publish-computer.yml: -------------------------------------------------------------------------------- ```yaml name: Publish @trycua/computer to npm on: push: branches: main jobs: publish: permissions: id-token: write contents: read runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Use Node.js 24.x uses: actions/setup-node@v4 with: node-version: "24.x" registry-url: "https://registry.npmjs.org" - name: Setup pnpm 10 uses: pnpm/action-setup@v4 with: version: 10 - name: Check if version changed id: check-version uses: EndBug/version-check@v2 with: file-name: libs/typescript/computer/package.json diff-search: true - name: Install dependencies if: steps.check-version.outputs.changed == 'true' working-directory: ./libs/typescript/computer run: pnpm install --frozen-lockfile - name: Build package if: steps.check-version.outputs.changed == 'true' working-directory: ./libs/typescript/computer run: pnpm run build --if-present - name: Publish to npm if: steps.check-version.outputs.changed == 'true' working-directory: ./libs/typescript/computer run: pnpm publish --access public --no-git-checks env: NPM_CONFIG_PROVENANCE: true 
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} ``` -------------------------------------------------------------------------------- /libs/typescript/computer/package.json: -------------------------------------------------------------------------------- ```json { "name": "@trycua/computer", "version": "0.1.3", "packageManager": "[email protected]", "description": "Typescript SDK for cua computer interaction", "type": "module", "license": "MIT", "homepage": "https://github.com/trycua/cua/tree/feature/computer/typescript/libs/typescript/computer", "bugs": { "url": "https://github.com/trycua/cua/issues" }, "repository": { "type": "git", "url": "git+https://github.com/trycua/cua.git" }, "author": "cua", "files": [ "dist" ], "main": "./dist/index.js", "module": "./dist/index.js", "types": "./dist/index.d.ts", "exports": { ".": "./dist/index.js", "./package.json": "./package.json" }, "publishConfig": { "access": "public" }, "scripts": { "lint": "biome lint .", "lint:fix": "biome lint --fix .", "build": "tsdown", "dev": "tsdown --watch", "test": "vitest", "typecheck": "tsc --noEmit", "release": "bumpp && pnpm publish", "prepublishOnly": "pnpm run build" }, "dependencies": { "@trycua/core": "^0.1.2", "pino": "^9.7.0", "ws": "^8.18.0" }, "devDependencies": { "@biomejs/biome": "^1.9.4", "@types/node": "^22.15.17", "@types/ws": "^8.18.1", "bumpp": "^10.1.0", "happy-dom": "^17.4.7", "tsdown": "^0.11.9", "tsx": "^4.19.4", "typescript": "^5.8.3", "vitest": "^3.1.3" } } ``` -------------------------------------------------------------------------------- /libs/python/computer/pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend" [project] name = "cua-computer" version = "0.4.8" description = "Computer-Use Interface (CUI) framework powering Cua" readme = "README.md" authors = [ { name = "TryCua", email = "[email protected]" } ] dependencies = [ "pillow>=10.0.0", 
"websocket-client>=1.8.0", "websockets>=12.0", "aiohttp>=3.9.0", "cua-core>=0.1.0,<0.2.0", "pydantic>=2.11.1" ] requires-python = ">=3.11" [project.optional-dependencies] lume = [ ] lumier = [ ] ui = [ "gradio>=5.23.3", "python-dotenv>=1.0.1", "datasets>=3.6.0", ] all = [ # Include all optional dependencies "gradio>=5.23.3", "python-dotenv>=1.0.1", "datasets>=3.6.0", ] [tool.pdm] distribution = true [tool.pdm.build] includes = ["computer/"] source-includes = ["tests/", "README.md", "LICENSE"] [tool.black] line-length = 100 target-version = ["py311"] [tool.ruff] line-length = 100 target-version = "py311" select = ["E", "F", "B", "I"] fix = true [tool.ruff.format] docstring-code-format = true [tool.mypy] strict = true python_version = "3.11" ignore_missing_imports = true disallow_untyped_defs = true check_untyped_defs = true warn_return_any = true show_error_codes = true warn_unused_ignores = false [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] python_files = "test_*.py" ``` -------------------------------------------------------------------------------- /libs/typescript/core/package.json: -------------------------------------------------------------------------------- ```json { "name": "@trycua/core", "version": "0.1.3", "packageManager": "[email protected]", "description": "Typescript SDK for cua core.", "type": "module", "license": "MIT", "homepage": "https://github.com/trycua/cua/tree/feature/computer/typescript/libs/typescript/computer", "bugs": { "url": "https://github.com/trycua/cua/issues" }, "repository": { "type": "git", "url": "git+https://github.com/trycua/cua.git" }, "author": "cua", "files": [ "dist" ], "main": "./dist/index.js", "module": "./dist/index.js", "types": "./dist/index.d.ts", "exports": { ".": "./dist/index.js", "./package.json": "./package.json" }, "publishConfig": { "access": "public" }, "scripts": { "lint": "biome lint .", "lint:fix": "biome lint --fix .", "build": "tsdown", "dev": "tsdown --watch", "test": 
"vitest", "typecheck": "tsc --noEmit", "release": "bumpp && pnpm publish", "prepublishOnly": "pnpm run build" }, "dependencies": { "@types/uuid": "^10.0.0", "pino": "^9.7.0", "posthog-node": "^5.1.1", "uuid": "^11.1.0" }, "devDependencies": { "@biomejs/biome": "^1.9.4", "@types/node": "^22.15.17", "@types/ws": "^8.18.1", "bumpp": "^10.1.0", "happy-dom": "^17.4.7", "tsdown": "^0.11.9", "tsx": "^4.19.4", "typescript": "^5.8.3", "vitest": "^3.1.3" } } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/adapters/models/__init__.py: -------------------------------------------------------------------------------- ```python from typing import Optional try: from transformers import AutoConfig HF_AVAILABLE = True except ImportError: HF_AVAILABLE = False from .generic import GenericHFModel from .opencua import OpenCUAModel from .qwen2_5_vl import Qwen2_5_VLModel from .internvl import InternVLModel def load_model(model_name: str, device: str = "auto", trust_remote_code: bool = False): """Factory function to load and return the right model handler instance. - If the underlying transformers config class matches OpenCUA, return OpenCUAModel - Otherwise, return GenericHFModel """ if not HF_AVAILABLE: raise ImportError( "HuggingFace transformers dependencies not found. 
Install with: pip install \"cua-agent[uitars-hf]\"" ) cfg = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code) cls = cfg.__class__.__name__ print(f"cls: {cls}") if "OpenCUA" in cls: return OpenCUAModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) elif "Qwen2_5_VL" in cls: return Qwen2_5_VLModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) elif "InternVL" in cls: return InternVLModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) return GenericHFModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/trajectories.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Trajectories description: Recording and viewing agent conversation trajectories --- # Trajectory Saving Callback The TrajectorySaverCallback records complete agent conversations including messages, actions, and screenshots for debugging and analysis. 
"""Utility functions for example scripts."""

import os
import signal
import sys
from pathlib import Path
from typing import Optional


def load_env_file(path: Path) -> bool:
    """Load environment variables from a dotenv-style file.

    Blank lines and lines starting with '#' are ignored.  Lines without an
    '=' separator are skipped instead of raising, so a single malformed
    entry cannot crash an example script.

    Args:
        path: Path to the .env file.

    Returns:
        True if the file existed and was processed, False otherwise.
    """
    if not path.exists():
        return False

    print(f"Loading environment from {path}")
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            # Fix: a line without '=' previously raised ValueError from the
            # unpacking below; skip such lines instead.
            if "=" not in line:
                continue
            key, value = line.split("=", 1)
            os.environ[key] = value
    return True


def load_dotenv_files():
    """Load environment variables from .env files.

    Tries .env.local first, then falls back to .env only when .env.local
    does not exist (the usual dotenv precedence).
    """
    # Project root is the parent of the examples directory.
    project_root = Path(__file__).parent.parent

    env_local_path = project_root / ".env.local"
    env_path = project_root / ".env"

    # Load .env.local if it exists, otherwise try .env.
    if not load_env_file(env_local_path):
        load_env_file(env_path)


def handle_sigint(signum, frame):
    """Handle SIGINT (Ctrl+C) by exiting gracefully."""
    print("\nExiting gracefully...")
    sys.exit(0)
Open a Pull Request with: - A clear title and description - Reference to any related issues - Screenshots or logs if relevant 3. Respond to any feedback from maintainers ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Serve.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Serve: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Start the VM management server" ) @Option(help: "Port to listen on") var port: UInt16 = 7777 func run() async throws { let server = await Server(port: port) Logger.info("Starting server", metadata: ["port": "\(port)"]) // Using custom error handling to prevent ArgumentParser from printing additional error messages do { try await server.start() } catch let error as PortError { // For port errors, just log once with the suggestion let suggestedPort = port + 1 // Create a user-friendly error message that includes the suggestion let message = """ \(error.localizedDescription) Try using a different port: lume serve --port \(suggestedPort) """ // Log the message (without the "ERROR:" prefix that ArgumentParser will add) Logger.error(message) // Exit with a custom code to prevent ArgumentParser from printing the error again Foundation.exit(1) } catch { // For other errors, log once Logger.error("Failed to start server", metadata: ["error": error.localizedDescription]) throw error } } } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/computers/__init__.py: -------------------------------------------------------------------------------- ```python """ Computer handler factory and interface definitions. This module provides a factory function to create computer handlers from different computer interface types, supporting both the ComputerHandler protocol and the Computer library interface. 
""" from .base import AsyncComputerHandler from .cua import cuaComputerHandler from .custom import CustomComputerHandler from computer import Computer as cuaComputer def is_agent_computer(computer): """Check if the given computer is a ComputerHandler or CUA Computer.""" return isinstance(computer, AsyncComputerHandler) or \ isinstance(computer, cuaComputer) or \ (isinstance(computer, dict)) #and "screenshot" in computer) async def make_computer_handler(computer): """ Create a computer handler from a computer interface. Args: computer: Either a ComputerHandler instance, Computer instance, or dict of functions Returns: ComputerHandler: A computer handler instance Raises: ValueError: If the computer type is not supported """ if isinstance(computer, AsyncComputerHandler): return computer if isinstance(computer, cuaComputer): computer_handler = cuaComputerHandler(computer) await computer_handler._initialize() return computer_handler if isinstance(computer, dict): return CustomComputerHandler(computer) raise ValueError(f"Unsupported computer type: {type(computer)}") ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer-server/REST-API.mdx: -------------------------------------------------------------------------------- ```markdown --- title: REST API Reference description: Reference for the /cmd REST endpoint of the Computer Server. --- # REST API Reference The Computer Server exposes a single REST endpoint for command execution: - `http://localhost:8000/cmd` - `https://your-container.containers.cloud.trycua.com:8443/cmd` (cloud) ## POST /cmd - Accepts commands as JSON in the request body - Returns results as a streaming response (text/event-stream) ### Request Format ```json { "command": "<command_name>", "params": { ... 
} } ``` ### Required Headers (for cloud containers) - `X-Container-Name`: Name of the container (cloud only) - `X-API-Key`: API key for authentication (cloud only) ### Example Request (Python) ```python import requests url = "http://localhost:8000/cmd" body = {"command": "screenshot", "params": {}} resp = requests.post(url, json=body) print(resp.text) ``` ### Example Request (Cloud) ```python import requests url = "https://your-container.containers.cloud.trycua.com:8443/cmd" headers = { "X-Container-Name": "your-container", "X-API-Key": "your-api-key" } body = {"command": "screenshot", "params": {}} resp = requests.post(url, json=body, headers=headers) print(resp.text) ``` ### Response Format Streaming text/event-stream with JSON objects, e.g.: ``` data: {"success": true, "content": "..."} data: {"success": false, "error": "..."} ``` ### Supported Commands See [Commands Reference](./Commands) for the full list of commands and parameters. ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/interface/factory.ts: -------------------------------------------------------------------------------- ```typescript /** * Factory for creating computer interfaces. */ import type { OSType } from '../types'; import type { BaseComputerInterface } from './base'; import { LinuxComputerInterface } from './linux'; import { MacOSComputerInterface } from './macos'; import { WindowsComputerInterface } from './windows'; export const InterfaceFactory = { /** * Create an interface for the specified OS. 
* * @param os Operating system type ('macos', 'linux', or 'windows') * @param ipAddress IP address of the computer to control * @param apiKey Optional API key for cloud authentication * @param vmName Optional VM name for cloud authentication * @returns The appropriate interface for the OS * @throws Error if the OS type is not supported */ createInterfaceForOS( os: OSType, ipAddress: string, apiKey?: string, vmName?: string ): BaseComputerInterface { switch (os) { case 'macos': return new MacOSComputerInterface( ipAddress, 'lume', 'lume', apiKey, vmName ); case 'linux': return new LinuxComputerInterface( ipAddress, 'lume', 'lume', apiKey, vmName ); case 'windows': return new WindowsComputerInterface( ipAddress, 'lume', 'lume', apiKey, vmName ); default: throw new Error(`Unsupported OS type: ${os}`); } }, }; ``` -------------------------------------------------------------------------------- /libs/kasm/Dockerfile: -------------------------------------------------------------------------------- ```dockerfile FROM kasmweb/core-ubuntu-jammy:1.17.0 USER root ENV HOME=/home/kasm-default-profile ENV STARTUPDIR=/dockerstartup ENV INST_SCRIPTS=$STARTUPDIR/install WORKDIR $HOME ######### Customize Container Here ########### # Installing python, pip, and libraries RUN apt-get update RUN apt install -y wget build-essential libncursesw5-dev libssl-dev \ libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev libffi-dev zlib1g-dev RUN add-apt-repository ppa:deadsnakes/ppa RUN apt install -y python3.11 python3-pip python3-tk python3-dev \ gnome-screenshot wmctrl ffmpeg socat xclip RUN pip install cua-computer-server # Install Firefox ENV DEBIAN_FRONTEND=noninteractive \ INST_DIR=$STARTUPDIR/install COPY ./src/ $INST_DIR RUN bash ${INST_DIR}/ubuntu/install/firefox/install_firefox.sh # Disable SSL requirement RUN sed -i 's/require_ssl: true/require_ssl: false/g' /usr/share/kasmvnc/kasmvnc_defaults.yaml RUN sed -i 's/-sslOnly//g' /dockerstartup/vnc_startup.sh # Running the python 
class PromptInstructionsCallback(AsyncCallbackHandler):
    """Prepend a user instructions message to the message list.

    A minimal, non-invasive way to steer the agent's behavior: only the
    outgoing messages array is modified before each LLM call, so this works
    with any provider/loop and requires no changes to tools or agent loops.
    """

    def __init__(self, instructions: Optional[str]) -> None:
        # May be None or empty, in which case the callback is a no-op.
        self.instructions = instructions

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return *messages* with the instructions prepended as a user turn."""
        if not self.instructions:
            return messages

        # Avoid duplicating the instructions when they are already the
        # first message in the conversation.
        if messages:
            head = messages[0]
            if (
                isinstance(head, dict)
                and head.get("role") == "user"
                and head.get("content") == self.instructions
            ):
                return messages

        prefix = {"role": "user", "content": self.instructions}
        return [prefix, *messages]
asyncio.run(main()) ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/agent-lifecycle.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Agent Lifecycle description: Agent callback lifecycle and hooks --- # Callbacks Callbacks provide hooks into the agent lifecycle for extensibility. They're called in a specific order during agent execution. ## Callback Lifecycle ### 1. `on_run_start(kwargs, old_items)` Called once when agent run begins. Initialize tracking, logging, or state. ### 2. `on_run_continue(kwargs, old_items, new_items)` → bool Called before each iteration. Return `False` to stop execution (e.g., budget limits). ### 3. `on_llm_start(messages)` → messages Preprocess messages before LLM call. Use for PII anonymization, image retention. ### 4. `on_api_start(kwargs)` Called before each LLM API call. ### 5. `on_api_end(kwargs, result)` Called after each LLM API call completes. ### 6. `on_usage(usage)` Called when usage information is received from LLM. ### 7. `on_llm_end(messages)` → messages Postprocess messages after LLM call. Use for PII deanonymization. ### 8. `on_responses(kwargs, responses)` Called when responses are received from agent loop. ### 9. Response-specific hooks: - `on_text(item)` - Text messages - `on_computer_call_start(item)` - Before computer actions - `on_computer_call_end(item, result)` - After computer actions - `on_function_call_start(item)` - Before function calls - `on_function_call_end(item, result)` - After function calls - `on_screenshot(screenshot, name)` - When screenshots are taken ### 10. `on_run_end(kwargs, old_items, new_items)` Called when agent run completes. Finalize tracking, save trajectories. 
class MacOSDioramaHandler(BaseDioramaHandler):
    """Handler for Diorama commands on macOS, using the local diorama module."""

    async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
        """Dispatch *action* to the Diorama interface built from arguments['app_list'].

        Returns a dict with "success" plus either "result" on success or
        "error" (and "trace" when an exception was raised) on failure.
        """
        if platform.system().lower() != "darwin":
            return {"success": False, "error": "Diorama is only supported on macOS."}
        try:
            app_list = arguments.get("app_list") if arguments else None
            if not app_list:
                return {"success": False, "error": "Missing 'app_list' in arguments"}

            interface = Diorama(app_list).interface
            if not hasattr(interface, action):
                return {"success": False, "error": f"Unknown diorama action: {action}"}
            method = getattr(interface, action)

            # app_list selects the diorama; it is not a parameter of the action.
            call_kwargs = {k: v for k, v in arguments.items() if k != "app_list"}

            # Support both coroutine and plain methods on the interface.
            if inspect.iscoroutinefunction(method):
                result = await method(**call_kwargs)
            else:
                result = method(**call_kwargs)
            return {"success": True, "result": result}
        except Exception as e:
            import traceback

            return {"success": False, "error": str(e), "trace": traceback.format_exc()}
class InterfaceFactory:
    """Factory for creating OS-specific computer interfaces."""

    @staticmethod
    def create_interface_for_os(
        os: Literal['macos', 'linux', 'windows'],
        ip_address: str,
        api_key: Optional[str] = None,
        vm_name: Optional[str] = None
    ) -> BaseComputerInterface:
        """Create an interface for the specified OS.

        Args:
            os: Operating system type ('macos', 'linux', or 'windows')
            ip_address: IP address of the computer to control
            api_key: Optional API key for cloud authentication
            vm_name: Optional VM name for cloud authentication

        Returns:
            BaseComputerInterface: The appropriate interface for the OS

        Raises:
            ValueError: If the OS type is not supported
        """
        # Imported here (not at module level) to avoid circular imports.
        from .linux import LinuxComputerInterface
        from .macos import MacOSComputerInterface
        from .windows import WindowsComputerInterface

        # Dispatch table instead of an if/elif chain.
        interface_classes = {
            'macos': MacOSComputerInterface,
            'linux': LinuxComputerInterface,
            'windows': WindowsComputerInterface,
        }

        interface_cls = interface_classes.get(os)
        if interface_cls is None:
            raise ValueError(f"Unsupported OS type: {os}")
        return interface_cls(ip_address, api_key=api_key, vm_name=vm_name)
import asyncio


class DioramaComputer:
    """
    A minimal Computer-like facade over a Diorama instance, compatible with
    ComputerAgent. Implements ``_initialized``, ``run()`` and ``__aenter__``.
    """

    def __init__(self, diorama):
        """
        Wrap a diorama instance with a computer-like interface.

        Args:
            diorama: The diorama instance to wrap.
        """
        self.diorama = diorama
        self.interface = diorama.interface
        self._initialized = False

    async def __aenter__(self):
        """
        Async context-manager entry for ComputerAgent compatibility.

        Ensures an event loop is installed (creating and setting a new one if
        none is currently running) and marks this instance as initialized.

        Returns:
            DioramaComputer: self, now initialized.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: install a fresh one for compatibility.
            asyncio.set_event_loop(asyncio.new_event_loop())
        self._initialized = True
        return self

    async def run(self):
        """
        Compatibility stub for the ComputerAgent interface.

        Initializes the instance on first use via ``__aenter__``.

        Returns:
            DioramaComputer: The initialized instance.
        """
        if self._initialized:
            return self
        return await self.__aenter__()
</Callout> ``` -------------------------------------------------------------------------------- /libs/python/pylume/pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] build-backend = "pdm.backend" requires = ["pdm-backend"] [project] authors = [{ name = "TryCua", email = "[email protected]" }] classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: MacOS :: MacOS X", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dependencies = ["pydantic>=2.11.1"] description = "Python SDK for lume - run macOS and Linux VMs on Apple Silicon" dynamic = ["version"] keywords = ["apple-silicon", "macos", "virtualization", "vm"] license = { text = "MIT" } name = "pylume" readme = "README.md" requires-python = ">=3.9" [tool.pdm.version] path = "pylume/__init__.py" source = "file" [project.urls] homepage = "https://github.com/trycua/pylume" repository = "https://github.com/trycua/pylume" [tool.pdm] distribution = true [tool.pdm.dev-dependencies] dev = [ "black>=23.0.0", "isort>=5.12.0", "pytest-asyncio>=0.23.0", "pytest>=7.0.0", ] [tool.black] line-length = 100 target-version = ["py311"] [tool.ruff] fix = true line-length = 100 select = ["B", "E", "F", "I"] target-version = "py311" [tool.ruff.format] docstring-code-format = true [tool.mypy] check_untyped_defs = true disallow_untyped_defs = true ignore_missing_imports = true python_version = "3.11" show_error_codes = true strict = true warn_return_any = true warn_unused_ignores = false [tool.pytest.ini_options] asyncio_mode = "auto" python_files = "test_*.py" testpaths = ["tests"] [tool.pdm.build] includes = ["pylume/"] source-includes = ["LICENSE", "README.md", "tests/"] ``` -------------------------------------------------------------------------------- 
"""Example: run a Python function inside a Cua Container via @sandboxed."""

from pathlib import Path
import os
import sys

# Load environment variables from .env file
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

import asyncio

from computer.computer import Computer
from computer.helpers import sandboxed


async def main():
    """Demonstrate venv_install plus a @sandboxed function running in-container."""
    # Initialize the computer in a Cua Container
    computer = Computer()
    await computer.run()

    # Install a package in a virtual environment in the container
    await computer.venv_install("demo_venv", ["requests", "macos-pyxa"])

    # Open Safari
    await computer.interface.run_command("open -a Safari")
    await asyncio.sleep(2)  # give Safari time to launch before scripting it

    # Define a sandboxed function
    # This function will run inside the Cua Container
    @sandboxed("demo_venv")
    def greet_and_print(name):
        # get .html of the current Safari tab
        # NOTE: PyXA is importable here because macos-pyxa was installed
        # into "demo_venv" above; the import runs inside the container.
        import PyXA

        safari = PyXA.Application("Safari")
        current_doc = safari.current_document
        html = current_doc.source()
        print(f"Hello from inside the container, {name}!")
        print("Safari HTML length:", len(html))
        return {"greeted": name, "safari_html_length": len(html), "safari_html_snippet": html[:200]}

    # Call with args and kwargs
    result = await greet_and_print("Cua")
    print("Result from sandboxed function:", result)


if __name__ == "__main__":
    asyncio.run(main())
debian:bullseye-slim AS lumier-base # Set environment variables for Lume API server configuration ENV LUME_API_HOST="host.docker.internal" # Default VM configuration (can be overridden at runtime) ENV VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest" ENV RAM_SIZE="8192" ENV CPU_CORES="4" ENV DISK_SIZE="100" ENV DISPLAY="1024x768" ENV VM_NAME="lumier" ENV HOST_SHARED_PATH="" ENV LUMIER_DEBUG="0" # Install necessary tools and noVNC dependencies RUN apt-get update && \ apt-get install -y \ netcat-traditional \ curl \ sshpass \ wget \ unzip \ git \ python3 \ python3-pip \ python3-numpy \ procps && \ rm -rf /var/lib/apt/lists/* # Download and install noVNC without caching RUN wget https://github.com/trycua/noVNC/archive/refs/heads/master.zip -O master1.zip && \ unzip master1.zip && \ mv noVNC-master /opt/noVNC && \ rm master1.zip # Set environment variables for noVNC ENV NOVNC_PATH="/opt/noVNC" # Create necessary directories RUN mkdir -p /run/bin /run/lib /run/config /run/hooks /run/lifecycle # Copy scripts to the container COPY src/config/constants.sh /run/config/ COPY src/bin/entry.sh /run/bin/entry.sh # Copy library files if they exist COPY src/lib/ /run/lib/ COPY src/hooks/ /run/hooks/ # Copy on-logon script to lifecycle directory COPY src/hooks/on-logon.sh /run/lifecycle/ # Make scripts executable RUN chmod +x \ /run/bin/* \ /run/hooks/* \ /run/lifecycle/* 2>/dev/null || true # Expose ports for noVNC and Lume API EXPOSE 8006 # VOLUME setup VOLUME [ "/storage" ] VOLUME [ "/data" ] # Default entrypoint ENTRYPOINT ["/run/bin/entry.sh"] ``` -------------------------------------------------------------------------------- /.github/workflows/pypi-publish-core.yml: -------------------------------------------------------------------------------- ```yaml name: Publish Core Package on: push: tags: - "core-v*" workflow_dispatch: inputs: version: description: "Version to publish (without v prefix)" required: true default: "0.1.0" workflow_call: inputs: version: description: 
"Version to publish" required: true type: string # Adding permissions at workflow level permissions: contents: write jobs: prepare: runs-on: macos-latest outputs: version: ${{ steps.get-version.outputs.version }} steps: - uses: actions/checkout@v4 - name: Determine version id: get-version run: | if [ "${{ github.event_name }}" == "push" ]; then # Extract version from tag (for package-specific tags) if [[ "${{ github.ref }}" =~ ^refs/tags/core-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then VERSION=${BASH_REMATCH[1]} else echo "Invalid tag format for core" exit 1 fi elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then # Use version from workflow dispatch VERSION=${{ github.event.inputs.version }} else # Use version from workflow_call VERSION=${{ inputs.version }} fi echo "VERSION=$VERSION" echo "version=$VERSION" >> $GITHUB_OUTPUT publish: needs: prepare uses: ./.github/workflows/pypi-reusable-publish.yml with: package_name: "core" package_dir: "libs/python/core" version: ${{ needs.prepare.outputs.version }} is_lume_package: false base_package_name: "cua-core" secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} ``` -------------------------------------------------------------------------------- /libs/python/computer-server/pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend" [project] name = "cua-computer-server" version = "0.1.24" description = "Server component for the Computer-Use Interface (CUI) framework powering Cua" authors = [ { name = "TryCua", email = "[email protected]" } ] readme = "README.md" license = { text = "MIT" } requires-python = ">=3.9" dependencies = [ "fastapi>=0.111.0", "uvicorn[standard]>=0.27.0", "pydantic>=2.0.0", "pyautogui>=0.9.54", "pynput>=1.8.1", "pillow>=10.2.0", "aiohttp>=3.9.1", "pyperclip>=1.9.0", "websockets>=12.0" ] [project.optional-dependencies] macos = [ "pyobjc-framework-Cocoa>=10.1", 
import Foundation

/// Linux-specific virtual machine implementation
@MainActor
final class LinuxVM: VM {
    /// Creates a Linux VM backed by `LinuxVirtualizationService` and the
    /// default VNC service unless custom factories are injected (tests).
    override init(
        vmDirContext: VMDirContext,
        virtualizationServiceFactory: @escaping (VMVirtualizationServiceContext) throws -> VMVirtualizationService = { try LinuxVirtualizationService(configuration: $0) },
        vncServiceFactory: @escaping (VMDirectory) -> VNCService = { DefaultVNCService(vmDirectory: $0) }
    ) {
        super.init(
            vmDirContext: vmDirContext,
            virtualizationServiceFactory: virtualizationServiceFactory,
            vncServiceFactory: vncServiceFactory
        )
    }

    /// OS identifier persisted in the VM configuration.
    override func getOSType() -> String {
        return "linux"
    }

    /// Performs first-time setup: sizes the disk, builds the virtualization
    /// service, writes the VM config, and creates the EFI NVRAM store.
    /// - Note: `ipswPath` is part of the shared setup signature but is not
    ///   used for Linux installs (IPSW images are macOS-only).
    override func setup(
        ipswPath: String,
        cpuCount: Int,
        memorySize: UInt64,
        diskSize: UInt64,
        display: String
    ) async throws {
        try setDiskSize(diskSize)

        let service = try virtualizationServiceFactory(
            try createVMVirtualizationServiceContext(
                cpuCount: cpuCount,
                memorySize: memorySize,
                display: display
            )
        )
        // Linux-specific steps below (MAC generation, NVRAM) require the
        // concrete service type, not the protocol.
        guard let linuxService = service as? LinuxVirtualizationService else {
            throw VMError.internalError("Installation requires LinuxVirtualizationService")
        }

        try updateVMConfig(vmConfig: try VMConfig(
            os: getOSType(),
            cpuCount: cpuCount,
            memorySize: memorySize,
            diskSize: diskSize,
            macAddress: linuxService.generateMacAddress(),
            display: display
        ))

        // Create NVRAM store for EFI
        try linuxService.createNVRAM(at: vmDirContext.nvramPath)
    }
}
import Foundation
import Virtualization
@testable import lume

/// Test double for `VMVirtualizationService` that records call counts and can
/// be configured to fail the next lifecycle operation.
@MainActor
final class MockVMVirtualizationService: VMVirtualizationService {
    // Externally readable, internally mutable state and call counters.
    private(set) var currentState: VZVirtualMachine.State = .stopped
    private(set) var startCallCount = 0
    private(set) var stopCallCount = 0
    private(set) var pauseCallCount = 0
    private(set) var resumeCallCount = 0

    var state: VZVirtualMachine.State {
        currentState
    }

    // When true, the next start/stop/pause/resume throws `_operationError`.
    private var _shouldFailNextOperation = false
    private var _operationError: Error = VMError.internalError("Mock operation failed")

    /// Configures failure behavior from any isolation context; hops to the
    /// main actor via `setConfiguration` to mutate the actor-isolated state.
    nonisolated func configure(shouldFail: Bool, error: Error = VMError.internalError("Mock operation failed")) async {
        await setConfiguration(shouldFail: shouldFail, error: error)
    }

    /// Main-actor-isolated mutation of the failure flags.
    @MainActor
    private func setConfiguration(shouldFail: Bool, error: Error) {
        _shouldFailNextOperation = shouldFail
        _operationError = error
    }

    /// Records the call, optionally throws, then transitions to `.running`.
    func start() async throws {
        startCallCount += 1
        if _shouldFailNextOperation {
            throw _operationError
        }
        currentState = .running
    }

    /// Records the call, optionally throws, then transitions to `.stopped`.
    func stop() async throws {
        stopCallCount += 1
        if _shouldFailNextOperation {
            throw _operationError
        }
        currentState = .stopped
    }

    /// Records the call, optionally throws, then transitions to `.paused`.
    func pause() async throws {
        pauseCallCount += 1
        if _shouldFailNextOperation {
            throw _operationError
        }
        currentState = .paused
    }

    /// Records the call, optionally throws, then transitions to `.running`.
    func resume() async throws {
        resumeCallCount += 1
        if _shouldFailNextOperation {
            throw _operationError
        }
        currentState = .running
    }

    /// Returns a placeholder in place of a real `VZVirtualMachine`.
    func getVirtualMachine() -> Any {
        return "mock_vm"
    }
}
-------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/usage-tracking.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Usage Tracking sidebar_position: 9 description: How to track token usage and cost in ComputerAgent and agent loops. --- Tracking usage is important for monitoring costs and optimizing your agent workflows. The ComputerAgent API provides easy access to token and cost usage for every run. ## Accessing Usage Data Whenever you run an agent loop, each result contains a `usage` dictionary with token and cost information: ```python async for result in agent.run(...): print(result["usage"]) # Example output: # { # "prompt_tokens": 150, # "completion_tokens": 75, # "total_tokens": 225, # "response_cost": 0.01, # } ``` - `prompt_tokens`: Number of tokens in the prompt - `completion_tokens`: Number of tokens in the agent's response - `total_tokens`: Total tokens used - `response_cost`: Estimated cost (USD) for this turn ## Tracking Total Usage You can accumulate usage across multiple turns: ```python total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "response_cost": 0.0} async for result in agent.run(...): for k in total_usage: total_usage[k] += result["usage"].get(k, 0) print("Total usage:", total_usage) ``` ## Using Callbacks for Usage Tracking You can also use a callback to automatically track usage. 
Implement the `on_usage` method in your callback class: ```python from agent.callbacks import AsyncCallbackHandler class UsageTrackerCallback(AsyncCallbackHandler): async def on_usage(self, usage): print("Usage update:", usage) agent = ComputerAgent( ..., callbacks=[UsageTrackerCallback()] ) ``` See also: [Budget Manager Callbacks](./callbacks/cost-saving) ## See Also - [Prompt Caching](./prompt-caching) - [Callbacks](./callbacks) ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Create.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation import Virtualization // MARK: - Create Command struct Create: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Create a new virtual machine" ) @Argument(help: "Name for the virtual machine") var name: String @Option( help: "Operating system to install. Defaults to macOS.", completion: .list(["macOS", "linux"])) var os: String = "macOS" @Option(help: "Number of CPU cores", transform: { Int($0) ?? 4 }) var cpu: Int = 4 @Option( help: "Memory size, e.g., 8192MB or 8GB. Defaults to 8GB.", transform: { try parseSize($0) } ) var memory: UInt64 = 8 * 1024 * 1024 * 1024 @Option( help: "Disk size, e.g., 20480MB or 20GB. Defaults to 50GB.", transform: { try parseSize($0) }) var diskSize: UInt64 = 50 * 1024 * 1024 * 1024 @Option(help: "Display resolution in format WIDTHxHEIGHT. Defaults to 1024x768.") var display: VMDisplayResolution = VMDisplayResolution(string: "1024x768")! @Option( help: "Path to macOS restore image (IPSW), or 'latest' to download the latest supported version. Required for macOS VMs.", completion: .file(extensions: ["ipsw"]) ) var ipsw: String? @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? 
"""
Helper functions and decorators for the Computer module.
"""
import logging
import asyncio
from functools import wraps
from typing import Any, Callable, Optional, TypeVar, cast

# Global reference to the default computer instance
_default_computer = None

logger = logging.getLogger(__name__)


def set_default_computer(computer):
    """
    Set the default computer instance to be used by the remote decorator.

    Args:
        computer: The computer instance to use as default
    """
    global _default_computer
    _default_computer = computer


def sandboxed(venv_name: str = "default", computer: str = "default", max_retries: int = 3):
    """
    Decorator that wraps a function to be executed remotely via computer.venv_exec

    Args:
        venv_name: Name of the virtual environment to execute in
        computer: The computer instance to use, or "default" to use the globally set default
        max_retries: Maximum number of retries for the remote execution
            (values < 1 are treated as a single attempt)
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Determine which computer instance to use
            comp = computer if computer != "default" else _default_computer
            if comp is None:
                raise RuntimeError("No computer instance available. Either specify a computer instance or call set_default_computer() first.")

            # Always make at least one attempt, even if max_retries <= 0;
            # previously such values silently returned None.
            attempts = max(1, max_retries)
            for attempt in range(1, attempts + 1):
                try:
                    return await comp.venv_exec(venv_name, func, *args, **kwargs)
                except Exception as e:
                    logger.error(f"Attempt {attempt} failed: {e}")
                    if attempt == attempts:
                        # Re-raise immediately on the last attempt instead of
                        # sleeping first (the old code delayed the final raise).
                        raise
                    await asyncio.sleep(1)
        return wrapper
    return decorator
Args: max_budget: Maximum budget allowed reset_after_each_run: Whether to reset budget after each run raise_error: Whether to raise an error when budget is exceeded """ self.max_budget = max_budget self.reset_after_each_run = reset_after_each_run self.raise_error = raise_error self.total_cost = 0.0 async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: """Reset budget if configured to do so.""" if self.reset_after_each_run: self.total_cost = 0.0 async def on_usage(self, usage: Dict[str, Any]) -> None: """Track usage costs.""" if "response_cost" in usage: self.total_cost += usage["response_cost"] async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool: """Check if budget allows continuation.""" if self.total_cost >= self.max_budget: if self.raise_error: raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}") else: print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}") return False return True ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/decorators.py: -------------------------------------------------------------------------------- ```python """ Decorators for agent - agent_loop decorator """ from typing import List, Optional from .types import AgentConfigInfo # Global registry _agent_configs: List[AgentConfigInfo] = [] def register_agent(models: str, priority: int = 0): """ Decorator to register an AsyncAgentConfig class. 
#!/bin/bash
# Installer: writes a self-contained launcher for the cua-mcp-server into
# ~/.cua/start_mcp_server.sh. The launcher (heredoc below) provisions its own
# Python venv on first run and always upgrades cua-mcp-server before starting.
set -e

# Create the ~/.cua directory if it doesn't exist
mkdir -p "$HOME/.cua"

# Create start_mcp_server.sh script in ~/.cua directory
# 'EOF' is quoted so the launcher body is written literally, with no
# variable expansion at install time.
cat > "$HOME/.cua/start_mcp_server.sh" << 'EOF'
#!/bin/bash
set -e

# Function to check if a directory is writable
is_writable() {
  [ -w "$1" ]
}

# Function to check if a command exists (silent)
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

# Find a writable directory for the virtual environment
# Preference order: $HOME, then /tmp, then the current working directory.
if is_writable "$HOME"; then
  VENV_DIR="$HOME/.cua-mcp-venv"
elif is_writable "/tmp"; then
  VENV_DIR="/tmp/.cua-mcp-venv"
else
  # Try to create a directory in the current working directory
  TEMP_DIR="$(pwd)/.cua-mcp-venv"
  if is_writable "$(pwd)"; then
    VENV_DIR="$TEMP_DIR"
  else
    echo "Error: Cannot find a writable directory for the virtual environment." >&2
    exit 1
  fi
fi

# Check if Python is installed
if ! command_exists python3; then
  echo "Error: Python 3 is not installed." >&2
  exit 1
fi

# Check if pip is installed
if ! command_exists pip3; then
  echo "Error: pip3 is not installed." >&2
  exit 1
fi

# Create virtual environment if it doesn't exist
if [ ! -d "$VENV_DIR" ]; then
  # Redirect output to prevent JSON parsing errors in Claude
  python3 -m venv "$VENV_DIR" >/dev/null 2>&1
fi

# Activate virtual environment
source "$VENV_DIR/bin/activate"

# Always install/upgrade the latest version of cua-mcp-server
pip install --upgrade "cua-mcp-server"

# Run the MCP server with isolation from development paths
cd "$VENV_DIR"  # Change to venv directory to avoid current directory in path
python3 -c "from mcp_server.server import main; main()"
EOF

# Make the script executable
chmod +x "$HOME/.cua/start_mcp_server.sh"

echo "MCP server startup script created at $HOME/.cua/start_mcp_server.sh"
"litellm>=1.74.12" ] requires-python = ">=3.12" [project.optional-dependencies] openai = [] anthropic = [] omni = [ "cua-som>=0.1.0,<0.2.0", ] uitars = [] uitars-mlx = [ "mlx-vlm>=0.1.27; sys_platform == 'darwin'" ] uitars-hf = [ "accelerate", "torch", "transformers>=4.54.0" ] glm45v-hf = [ "accelerate", "torch", "transformers-v4.55.0-GLM-4.5V-preview" ] opencua-hf = [ "accelerate", "torch", "transformers==4.53.0", "tiktoken>=0.11.0", "blobfile>=3.0.0" ] internvl-hf = [ "accelerate", "torch", "transformers>=4.55.0", "einops", "timm" ] ui = [ "gradio>=5.23.3", "python-dotenv>=1.0.1", ] cli = [ "yaspin>=3.1.0", ] hud = [ "hud-python==0.4.52", ] all = [ # uitars requirements "mlx-vlm>=0.1.27; sys_platform == 'darwin'", "accelerate", "torch", "transformers>=4.55.0", # internvl requirements, "einops", "timm", # opencua requirements "tiktoken>=0.11.0", "blobfile>=3.0.0", # ui requirements "gradio>=5.23.3", "python-dotenv>=1.0.1", # cli requirements "yaspin>=3.1.0", # hud requirements "hud-python==0.4.52", ] [tool.uv] constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"] [tool.pdm] distribution = true [tool.pdm.build] includes = ["agent/"] ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-agents/human-in-the-loop.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Human-In-The-Loop description: Use humans as agents for evaluation, demonstrations, and interactive control --- The Agent SDK provides a human tool, with native support for using a human-in-the-loop as a way to evaluate your environment, tools, or to create demonstrations. You can use it by doing `grounding_model+human/human` or `human/human` directly. ## Getting Started To start the human agent tool, simply run: ```bash python -m agent.human_tool ``` The UI will show you pending completions. Select a completion to take control of the agent. 
## Usage Examples ### Direct Human Agent ```python from agent import ComputerAgent from agent.computer import computer agent = ComputerAgent( "human/human", tools=[computer] ) async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): pass ``` ### Composed with Grounding Model ```python agent = ComputerAgent( "huggingface-local/HelloKKMe/GTA1-7B+human/human", tools=[computer] ) async for _ in agent.run("Navigate to the settings page and enable dark mode"): pass ``` ## Features The human-in-the-loop interface provides: - **Interactive UI**: Web-based interface for reviewing and responding to agent requests - **Image Display**: Screenshots with click handlers for direct interaction - **Action Accordions**: Support for various computer actions (click, type, keypress, etc.) - **Tool Calls**: Full OpenAI-compatible tool call support - **Real-time Updates**: Smart polling for responsive UI updates ## Use Cases - **Evaluation**: Have humans evaluate agent performance and provide ground truth responses - **Demonstrations**: Create training data by having humans demonstrate tasks - **Interactive Control**: Take manual control when automated agents need human guidance - **Testing**: Validate agent, tool, and environment behavior manually --- ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/Utils.swift: -------------------------------------------------------------------------------- ```swift import Foundation import ArgumentParser extension Collection { subscript (safe index: Index) -> Element? { indices.contains(index) ? self[index] : nil } } func resolveBinaryPath(_ name: String) -> URL? 
{ guard let path = ProcessInfo.processInfo.environment["PATH"] else { return nil } for pathComponent in path.split(separator: ":") { let url = URL(fileURLWithPath: String(pathComponent)) .appendingPathComponent(name, isDirectory: false) if FileManager.default.fileExists(atPath: url.path) { return url } } return nil } // Helper function to parse size strings func parseSize(_ input: String) throws -> UInt64 { let lowercased = input.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() let multiplier: Double let valueString: String if lowercased.hasSuffix("tb") { multiplier = 1024 * 1024 * 1024 * 1024 valueString = String(lowercased.dropLast(2)) } else if lowercased.hasSuffix("gb") { multiplier = 1024 * 1024 * 1024 valueString = String(lowercased.dropLast(2)) } else if lowercased.hasSuffix("mb") { multiplier = 1024 * 1024 valueString = String(lowercased.dropLast(2)) } else if lowercased.hasSuffix("kb") { multiplier = 1024 valueString = String(lowercased.dropLast(2)) } else { multiplier = 1024 * 1024 valueString = lowercased } guard let value = Double(valueString.trimmingCharacters(in: .whitespacesAndNewlines)) else { throw ValidationError("Malformed size input: \(input). 
Could not parse numeric value.") } let bytesAsDouble = (value * multiplier).rounded() guard bytesAsDouble >= 0 && bytesAsDouble <= Double(UInt64.max) else { throw ValidationError("Calculated size out of bounds for UInt64: \(input)") } let val = UInt64(bytesAsDouble) return val } ``` -------------------------------------------------------------------------------- /.github/workflows/pypi-publish-som.yml: -------------------------------------------------------------------------------- ```yaml name: Publish SOM Package on: push: tags: - "som-v*" workflow_dispatch: inputs: version: description: "Version to publish (without v prefix)" required: true default: "0.1.0" workflow_call: inputs: version: description: "Version to publish" required: true type: string outputs: version: description: "The version that was published" value: ${{ jobs.determine-version.outputs.version }} # Adding permissions at workflow level permissions: contents: write jobs: determine-version: runs-on: macos-latest outputs: version: ${{ steps.get-version.outputs.version }} steps: - uses: actions/checkout@v4 - name: Determine version id: get-version run: | if [ "${{ github.event_name }}" == "push" ]; then # Extract version from tag (for package-specific tags) if [[ "${{ github.ref }}" =~ ^refs/tags/som-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then VERSION=${BASH_REMATCH[1]} else echo "Invalid tag format for som" exit 1 fi elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then # Use version from workflow dispatch VERSION=${{ github.event.inputs.version }} else # Use version from workflow_call VERSION=${{ inputs.version }} fi echo "VERSION=$VERSION" echo "version=$VERSION" >> $GITHUB_OUTPUT publish: needs: determine-version uses: ./.github/workflows/pypi-reusable-publish.yml with: package_name: "som" package_dir: "libs/python/som" version: ${{ needs.determine-version.outputs.version }} is_lume_package: false base_package_name: "cua-som" secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} ``` 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile FROM python:3.12-slim # Set environment variables ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 \ PIP_DISABLE_PIP_VERSION_CHECK=1 \ PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/pylume:/app/libs/python/computer-server:/app/libs/python/mcp-server" # Install system dependencies for ARM architecture RUN apt-get update && apt-get install -y --no-install-recommends \ git \ build-essential \ libgl1-mesa-glx \ libglib2.0-0 \ libxcb-xinerama0 \ libxkbcommon-x11-0 \ cmake \ pkg-config \ curl \ iputils-ping \ net-tools \ sed \ xxd \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Set working directory WORKDIR /app # Copy the entire project temporarily # We'll mount the real source code over this at runtime COPY . /app/ # Create a simple .env.local file for build.sh RUN echo "PYTHON_BIN=python" > /app/.env.local # Modify build.sh to skip virtual environment creation RUN sed -i 's/python -m venv .venv/echo "Skipping venv creation in Docker"/' /app/scripts/build.sh && \ sed -i 's/source .venv\/bin\/activate/echo "Skipping venv activation in Docker"/' /app/scripts/build.sh && \ sed -i 's/find . 
-type d -name ".venv" -exec rm -rf {} +/echo "Skipping .venv removal in Docker"/' /app/scripts/build.sh && \ chmod +x /app/scripts/build.sh # Run the build script to install dependencies RUN cd /app && ./scripts/build.sh # Clean up the source files now that dependencies are installed # When we run the container, we'll mount the actual source code RUN rm -rf /app/* /app/.??* # Note: This Docker image doesn't contain the lume executable (macOS-specific) # Instead, it relies on connecting to a lume server running on the host machine # via host.docker.internal:7777 # Default command CMD ["bash"] ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/src/helpers.ts: -------------------------------------------------------------------------------- ```typescript import type { Computer } from "@trycua/computer"; import type OpenAI from "openai"; export async function executeAction( computer: Computer, action: OpenAI.Responses.ResponseComputerToolCall["action"], ) { switch (action.type) { case "click": { const { x, y, button } = action; console.log(`Executing click at (${x}, ${y}) with button '${button}'.`); await computer.interface.moveCursor(x, y); if (button === "right") await computer.interface.rightClick(); else await computer.interface.leftClick(); break; } case "type": { const { text } = action; console.log(`Typing text: ${text}`); await computer.interface.typeText(text); } break; case "scroll": { const { x: locX, y: locY, scroll_x, scroll_y } = action; console.log( `Scrolling at (${locX}, ${locY}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y}).`, ); await computer.interface.moveCursor(locX, locY); await computer.interface.scroll(scroll_x, scroll_y); break; } case "keypress": { const { keys } = action; for (const key of keys) { console.log(`Pressing key: ${key}.`); // Map common key names to CUA equivalents if (key.toLowerCase() === "enter") { await computer.interface.pressKey("return"); } else if 
(key.toLowerCase() === "space") { await computer.interface.pressKey("space"); } else { await computer.interface.pressKey(key); } } break; } case "wait": { console.log(`Waiting for 3 seconds.`); await new Promise((resolve) => setTimeout(resolve, 3 * 1000)); break; } case "screenshot": { console.log("Taking screenshot."); // This is handled automatically in the main loop, but we can take an extra one if requested const screenshot = await computer.interface.screenshot(); return screenshot; } default: console.log(`Unrecognized action: ${action.type}`); break; } } ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/prompt-caching.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Prompt Caching sidebar_position: 8 description: How to use prompt caching in ComputerAgent and agent loops. --- Prompt caching is a cost-saving feature offered by some LLM API providers that helps avoid reprocessing the same prompt, improving efficiency and reducing costs for repeated or long-running tasks. ## Usage The `use_prompt_caching` argument is available for `ComputerAgent` and agent loops: ```python agent = ComputerAgent( ..., use_prompt_caching=True, ) ``` - **Type:** `bool` - **Default:** `False` - **Purpose:** Use prompt caching to avoid reprocessing the same prompt. ## Anthropic CUAs When using Anthropic-based CUAs (Claude models), setting `use_prompt_caching=True` will automatically add `{ "cache_control": "ephemeral" }` to your messages. This enables prompt caching for the session and can speed up repeated runs with the same prompt. <Callout title="Note"> This argument is only required for Anthropic CUAs. For other providers, it is ignored. </Callout> ## OpenAI Provider With the OpenAI provider, prompt caching is handled automatically for prompts of 1000+ tokens. 
You do **not** need to set `use_prompt_caching`—caching will occur for long prompts without any extra configuration. ## Example ```python from agent import ComputerAgent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", use_prompt_caching=True, ) ``` ## Implementation Details - For Anthropic: Adds `{ "cache_control": "ephemeral" }` to messages when enabled. - For OpenAI: Caching is automatic for long prompts; the argument is ignored. ## When to Use - Enable for Anthropic CUAs if you want to avoid reprocessing the same prompt in repeated or iterative tasks. - Not needed for OpenAI models unless you want explicit ephemeral cache control (not required for most users). ## See Also - [Agent Loops](./agent-loops) - [Migration Guide](./migration-guide) ``` -------------------------------------------------------------------------------- /tests/test_telemetry.py: -------------------------------------------------------------------------------- ```python """ Required environment variables: - CUA_API_KEY: API key for Cua cloud provider """ import os import pytest from pathlib import Path import sys # Load environment variables from .env file project_root = Path(__file__).parent.parent env_file = project_root / ".env" print(f"Loading environment from: {env_file}") from dotenv import load_dotenv load_dotenv(env_file) # Add paths to sys.path if needed pythonpath = os.environ.get("PYTHONPATH", "") for path in pythonpath.split(":"): if path and path not in sys.path: sys.path.insert(0, path) # Insert at beginning to prioritize print(f"Added to sys.path: {path}") from core.telemetry import record_event, is_telemetry_enabled, destroy_telemetry_client class TestTelemetry: def setup_method(self): """Reset environment variables before each test""" os.environ.pop('CUA_TELEMETRY', None) os.environ.pop('CUA_TELEMETRY_ENABLED', None) destroy_telemetry_client() def test_telemetry_disabled_when_cua_telemetry_is_off(self): """Should return false when CUA_TELEMETRY is off""" 
os.environ['CUA_TELEMETRY'] = 'off' assert is_telemetry_enabled() is False def test_telemetry_enabled_when_cua_telemetry_not_set(self): """Should return true when CUA_TELEMETRY is not set""" assert is_telemetry_enabled() is True def test_telemetry_disabled_when_cua_telemetry_enabled_is_0(self): """Should return false if CUA_TELEMETRY_ENABLED is 0""" os.environ['CUA_TELEMETRY_ENABLED'] = '0' assert is_telemetry_enabled() is False def test_send_test_event_to_posthog(self): """Should send a test event to PostHog""" # This should not raise an exception record_event('test_telemetry', {'message': 'Hello, world!'}) if __name__ == "__main__": # Run tests directly pytest.main([__file__, "-v"]) ``` -------------------------------------------------------------------------------- /libs/typescript/biome.json: -------------------------------------------------------------------------------- ```json { "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", "vcs": { "enabled": false, "clientKind": "git", "useIgnoreFile": false }, "files": { "ignoreUnknown": false, "include": ["core/**/*.ts", "computer/**/*.ts"], "ignore": ["dist", "node_modules"] }, "formatter": { "enabled": true, "useEditorconfig": true, "formatWithErrors": false, "indentStyle": "space", "indentWidth": 2, "lineEnding": "lf", "lineWidth": 80, "attributePosition": "auto", "bracketSpacing": true }, "organizeImports": { "enabled": true }, "linter": { "enabled": true, "rules": { "recommended": true, "style": { "useSelfClosingElements": "warn", "noUnusedTemplateLiteral": "warn", "noNonNullAssertion": "off" }, "a11y": { "useMediaCaption": "off", "useKeyWithClickEvents": "warn", "useKeyWithMouseEvents": "warn", "noSvgWithoutTitle": "off", "useButtonType": "warn", "noAutofocus": "off" }, "suspicious": { "noArrayIndexKey": "off" }, "correctness": { "noUnusedVariables": "warn", "noUnusedFunctionParameters": "warn", "noUnusedImports": "warn" }, "complexity": { "useOptionalChain": "info" }, "nursery": { "useSortedClasses": 
{ "level": "warn", "fix": "safe", "options": { "attributes": ["className"], "functions": ["cn"] } } } } }, "javascript": { "formatter": { "jsxQuoteStyle": "double", "quoteProperties": "asNeeded", "trailingCommas": "es5", "semicolons": "always", "arrowParentheses": "always", "bracketSameLine": false, "quoteStyle": "single", "attributePosition": "auto", "bracketSpacing": true } } } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/som/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Set-of-Mark description: Reference for the current version of the Set-of-Mark library. pypi: cua-som github: - https://github.com/trycua/cua/tree/main/libs/python/som --- <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/som_examples.py" target="_blank">Python example</a> is available for this documentation.</Callout> ## Overview The SOM library provides visual element detection and interaction capabilities. It is based on the [Set-of-Mark](https://arxiv.org/abs/2310.11441) research paper and the [OmniParser](https://github.com/microsoft/OmniParser) model. 
## API Documentation ### OmniParser Class ```python class OmniParser: def __init__(self, device: str = "auto"): """Initialize the parser with automatic device detection""" def parse( self, image: PIL.Image, box_threshold: float = 0.3, iou_threshold: float = 0.1, use_ocr: bool = True, ocr_engine: str = "easyocr" ) -> ParseResult: """Parse UI elements from an image""" ``` ### ParseResult Object ```python @dataclass class ParseResult: elements: List[UIElement] # Detected elements visualized_image: PIL.Image # Annotated image processing_time: float # Time in seconds def to_dict(self) -> dict: """Convert to JSON-serializable dictionary""" def filter_by_type(self, elem_type: str) -> List[UIElement]: """Filter elements by type ('icon' or 'text')""" ``` ### UIElement ```python class UIElement(BaseModel): id: Optional[int] = Field(None) # Element ID (1-indexed) type: Literal["icon", "text"] # Element type bbox: BoundingBox # Bounding box coordinates { x1, y1, x2, y2 } interactivity: bool = Field(default=False) # Whether the element is interactive confidence: float = Field(default=1.0) # Detection confidence ``` ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer-server/WebSocket-API.mdx: -------------------------------------------------------------------------------- ```markdown --- title: WebSocket API Reference description: Reference for the /ws WebSocket endpoint of the Computer Server. --- # WebSocket API Reference The Computer Server exposes a WebSocket endpoint for real-time command execution and streaming results. - `ws://localhost:8000/ws` - `wss://your-container.containers.cloud.trycua.com:8443/ws` (cloud) ### Authentication (Cloud Only) For cloud containers, you must authenticate immediately after connecting: ```json { "command": "authenticate", "params": { "container_name": "your-container", "api_key": "your-api-key" } } ``` If authentication fails, the connection is closed. 
### Command Format Send JSON messages: ```json { "command": "<command_name>", "params": { ... } } ``` ### Example (Python) ```python import websockets import asyncio import json async def main(): uri = "ws://localhost:8000/ws" async with websockets.connect(uri) as ws: await ws.send(json.dumps({"command": "version", "params": {}})) response = await ws.recv() print(response) asyncio.run(main()) ``` ### Example (Cloud) ```python import websockets import asyncio import json async def main(): uri = "wss://your-container.containers.cloud.trycua.com:8443/ws" async with websockets.connect(uri) as ws: await ws.send(json.dumps({ "command": "authenticate", "params": { "container_name": "your-container", "api_key": "your-api-key" } })) auth_response = await ws.recv() print(auth_response) await ws.send(json.dumps({"command": "version", "params": {}})) response = await ws.recv() print(response) asyncio.run(main()) ``` ### Response Format Each response is a JSON object: ```json { "success": true, ... } ``` ### Supported Commands See [Commands Reference](./Commands) for the full list of commands and parameters. 
``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] build-backend = "pdm.backend" requires = ["pdm-backend"] [project] authors = [{ name = "TryCua", email = "[email protected]" }] dependencies = [ "openai<1.100.0", "anthropic>=0.67.0", ] description = "CUA (Computer Use Agent) mono-repo" license = { text = "MIT" } name = "cua-workspace" readme = "README.md" requires-python = "<3.14,>=3.12" version = "0.1.0" [project.urls] repository = "https://github.com/trycua/cua" [dependency-groups] dev = [] examples = [] [tool.pdm] distribution = false [tool.pdm.dev-dependencies] dev = [ "-e core @ file:///${PROJECT_ROOT}/libs/python/core", "-e agent @ file:///${PROJECT_ROOT}/libs/python/agent", "-e computer @ file:///${PROJECT_ROOT}/libs/python/computer", "-e computer-server @ file:///${PROJECT_ROOT}/libs/python/computer-server", "-e cua-som @ file:///${PROJECT_ROOT}/libs/python/som", "-e mcp-server @ file:///${PROJECT_ROOT}/libs/python/mcp-server", "-e pylume @ file:///${PROJECT_ROOT}/libs/python/pylume", "black>=23.0.0", "ipykernel>=6.29.5", "jedi>=0.19.2", "jupyter>=1.0.0", "mypy>=1.10.0", "ruff>=0.9.2", "types-requests>=2.31.0", "hud-python[agent]==0.4.52" ] docs = ["mkdocs-material>=9.2.0", "mkdocs>=1.5.0"] test = [ "aioresponses>=0.7.4", "pytest-asyncio>=0.21.1", "pytest-cov>=4.1.0", "pytest-mock>=3.10.0", "pytest-xdist>=3.6.1", "pytest>=8.0.0", ] [tool.pdm.resolution] respect-source-order = true [tool.black] line-length = 100 target-version = ["py311"] [tool.ruff] fix = true line-length = 100 select = ["B", "E", "F", "I"] target-version = "py311" [tool.ruff.format] docstring-code-format = true [tool.mypy] check_untyped_defs = true disallow_untyped_defs = true ignore_missing_imports = true python_version = "3.11" show_error_codes = true strict = true warn_return_any = true warn_unused_ignores = false 
[tool.pytest.ini_options] asyncio_mode = "auto" python_files = "test_*.py" testpaths = ["libs/*/tests"] ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/som/configuration.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Configuration --- ### Detection Parameters #### Box Threshold (0.3) Controls the confidence threshold for accepting detections: <img src="/docs/img/som_box_threshold.png" alt="Illustration of confidence thresholds in object detection, with a high-confidence detection accepted and a low-confidence detection rejected." width="500px" /> - Higher values (0.3) yield more precise but fewer detections - Lower values (0.01) catch more potential icons but increase false positives - Default is 0.3 for optimal precision/recall balance #### IOU Threshold (0.1) Controls how overlapping detections are merged: <img src="/docs/img/som_iou_threshold.png" alt="Diagram showing Intersection over Union (IOU) with low overlap between two boxes kept separate and high overlap leading to merging." 
width="500px" /> - Lower values (0.1) more aggressively remove overlapping boxes - Higher values (0.5) allow more overlapping detections - Default is 0.1 to handle densely packed UI elements ### OCR Configuration - **Engine**: EasyOCR - Primary choice for all platforms - Fast initialization and processing - Built-in English language support - GPU acceleration when available - **Settings**: - Timeout: 5 seconds - Confidence threshold: 0.5 - Paragraph mode: Disabled - Language: English only ## Performance ### Hardware Acceleration #### MPS (Metal Performance Shaders) - Multi-scale detection (640px, 1280px, 1920px) - Test-time augmentation enabled - Half-precision (FP16) - Average detection time: ~0.4s - Best for production use when available #### CPU - Single-scale detection (1280px) - Full-precision (FP32) - Average detection time: ~1.3s - Reliable fallback option ### Example Output Structure ``` examples/output/ ├── {timestamp}_no_ocr/ │ ├── annotated_images/ │ │ └── screenshot_analyzed.png │ ├── screen_details.txt │ └── summary.json └── {timestamp}_ocr/ ├── annotated_images/ │ └── screenshot_analyzed.png ├── screen_details.txt └── summary.json ``` ``` -------------------------------------------------------------------------------- /libs/python/computer-server/examples/usage_example.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Example showing how to use the CUA Computer API as an imported package. """ import asyncio import logging from typing import TYPE_CHECKING # For type checking only if TYPE_CHECKING: from computer_api import Server # Setup logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) logger = logging.getLogger(__name__) # Example 1: Synchronous usage (blocks until server is stopped) def example_sync(): """ Example of synchronous server usage. This will block until interrupted. 
Run with: python3 -m examples.usage_example sync """ # Import directly to avoid any confusion from computer_api.server import Server server = Server(port=8080) print("Server started at http://localhost:8080") print("Press Ctrl+C to stop the server") try: server.start() # This will block until the server is stopped except KeyboardInterrupt: print("Server stopped by user") # Example 2: Asynchronous usage async def example_async(): """ Example of asynchronous server usage. This will start the server in the background and allow other operations to run concurrently. Run with: python3 -m examples.usage_example async """ # Import directly to avoid any confusion from computer_api.server import Server server = Server(port=8080) # Start the server in the background await server.start_async() print("Server is running in the background") print("Performing other tasks...") # Do other things while the server is running for i in range(5): print(f"Doing work iteration {i+1}/5...") await asyncio.sleep(2) print("Work complete, stopping server...") # Stop the server when done await server.stop() print("Server stopped") if __name__ == "__main__": import sys if len(sys.argv) > 1 and sys.argv[1] == "async": asyncio.run(example_async()) else: example_sync() ``` -------------------------------------------------------------------------------- /libs/lume/tests/VMVirtualizationServiceTests.swift: -------------------------------------------------------------------------------- ```swift import Foundation import Testing import Virtualization @testable import lume @Test("VMVirtualizationService starts correctly") func testVMVirtualizationServiceStart() async throws { let service = MockVMVirtualizationService() // Initial state #expect(await service.state == .stopped) #expect(await service.startCallCount == 0) // Start service try await service.start() #expect(await service.state == .running) #expect(await service.startCallCount == 1) } @Test("VMVirtualizationService stops correctly") func 
testVMVirtualizationServiceStop() async throws { let service = MockVMVirtualizationService() // Start then stop try await service.start() try await service.stop() #expect(await service.state == .stopped) #expect(await service.stopCallCount == 1) } @Test("VMVirtualizationService handles pause and resume") func testVMVirtualizationServicePauseResume() async throws { let service = MockVMVirtualizationService() // Start and pause try await service.start() try await service.pause() #expect(await service.state == .paused) #expect(await service.pauseCallCount == 1) // Resume try await service.resume() #expect(await service.state == .running) #expect(await service.resumeCallCount == 1) } @Test("VMVirtualizationService handles operation failures") func testVMVirtualizationServiceFailures() async throws { let service = MockVMVirtualizationService() await service.configure(shouldFail: true) // Test start failure do { try await service.start() #expect(Bool(false), "Expected start to throw") } catch let error as VMError { switch error { case .internalError(let message): #expect(message == "Mock operation failed") default: #expect(Bool(false), "Unexpected error type: \(error)") } } #expect(await service.state == .stopped) #expect(await service.startCallCount == 1) } ``` -------------------------------------------------------------------------------- /libs/python/som/pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend" [project] name = "cua-som" version = "0.1.0" description = "Computer Vision and OCR library for detecting and analyzing UI elements" authors = [ { name = "TryCua", email = "[email protected]" } ] dependencies = [ "torch>=2.2.1", "torchvision>=0.17.1", "ultralytics>=8.1.28", "easyocr>=1.7.1", "numpy>=1.26.4", "pillow>=10.2.0", "setuptools>=75.8.1", "opencv-python-headless>=4.11.0.86", "matplotlib>=3.8.3", "huggingface-hub>=0.21.4", 
"supervision>=0.25.1", "typing-extensions>=4.9.0", "pydantic>=2.6.3" ] requires-python = ">=3.11" readme = "README.md" license = {text = "AGPL-3.0-or-later"} keywords = ["computer-vision", "ocr", "ui-analysis", "icon-detection"] classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", "Intended Audience :: Developers", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Image Recognition" ] [project.urls] Homepage = "https://github.com/trycua/cua" Repository = "https://github.com/trycua/cua" Documentation = "https://github.com/trycua/cua/tree/main/docs" [tool.pdm] distribution = true package-type = "library" src-layout = false [tool.pdm.build] includes = ["som/"] source-includes = ["tests/", "README.md", "LICENSE"] [tool.black] line-length = 100 target-version = ["py311"] [tool.ruff] line-length = 100 target-version = "py311" select = ["E", "F", "B", "I"] fix = true [tool.ruff.format] docstring-code-format = true [tool.mypy] strict = true python_version = "3.11" ignore_missing_imports = true disallow_untyped_defs = true check_untyped_defs = true warn_return_any = true show_error_codes = true warn_unused_ignores = false [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] python_files = "test_*.py" ``` -------------------------------------------------------------------------------- /libs/lumier/src/hooks/on-logon.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # Arguments passed from execute_remote_script in vm.sh # $1: VNC_PASSWORD # $2: HOST_SHARED_PATH (Path inside VM where host shared dir is mounted, e.g., /Volumes/My Shared Files) VNC_PASSWORD="$1" # IMPORTANT: In the VM, the shared folder is always mounted at this fixed location HOST_SHARED_PATH="/Volumes/My Shared Files" # Set default 
value for VNC_DEBUG if not provided VNC_DEBUG=${VNC_DEBUG:-0} # Define the path to the user's optional on-logon script within the shared folder USER_ON_LOGON_SCRIPT_PATH="$HOST_SHARED_PATH/lifecycle/on-logon.sh" # Show basic information when debug is enabled if [ "$VNC_DEBUG" = "1" ]; then echo "[VM] Lumier lifecycle script starting" echo "[VM] Looking for user script: $USER_ON_LOGON_SCRIPT_PATH" fi # Check if the user-provided script exists if [ -f "$USER_ON_LOGON_SCRIPT_PATH" ]; then if [ "$VNC_DEBUG" = "1" ]; then echo "[VM] Found user script: $USER_ON_LOGON_SCRIPT_PATH" fi # Always show what script we're executing echo "[VM] Executing user lifecycle script" # Make script executable chmod +x "$USER_ON_LOGON_SCRIPT_PATH" # Execute the user script in a subshell with error output captured "$USER_ON_LOGON_SCRIPT_PATH" "$VNC_PASSWORD" "$HOST_SHARED_PATH" 2>&1 # Capture exit code USER_SCRIPT_EXIT_CODE=$? # Always report script execution results if [ $USER_SCRIPT_EXIT_CODE -eq 0 ]; then echo "[VM] User lifecycle script completed successfully" else echo "[VM] User lifecycle script failed with exit code: $USER_SCRIPT_EXIT_CODE" fi # Check results (only in debug mode) if [ "$VNC_DEBUG" = "1" ]; then # List any files created by the script echo "[VM] Files created by user script:" ls -la /Users/lume/Desktop/hello_*.txt 2>/dev/null || echo "[VM] No script-created files found" fi else if [ "$VNC_DEBUG" = "1" ]; then echo "[VM] No user lifecycle script found" fi fi exit 0 # Ensure the entry point script exits cleanly ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/cost-saving.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Cost Optimization description: Budget management and image retention for cost optimization --- # Cost Optimization Callbacks Optimize agent costs with budget management and image retention callbacks. 
## Budget Manager Callbacks Example ```python from agent.callbacks import BudgetManagerCallback agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], callbacks=[ BudgetManagerCallback( max_budget=5.0, # $5 limit reset_after_each_run=False, raise_error=True ) ] ) ``` ## Budget Manager Shorthand ```python # Simple budget limit agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", max_trajectory_budget=5.0 # $5 limit ) ``` **Or with options:** ```python # Advanced budget configuration agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", max_trajectory_budget={ "max_budget": 10.0, "raise_error": True, # Raise error when exceeded "reset_after_each_run": False # Persistent across runs } ) ``` ## Image Retention Callbacks Example ```python from agent.callbacks import ImageRetentionCallback agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], callbacks=[ ImageRetentionCallback(only_n_most_recent_images=3) ] ) ``` ## Image Retention Shorthand ```python agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], only_n_most_recent_images=3 # Auto-adds ImageRetentionCallback ) ``` ## Combined Cost Optimization ```python agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], max_trajectory_budget=5.0, # Budget limit only_n_most_recent_images=3, # Image retention trajectory_dir="trajectories" # Track spending ) ``` ## Budget Manager Options - `max_budget`: Dollar limit for trajectory - `reset_after_each_run`: Reset budget per run (default: True) - `raise_error`: Raise exception vs. 
graceful stop (default: False)
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/callbacks/index.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Callbacks
---

Callbacks in the Agent SDK provide hooks into the agent's lifecycle, allowing for custom functionality to be executed at various stages of an agent's run. They enable extensibility by allowing developers to integrate their own logic for tasks such as logging, cost management, and data anonymization.

## Usage

You can add preprocessing and postprocessing hooks using callbacks, or write your own by subclassing `AsyncCallbackHandler`.

### Built-in Callbacks

Built-in callbacks can be used as follows:

```python
from agent.callbacks import (
    ImageRetentionCallback,
    TrajectorySaverCallback,
    BudgetManagerCallback,
    LoggingCallback
)

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[
        ImageRetentionCallback(only_n_most_recent_images=3),
        TrajectorySaverCallback(trajectory_dir="trajectories"),
        BudgetManagerCallback(max_budget=10.0, raise_error=True),
        LoggingCallback(level=logging.INFO)
    ]
)
```

The following built-in callbacks are available:

- [BudgetManagerCallback](callbacks/cost-saving): Stops execution when budget exceeded
- [LoggingCallback](callbacks/logging): Logs agent activities
- **ImageRetentionCallback**: Limits recent images in context
- **TrajectorySaverCallback**: Saves conversation trajectories
- [PII Anonymization](callbacks/pii-anonymization)

### Custom Callbacks

Create custom callbacks using knowledge of the callback lifecycle as described in [Agent Lifecycle](callbacks/agent-lifecycle).
```python from agent.callbacks.base import AsyncCallbackHandler class CustomCallback(AsyncCallbackHandler): async def on_llm_start(self, messages): """Preprocess messages before LLM call""" # Add custom preprocessing logic return messages async def on_llm_end(self, messages): """Postprocess messages after LLM call""" # Add custom postprocessing logic return messages async def on_usage(self, usage): """Track usage information""" print(f"Tokens used: {usage.total_tokens}") ``` ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/computers/base.py: -------------------------------------------------------------------------------- ```python """ Base computer interface protocol for agent interactions. """ from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_checkable @runtime_checkable class AsyncComputerHandler(Protocol): """Protocol defining the interface for computer interactions.""" # ==== Computer-Use-Preview Action Space ==== async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: """Get the current environment type.""" ... async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" ... async def screenshot(self) -> str: """Take a screenshot and return as base64 string.""" ... async def click(self, x: int, y: int, button: str = "left") -> None: """Click at coordinates with specified button.""" ... async def double_click(self, x: int, y: int) -> None: """Double click at coordinates.""" ... async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: """Scroll at coordinates with specified scroll amounts.""" ... async def type(self, text: str) -> None: """Type text.""" ... async def wait(self, ms: int = 1000) -> None: """Wait for specified milliseconds.""" ... async def move(self, x: int, y: int) -> None: """Move cursor to coordinates.""" ... 
async def keypress(self, keys: Union[List[str], str]) -> None: """Press key combination.""" ... async def drag(self, path: List[Dict[str, int]]) -> None: """Drag along specified path.""" ... async def get_current_url(self) -> str: """Get current URL (for browser environments).""" ... # ==== Anthropic Action Space ==== async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse down at coordinates.""" ... async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse up at coordinates.""" ... ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/factory.test.ts: -------------------------------------------------------------------------------- ```typescript import { describe, expect, it } from 'vitest'; import { InterfaceFactory } from '../../src/interface/factory.ts'; import { LinuxComputerInterface } from '../../src/interface/linux.ts'; import { MacOSComputerInterface } from '../../src/interface/macos.ts'; import { WindowsComputerInterface } from '../../src/interface/windows.ts'; import { OSType } from '../../src/types.ts'; describe('InterfaceFactory', () => { const testParams = { ipAddress: '192.168.1.100', username: 'testuser', password: 'testpass', apiKey: 'test-api-key', vmName: 'test-vm', }; describe('createInterfaceForOS', () => { it('should create MacOSComputerInterface for macOS', () => { const interface_ = InterfaceFactory.createInterfaceForOS( OSType.MACOS, testParams.ipAddress, testParams.apiKey, testParams.vmName ); expect(interface_).toBeInstanceOf(MacOSComputerInterface); }); it('should create LinuxComputerInterface for Linux', () => { const interface_ = InterfaceFactory.createInterfaceForOS( OSType.LINUX, testParams.ipAddress, testParams.apiKey, testParams.vmName ); expect(interface_).toBeInstanceOf(LinuxComputerInterface); }); it('should create WindowsComputerInterface for Windows', () => { 
const interface_ = InterfaceFactory.createInterfaceForOS( OSType.WINDOWS, testParams.ipAddress, testParams.apiKey, testParams.vmName ); expect(interface_).toBeInstanceOf(WindowsComputerInterface); }); it('should throw error for unsupported OS type', () => { expect(() => { InterfaceFactory.createInterfaceForOS( 'unsupported' as OSType, testParams.ipAddress, testParams.apiKey, testParams.vmName ); }).toThrow('Unsupported OS type: unsupported'); }); it('should create interface without API key and VM name', () => { const interface_ = InterfaceFactory.createInterfaceForOS( OSType.MACOS, testParams.ipAddress ); expect(interface_).toBeInstanceOf(MacOSComputerInterface); }); }); }); ``` -------------------------------------------------------------------------------- /.github/scripts/get_pyproject_version.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Verifies that the version in pyproject.toml matches the expected version. 
Usage: python get_pyproject_version.py <pyproject_path> <expected_version> Exit codes: 0 - Versions match 1 - Versions don't match or error occurred """ import sys try: import tomllib except ImportError: # Fallback for Python < 3.11 import toml as tomllib def main(): if len(sys.argv) != 3: print("Usage: python get_pyproject_version.py <pyproject_path> <expected_version>", file=sys.stderr) sys.exit(1) pyproject_path = sys.argv[1] expected_version = sys.argv[2] # tomllib requires binary mode try: with open(pyproject_path, 'rb') as f: data = tomllib.load(f) except FileNotFoundError: print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr) sys.exit(1) except Exception as e: # Fallback to toml if using the old library or handle other errors try: import toml data = toml.load(pyproject_path) except FileNotFoundError: print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr) sys.exit(1) except Exception as toml_err: print(f"❌ ERROR: Failed to parse TOML file: {e}", file=sys.stderr) sys.exit(1) actual_version = data.get('project', {}).get('version') if not actual_version: print("❌ ERROR: No version found in pyproject.toml", file=sys.stderr) sys.exit(1) if actual_version != expected_version: print("❌ Version mismatch detected!", file=sys.stderr) print(f" pyproject.toml version: {actual_version}", file=sys.stderr) print(f" Expected version: {expected_version}", file=sys.stderr) print("", file=sys.stderr) print("The version in pyproject.toml must match the version being published.", file=sys.stderr) print(f"Please update pyproject.toml to version {expected_version} or use the correct tag.", file=sys.stderr) sys.exit(1) print(f"✅ Version consistency check passed: {actual_version}") sys.exit(0) if __name__ == '__main__': main() ``` -------------------------------------------------------------------------------- /libs/kasm/src/ubuntu/install/firefox/custom_startup.sh: -------------------------------------------------------------------------------- ```bash 
#!/usr/bin/env bash set -ex START_COMMAND="firefox" PGREP="firefox" export MAXIMIZE="true" export MAXIMIZE_NAME="Mozilla Firefox" MAXIMIZE_SCRIPT=$STARTUPDIR/maximize_window.sh DEFAULT_ARGS="" ARGS=${APP_ARGS:-$DEFAULT_ARGS} options=$(getopt -o gau: -l go,assign,url: -n "$0" -- "$@") || exit eval set -- "$options" while [[ $1 != -- ]]; do case $1 in -g|--go) GO='true'; shift 1;; -a|--assign) ASSIGN='true'; shift 1;; -u|--url) OPT_URL=$2; shift 2;; *) echo "bad option: $1" >&2; exit 1;; esac done shift # Process non-option arguments. for arg; do echo "arg! $arg" done FORCE=$2 # run with vgl if GPU is available if [ -f /opt/VirtualGL/bin/vglrun ] && [ ! -z "${KASM_EGL_CARD}" ] && [ ! -z "${KASM_RENDERD}" ] && [ -O "${KASM_RENDERD}" ] && [ -O "${KASM_EGL_CARD}" ] ; then START_COMMAND="/opt/VirtualGL/bin/vglrun -d ${KASM_EGL_CARD} $START_COMMAND" fi kasm_exec() { if [ -n "$OPT_URL" ] ; then URL=$OPT_URL elif [ -n "$1" ] ; then URL=$1 fi # Since we are execing into a container that already has the browser running from startup, # when we don't have a URL to open we want to do nothing. Otherwise a second browser instance would open. if [ -n "$URL" ] ; then /usr/bin/filter_ready /usr/bin/desktop_ready bash ${MAXIMIZE_SCRIPT} & $START_COMMAND $ARGS $OPT_URL else echo "No URL specified for exec command. Doing nothing." fi } kasm_startup() { if [ -n "$KASM_URL" ] ; then URL=$KASM_URL elif [ -z "$URL" ] ; then URL=$LAUNCH_URL fi if [ -z "$DISABLE_CUSTOM_STARTUP" ] || [ -n "$FORCE" ] ; then echo "Entering process startup loop" set +x while true do if ! pgrep -x $PGREP > /dev/null then /usr/bin/filter_ready /usr/bin/desktop_ready set +e bash ${MAXIMIZE_SCRIPT} & $START_COMMAND $ARGS $URL set -e fi sleep 1 done set -x fi } if [ -n "$GO" ] || [ -n "$ASSIGN" ] ; then kasm_exec else kasm_startup fi ```