This is page 1 of 16. Use http://codebase.md/trycua/cua?page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ └── py.code-workspace ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── windows-sandbox.md ├── CONTRIBUTING.md 
├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ 
│ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── 
agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift │ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── 
DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ 
├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ 
│ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ 
│ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── 
xfce4-session.xml ├── LICENSE.md ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /libs/typescript/.nvmrc: -------------------------------------------------------------------------------- ``` v24.2.0 ``` -------------------------------------------------------------------------------- /libs/typescript/computer/.gitattributes: -------------------------------------------------------------------------------- ``` * text=auto eol=lf ``` -------------------------------------------------------------------------------- /libs/typescript/core/.gitattributes: -------------------------------------------------------------------------------- ``` * text=auto eol=lf ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/.gitignore: -------------------------------------------------------------------------------- ``` node_modules .DS_Store .env ``` -------------------------------------------------------------------------------- /libs/xfce/.gitignore: 
-------------------------------------------------------------------------------- ``` storage/ shared/ *.log .DS_Store ``` -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- ``` * text=auto *.sh text eol=lf ``` -------------------------------------------------------------------------------- /libs/xfce/.dockerignore: -------------------------------------------------------------------------------- ``` README.md .git .gitignore *.md LICENSE ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/.gitignore: -------------------------------------------------------------------------------- ``` output/ interactive_output/ *_results.md ``` -------------------------------------------------------------------------------- /libs/typescript/.gitignore: -------------------------------------------------------------------------------- ``` node_modules *.log .DS_Store .eslintcache ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/.env.example: -------------------------------------------------------------------------------- ``` OPENAI_API_KEY= CUA_API_KEY= CUA_CONTAINER_NAME= ``` -------------------------------------------------------------------------------- /libs/typescript/computer/.gitignore: -------------------------------------------------------------------------------- ``` node_modules dist *.log .DS_Store .eslintcache ``` -------------------------------------------------------------------------------- /libs/typescript/core/.gitignore: -------------------------------------------------------------------------------- ``` node_modules dist *.log .DS_Store .eslintcache ``` -------------------------------------------------------------------------------- /libs/typescript/computer/.editorconfig: 
-------------------------------------------------------------------------------- ``` root = true [*] indent_size = 2 end_of_line = lf insert_final_newline = true ``` -------------------------------------------------------------------------------- /libs/typescript/core/.editorconfig: -------------------------------------------------------------------------------- ``` root = true [*] indent_size = 2 end_of_line = lf insert_final_newline = true ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/.prettierrc: -------------------------------------------------------------------------------- ``` { "useTabs": false, "semi": true, "singleQuote": true, "trailingComma": "es5", "bracketSpacing": true } ``` -------------------------------------------------------------------------------- /docs/.prettierrc: -------------------------------------------------------------------------------- ``` { "useTabs": false, "semi": true, "singleQuote": true, "trailingComma": "es5", "bracketSpacing": true, "jsxBracketSameLine": true } ``` -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- ``` # deps /node_modules # generated content .contentlayer .content-collections .source # test & build /coverage /.next/ /out/ /build *.tsbuildinfo # misc .DS_Store *.pem /.pnp .pnp.js npm-debug.log* yarn-debug.log* yarn-error.log* # others .env*.local .vercel next-env.d.ts ``` -------------------------------------------------------------------------------- /libs/lumier/.dockerignore: -------------------------------------------------------------------------------- ``` # Ignore macOS system files and trash .DS_Store .Trashes **/.Trashes **/.* # Ignore Python cache __pycache__/ *.pyc *.pyo # Ignore virtual environments .venv/ venv/ # Ignore editor/project files .vscode/ .idea/ *.swp # Ignore test artifacts test-results/ # 
Ignore anything else you don't want in the Docker build context ./examples ``` -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- ``` # Version control .git .github .gitignore # Environment and cache .venv .env .env.local __pycache__ *.pyc *.pyo *.pyd .Python .pytest_cache .pdm-build # Distribution / packaging dist build *.egg-info # Development .vscode .idea *.swp *.swo # Docs docs/site # Notebooks notebooks/.ipynb_checkpoints # Docker Dockerfile .dockerignore ``` -------------------------------------------------------------------------------- /libs/lume/.cursorignore: -------------------------------------------------------------------------------- ``` # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ !libs/lume/scripts/build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Scripts server/scripts/ # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc # Conda .conda/ # Local environment .env.local # macOS DS_Store .DS_Store weights/ weights/icon_detect/ weights/icon_detect/model.pt weights/icon_detect/model.pt.zip weights/icon_detect/model.pt.zip.part* libs/omniparser/weights/icon_detect/model.pt # Example test data and output examples/test_data/ examples/output/ /screenshots/ /experiments/ /logs/ # Xcode # # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore ## User settings xcuserdata/ ## Obj-C/Swift specific *.hmap ## App packaging *.ipa *.dSYM.zip *.dSYM ## Playgrounds timeline.xctimeline playground.xcworkspace # Swift Package Manager # # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. # Packages/ # Package.pins # Package.resolved # *.xcodeproj # # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata # hence it is not needed unless you have added a package configuration file to your project .swiftpm/ .build/ # CocoaPods # # We recommend against adding the Pods directory to your .gitignore. 
However # you should judge for yourself, the pros and cons are mentioned at: # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control # # Pods/ # # Add this line if you want to avoid checking in source code from the Xcode workspace # *.xcworkspace # Carthage # # Add this line if you want to avoid checking in source code from Carthage dependencies. # Carthage/Checkouts Carthage/Build/ # fastlane # # It is recommended to not store the screenshots in the git repo. # Instead, use fastlane to re-generate the screenshots whenever they are needed. # For more information about the recommended setup visit: # https://docs.fastlane.tools/best-practices/source-control/#source-control fastlane/report.xml fastlane/Preview.html fastlane/screenshots/**/*.png fastlane/test_output # Ignore folder ignore # .release .release/ ``` -------------------------------------------------------------------------------- /.cursorignore: -------------------------------------------------------------------------------- ``` # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ !libs/lume/scripts/build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Scripts server/scripts/ # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc # Conda .conda/ # Local environment .env.local # macOS DS_Store .DS_Store weights/ weights/icon_detect/ weights/icon_detect/model.pt weights/icon_detect/model.pt.zip weights/icon_detect/model.pt.zip.part* libs/python/omniparser/weights/icon_detect/model.pt # Example test data and output examples/test_data/ examples/output/ /screenshots/ /experiments/ /logs/ # Xcode # # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore ## User settings xcuserdata/ ## Obj-C/Swift specific *.hmap ## App packaging *.ipa *.dSYM.zip *.dSYM ## Playgrounds timeline.xctimeline playground.xcworkspace # Swift Package Manager # # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 
# Packages/ # Package.pins # Package.resolved # *.xcodeproj # # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata # hence it is not needed unless you have added a package configuration file to your project .swiftpm/ .build/ # CocoaPods # # We recommend against adding the Pods directory to your .gitignore. However # you should judge for yourself, the pros and cons are mentioned at: # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control # # Pods/ # # Add this line if you want to avoid checking in source code from the Xcode workspace # *.xcworkspace # Carthage # # Add this line if you want to avoid checking in source code from Carthage dependencies. # Carthage/Checkouts Carthage/Build/ # fastlane # # It is recommended to not store the screenshots in the git repo. # Instead, use fastlane to re-generate the screenshots whenever they are needed. # For more information about the recommended setup visit: # https://docs.fastlane.tools/best-practices/source-control/#source-control fastlane/report.xml fastlane/Preview.html fastlane/screenshots/**/*.png fastlane/test_output # Ignore folder ignore # .release .release/ ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so node_modules/* */node_modules **/node_modules # Distribution / packaging .Python build/ !libs/lume/scripts/build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/* !libs/lumier/src/lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Scripts server/scripts/ # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc # Conda .conda/ # Local environment .env.local # macOS DS_Store .DS_Store weights/ weights/icon_detect/ weights/icon_detect/model.pt weights/icon_detect/model.pt.zip weights/icon_detect/model.pt.zip.part* libs/python/omniparser/weights/icon_detect/model.pt # Example test data and output examples/test_data/ examples/output/ /screenshots/ /experiments/ /logs/ # Xcode # # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore ## User settings xcuserdata/ ## Obj-C/Swift specific *.hmap ## App packaging *.ipa *.dSYM.zip *.dSYM ## Playgrounds timeline.xctimeline playground.xcworkspace # Swift Package Manager # # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 
# Packages/ # Package.pins # Package.resolved # *.xcodeproj # # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata # hence it is not needed unless you have added a package configuration file to your project .swiftpm/ .build/ # CocoaPods # # We recommend against adding the Pods directory to your .gitignore. However # you should judge for yourself, the pros and cons are mentioned at: # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control # # Pods/ # # Add this line if you want to avoid checking in source code from the Xcode workspace # *.xcworkspace # Carthage # # Add this line if you want to avoid checking in source code from Carthage dependencies. # Carthage/Checkouts Carthage/Build/ # fastlane # # It is recommended to not store the screenshots in the git repo. # Instead, use fastlane to re-generate the screenshots whenever they are needed. # For more information about the recommended setup visit: # https://docs.fastlane.tools/best-practices/source-control/#source-control fastlane/report.xml fastlane/Preview.html fastlane/screenshots/**/*.png fastlane/test_output # Ignore folder ignore # .release .release/ # Shared folder shared # Trajectories trajectories/ # Installation ID Storage .storage/ # Gradio settings .gradio_settings.json # Lumier Storage storage/ # Trashes .Trashes .Trash-1000/ post-provision ``` -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- ``` { "projectName": "cua", "projectOwner": "trycua", "files": [ "README.md" ], "commitType": "docs", "commitConvention": "angular", "contributorsPerLine": 7, "contributors": [ { "login": "f-trycua", "name": "f-trycua", "avatar_url": "https://avatars.githubusercontent.com/u/195596869?v=4", "profile": "https://github.com/f-trycua", "contributions": [ "code" ] }, { "login": "pepicrft", 
"name": "Pedro Piñera Buendía", "avatar_url": "https://avatars.githubusercontent.com/u/663605?v=4", "profile": "http://pepicrft.me", "contributions": [ "code" ] }, { "login": "aktech", "name": "Amit Kumar", "avatar_url": "https://avatars.githubusercontent.com/u/5647941?v=4", "profile": "https://iamit.in", "contributions": [ "code" ] }, { "login": "jellydn", "name": "Dung Duc Huynh (Kaka)", "avatar_url": "https://avatars.githubusercontent.com/u/870029?v=4", "profile": "https://productsway.com/", "contributions": [ "code" ] }, { "login": "ShrootBuck", "name": "Zayd Krunz", "avatar_url": "https://avatars.githubusercontent.com/u/70227235?v=4", "profile": "http://zaydkrunz.com", "contributions": [ "code" ] }, { "login": "PrashantRaj18198", "name": "Prashant Raj", "avatar_url": "https://avatars.githubusercontent.com/u/23168997?v=4", "profile": "https://github.com/PrashantRaj18198", "contributions": [ "code" ] }, { "login": "Leland-Takamine", "name": "Leland Takamine", "avatar_url": "https://avatars.githubusercontent.com/u/847683?v=4", "profile": "https://www.mobile.dev", "contributions": [ "code" ] }, { "login": "ddupont808", "name": "ddupont", "avatar_url": "https://avatars.githubusercontent.com/u/3820588?v=4", "profile": "https://github.com/ddupont808", "contributions": [ "code" ] }, { "login": "Lizzard1123", "name": "Ethan Gutierrez", "avatar_url": "https://avatars.githubusercontent.com/u/46036335?v=4", "profile": "https://github.com/Lizzard1123", "contributions": [ "code" ] }, { "login": "RicterZ", "name": "Ricter Zheng", "avatar_url": "https://avatars.githubusercontent.com/u/5282759?v=4", "profile": "https://ricterz.me", "contributions": [ "code" ] }, { "login": "rahulkarajgikar", "name": "Rahul Karajgikar", "avatar_url": "https://avatars.githubusercontent.com/u/50844303?v=4", "profile": "https://www.trytruffle.ai/", "contributions": [ "code" ] }, { "login": "trospix", "name": "trospix", "avatar_url": "https://avatars.githubusercontent.com/u/81363696?v=4", 
"profile": "https://github.com/trospix", "contributions": [ "code" ] }, { "login": "eltociear", "name": "Ikko Eltociear Ashimine", "avatar_url": "https://avatars.githubusercontent.com/u/22633385?v=4", "profile": "https://wavee.world/invitation/b96d00e6-b802-4a1b-8a66-2e3854a01ffd", "contributions": [ "code" ] }, { "login": "dp221125", "name": "한석호(MilKyo)", "avatar_url": "https://avatars.githubusercontent.com/u/10572119?v=4", "profile": "https://github.com/dp221125", "contributions": [ "code" ] }, { "login": "rahimnathwani", "name": "Rahim Nathwani", "avatar_url": "https://avatars.githubusercontent.com/u/891558?v=4", "profile": "https://www.encona.com/", "contributions": [ "code" ] }, { "login": "mjspeck", "name": "Matt Speck", "avatar_url": "https://avatars.githubusercontent.com/u/20689127?v=4", "profile": "https://mjspeck.github.io/", "contributions": [ "code" ] }, { "login": "FinnBorge", "name": "FinnBorge", "avatar_url": "https://avatars.githubusercontent.com/u/9272726?v=4", "profile": "https://github.com/FinnBorge", "contributions": [ "code" ] }, { "login": "jklapacz", "name": "Jakub Klapacz", "avatar_url": "https://avatars.githubusercontent.com/u/5343758?v=4", "profile": "https://github.com/jklapacz", "contributions": [ "code" ] }, { "login": "evnsnclr", "name": "Evan smith", "avatar_url": "https://avatars.githubusercontent.com/u/139897548?v=4", "profile": "https://github.com/evnsnclr", "contributions": [ "code" ] } ] } ``` -------------------------------------------------------------------------------- /libs/python/core/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" 
height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/cua-core/) </h1> </div> **Cua Core** provides essential shared functionality and utilities used across the Cua ecosystem: - Privacy-focused telemetry system for transparent usage analytics - Common helper functions and utilities used by other Cua packages - Core infrastructure components shared between modules ## Installation ```bash pip install cua-core ``` ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/README.md: -------------------------------------------------------------------------------- ```markdown # cua-cloud-openai Example This example demonstrates how to control a Cua Cloud Sandbox using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. ## Overview - Connects to a Cua Cloud Sandbox via the `@trycua/computer` library - Sends screenshots and instructions to OpenAI's computer-use model - Executes AI-generated actions (clicks, typing, etc.) inside the sandbox - Designed for Linux sandboxes, but can be adapted for other OS types ## Getting Started 1. **Install dependencies:** ```bash npm install ``` 2. **Set up environment variables:** Create a `.env` file with the following variables: - `OPENAI_API_KEY` — your OpenAI API key - `CUA_API_KEY` — your Cua Cloud API key - `CUA_CONTAINER_NAME` — the name of your provisioned sandbox 3. 
**Run the example:** ```bash npx tsx src/index.ts ``` ## Files - `src/index.ts` — Main example script - `src/helpers.ts` — Helper for executing actions on the container ## Further Reading For a step-by-step tutorial and more detailed explanation, see the accompanying blog post: ➡️ [Controlling a Cua Cloud Sandbox with JavaScript](https://placeholder-url-to-blog-post.com) _(This link will be updated once the article is published.)_ --- If you have questions or issues, please open an issue or contact the maintainers. ``` -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- ```markdown # CUA Notebooks This folder contains Jupyter notebooks that demonstrate the core functionality of the CUA (Computer Use Automation) system. These notebooks serve as interactive examples and quickstart guides for different components of the CUA platform. ## Available Notebooks ### Core Components - **`computer_nb.ipynb`** - Demonstrates the Computer API for programmatically operating sandbox VMs using either Cua Cloud Sandbox or local Lume VMs on Apple Silicon macOS systems - **`agent_nb.ipynb`** - Shows how to use CUA's Agent to run automated workflows in virtual sandboxes with various AI models (OpenAI, Anthropic, local models) - **`pylume_nb.ipynb`** - Quickstart guide for the pylume Python library, which handles VM creation, management, and image operations - **`computer_server_nb.ipynb`** - Demonstrates how to host and configure the Computer server that powers the Computer API ### Evaluation & Benchmarking - **`eval_osworld.ipynb`** - Shows ComputerAgent integration with HUD for OSWorld benchmarking, supporting both Claude and OpenAI models ### Tutorials - **`blog/`** - Tutorial notebooks from blog posts: - `build-your-own-operator-on-macos-1.ipynb` - Part 1: Building a CUA operator using OpenAI's computer-use-preview model - 
`build-your-own-operator-on-macos-2.ipynb` - Part 2: Using the cua-agent package for more advanced automation ``` -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- ```markdown # docs This is a Next.js application generated with [Create Fumadocs](https://github.com/fuma-nama/fumadocs). Run development server: ```bash npm run dev # or pnpm dev # or yarn dev ``` Open http://localhost:3000 with your browser to see the result. ## Explore In the project, you can see: - `lib/source.ts`: Code for content source adapter, [`loader()`](https://fumadocs.dev/docs/headless/source-api) provides the interface to access your content. - `app/layout.config.tsx`: Shared options for layouts, optional but preferred to keep. | Route | Description | | ------------------------- | ------------------------------------------------------ | | `app/(home)` | The route group for your landing page and other pages. | | `app/docs` | The documentation layout and pages. | | `app/api/search/route.ts` | The Route Handler for search. | ### Fumadocs MDX A `source.config.ts` config file has been included, you can customise different options like frontmatter schema. Read the [Introduction](https://fumadocs.dev/docs/mdx) for further details. ## Learn More To learn more about Next.js and Fumadocs, take a look at the following resources: - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 
- [Fumadocs](https://fumadocs.vercel.app) - learn about Fumadocs ``` -------------------------------------------------------------------------------- /libs/typescript/core/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://www.npmjs.com/package/@trycua/core) </h1> </div> **Cua Core** provides essential shared functionality and utilities used across the Cua ecosystem: - Privacy-focused telemetry system for transparent usage analytics - Common helper functions and utilities used by other Cua packages - Core infrastructure components shared between modules ## Installation ```bash pnpm install @trycua/core ``` ## Development Install dependencies: ```bash pnpm install ``` Run the unit tests: ```bash pnpm test ``` Build the library: ```bash pnpm build ``` ## License [MIT](./LICENSE) License 2025 [CUA](https://github.com/trycua) ``` -------------------------------------------------------------------------------- /libs/typescript/README.md: -------------------------------------------------------------------------------- ```markdown # CUA TypeScript Libraries This repository contains TypeScript implementations of the CUA libraries: - `@trycua/core`: Core functionality including telemetry and logging - `@trycua/computer`: Computer interaction SDK for VM management and control ## Project Structure ```text libs/typescript/ ├── computer/ # Computer SDK package 
├── core/ # Core functionality package ├── package.json # Root package configuration └── pnpm-workspace.yaml # Workspace configuration ``` ## Prerequisites - [Node.js](https://nodejs.org/) (v18 or later) - [pnpm](https://pnpm.io/) (v10 or later) ## Setup and Installation 1. Install dependencies for all packages: ```bash pnpm install ``` 1. Build all packages: ```bash pnpm build:all ``` ## Development Workflow ### Building Packages Build all packages in the correct dependency order: ```bash pnpm build:all ``` Build specific packages: ```bash # Build core package pnpm --filter @trycua/core build # Build computer package pnpm --filter @trycua/computer build ``` ### Running Tests Run tests for all packages: ```bash pnpm test:all ``` Run tests for specific packages: ```bash # Test core package pnpm --filter @trycua/core test # Test computer package pnpm --filter @trycua/computer test ``` ### Linting Lint all packages: ```bash pnpm lint:all ``` Fix linting issues: ```bash pnpm lint:fix:all ``` ## Package Details ### @trycua/core Core functionality for CUA libraries including: - Telemetry with PostHog integration - Common utilities and types ### @trycua/computer Computer interaction SDK for managing and controlling virtual machines: - VM provider system (Cloud) - Interface system for OS-specific interactions - Screenshot, keyboard, and mouse control - Command execution ## Publishing Prepare packages for publishing: ```bash pnpm -r build ``` Publish packages: ```bash pnpm -r publish ``` ``` -------------------------------------------------------------------------------- /libs/typescript/agent/examples/README.md: -------------------------------------------------------------------------------- ```markdown # CUA Agent Client Examples This directory contains examples demonstrating how to use the `@trycua/agent` client library. ## Browser Example ### `browser-example.html` A simple HTML page that demonstrates using the CUA Agent Client in a browser environment. 
**Features:** - Connect to HTTP/HTTPS or P2P (peer://) agent proxies - Send text messages to any supported model - View responses in real-time - Health check functionality - Clear, simple interface with no external dependencies **Usage:** 1. **Build the library first:** ```bash cd ../ pnpm build ``` 2. **Start a local web server** (required for ES modules): ```bash # Option 1: Using Python python -m http.server 8080 # Option 2: Using Node.js (if you have http-server installed) npx http-server -p 8080 # Option 3: Using any other local server ``` 3. **Open in browser:** Navigate to `http://localhost:8080/examples/playground-example.html` 4. **Configure and test:** - Enter an agent URL (e.g., `https://localhost:8000` or `peer://some-peer-id`) - Enter a model name (e.g., `anthropic/claude-3-5-sonnet-20241022`) - Type a message and click "Send Message" or press Enter - View the response in the output textarea **Supported URLs:** - **HTTP/HTTPS**: `https://localhost:8000`, `http://my-agent-server.com:8080` - **Peer-to-Peer**: `peer://computer-agent-proxy`, `peer://any-peer-id` **Example Models:** - `anthropic/claude-3-5-sonnet-20241022` - `openai/gpt-4` - `huggingface-local/microsoft/UI-TARS-7B` **Note:** Make sure you have a CUA agent proxy server running at the specified URL before testing. 
## Running Agent Proxy Server To test the examples, you'll need a CUA agent proxy server running: ```bash # HTTP server (default port 8000) python -m agent.proxy.cli # P2P server python -m agent.proxy.cli --mode p2p # Both HTTP and P2P python -m agent.proxy.cli --mode both ``` ``` -------------------------------------------------------------------------------- /libs/python/computer-server/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/cua-computer-server/) </h1> </div> **Computer Server** is the server component for the Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). 
## Features - WebSocket API for computer-use - Cross-platform support (macOS, Linux) - Integration with CUA computer library for screen control, keyboard/mouse automation, and accessibility ## Install To install the Computer Server: ```bash pip install cua-computer-server ``` ## Run Refer to this notebook for a step-by-step guide on how to use the Computer-Use Server on the host system or VM: - [Computer-Use Server](../../../notebooks/computer_server_nb.ipynb) ## Docs - [Commands](https://trycua.com/docs/libraries/computer-server/Commands) - [REST-API](https://trycua.com/docs/libraries/computer-server/REST-API) - [WebSocket-API](https://trycua.com/docs/libraries/computer-server/WebSocket-API) - [Index](https://trycua.com/docs/libraries/computer-server/index) ``` -------------------------------------------------------------------------------- /libs/python/pylume/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/pylume/) </h1> </div> **pylume** is a lightweight Python library based on [lume](https://github.com/trycua/lume) to create, run and manage macOS and Linux virtual machines (VMs) natively on Apple Silicon. ```bash pip install pylume ``` ## Usage Please refer to this [Notebook](./samples/nb.ipynb) for a quickstart.
More details about the underlying API used by pylume are available [here](https://github.com/trycua/lume/docs/API-Reference.md). ## Prebuilt Images Prebuilt images are available on [ghcr.io/trycua](https://github.com/orgs/trycua/packages). These images come pre-configured with an SSH server and auto-login enabled. ## Contributing We welcome and greatly appreciate contributions to pylume! Whether you're improving documentation, adding new features, fixing bugs, or adding new VM images, your efforts help make pylume better for everyone. Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get assistance. ## License pylume is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details. ## Stargazers over time [](https://starchart.cc/trycua/pylume) ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/README.md: -------------------------------------------------------------------------------- ```markdown # Computer Agent Benchmarks This directory contains benchmarks designed to test agent providers in the Computer Agent SDK against reference agent implementations. ## Overview The benchmark system evaluates models on GUI grounding tasks, specifically click prediction accuracy. It supports both: - **Computer Agent SDK providers** (using model strings like `"huggingface-local/HelloKKMe/GTA1-7B"`) - **Reference agent implementations** (custom model classes implementing the `ModelProtocol`) ## Available Benchmarks ### 1. ScreenSpot-v2 (`ss-v2.py`) - **Dataset**: ScreenSpot-v2 (click-only GUI grounding) - **Format**: Standard resolution screenshots - **Task**: Predict click coordinates given an instruction and image - **Metrics**: Accuracy, Error Rate, Timing, VRAM usage ### 2.
ScreenSpot-Pro (`ss-pro.py`) - **Dataset**: ScreenSpot-Pro (high-resolution click-only GUI grounding) - **Format**: High-resolution screenshots - **Task**: Predict click coordinates given an instruction and image - **Metrics**: Accuracy, Error Rate, Timing, VRAM usage ### 3. Interactive Testing (`interactive.py`) - **Real-time testing**: Take screenshots and visualize model predictions - **Commands**: - Type instruction → test all models on last screenshot - `screenshot` → take screenshot - `models` → list available models - `quit`/`exit` → exit tool - **Output**: Visual predictions with crosshairs for each model ## Running Benchmarks ### 1. Configure Models Edit `get_available_models()` in `utils.py` to specify which models you want to test. ### 2. Run Benchmark ```bash # ScreenSpot-v2 benchmark python ss-v2.py --samples 50 # ScreenSpot-Pro benchmark python ss-pro.py --samples 50 # Interactive testing python interactive.py ``` ## Output ### Console Output ``` Model Results: Accuracy: 85.50% (171/200) Avg Time: 1.23s (0.89s - 2.45s) VRAM Usage: 4.5GB (max) / 3.4GB (avg) ``` ### Generated Files - **Markdown Report**: `*_results.md` with detailed results tables - **Visualizations**: `output/` directory with prediction visualizations - **Interactive Output**: `interactive_output/` for interactive session results ## Contributing To add a new reference model, follow the instructions in [contrib.md](contrib.md). ``` -------------------------------------------------------------------------------- /.devcontainer/README.md: -------------------------------------------------------------------------------- ```markdown # Dev Container Setup This repository includes a Dev Container configuration that simplifies the development setup to just a few steps: ## Quick Start  1.
**Install the Dev Containers extension ([VS Code](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))** 2. **Open the repository in the Dev Container:** - Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS) - Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: `https://github.com/trycua/cua.git` (if not cloned) or `Dev Containers: Open Folder in Container...` (if git cloned). > **Note**: On WindSurf, the post install hook might not run automatically. If so, run `/bin/bash .devcontainer/post-install.sh` manually. 3. **Open the VS Code workspace:** Once the post-install.sh is done running, open the `.vscode/py.code-workspace` workspace and press  . 4. **Run the Agent UI example:** Click  to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** to enable remote debugging, select 'Yes' to proceed. 5. **Access the Gradio UI:** The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. ## What's Included The dev container automatically: - ✅ Sets up Python 3.11 environment - ✅ Installs all system dependencies (build tools, OpenGL, etc.) - ✅ Configures Python paths for all packages - ✅ Installs Python extensions (Black, Ruff, Pylance) - ✅ Forwards port 7860 for the Gradio web UI - ✅ Mounts your source code for live editing - ✅ Creates the required `.env.local` file ## Running Examples After the container is built, you can run examples directly: ```bash # Run the agent UI (Gradio web interface) python examples/agent_ui_examples.py # Run computer examples python examples/computer_examples.py # Run computer UI examples python examples/computer_ui_examples.py ``` The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. 
## Environment Variables You'll need to add your API keys to `.env.local`: ```bash # Required for Anthropic provider ANTHROPIC_API_KEY=your_anthropic_key_here # Required for OpenAI provider OPENAI_API_KEY=your_openai_key_here ``` ## Notes - The container connects to `host.docker.internal:7777` for Lume server communication - All Python packages are pre-installed and configured - Source code changes are reflected immediately (no rebuild needed) - The container uses the same Dockerfile as the regular Docker development environment ``` -------------------------------------------------------------------------------- /libs/typescript/computer/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://www.npmjs.com/package/@trycua/computer) </h1> </div> **@trycua/computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, Playwright-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments. 
### Get started with Computer <div align="center"> <img src="https://raw.githubusercontent.com/trycua/cua/main/img/computer.png"/> </div> ```typescript import { Computer, OSType } from '@trycua/computer'; // Create a new computer instance const computer = new Computer({ osType: OSType.LINUX, name: 's-linux-vm_id', apiKey: 'your-api-key' }); // Start the computer await computer.run(); // Get the computer interface for interaction const computerInterface = computer.interface; // Take a screenshot const screenshot = await computerInterface.getScreenshot(); // In a Node.js environment, you might save it like this: // import * as fs from 'fs'; // fs.writeFileSync('screenshot.png', Buffer.from(screenshot)); // Click at coordinates await computerInterface.click(500, 300); // Type text await computerInterface.typeText('Hello, world!'); // Stop the computer await computer.stop(); ``` ## Install To install the Computer-Use Interface (CUI): ```bash npm install @trycua/computer # or pnpm add @trycua/computer ``` The `@trycua/computer` package provides the TypeScript library for interacting with computer interfaces. 
## Run Refer to this example for a step-by-step guide on how to use the Computer-Use Interface (CUI): - [Computer-Use Interface (CUI)](https://github.com/trycua/cua/tree/main/examples/computer-example-ts) ## Docs - [Computers](https://trycua.com/docs/computer-sdk/computers) - [Commands](https://trycua.com/docs/computer-sdk/commands) - [Computer UI](https://trycua.com/docs/computer-sdk/computer-ui) ## License [MIT](./LICENSE) License 2025 [CUA](https://github.com/trycua) ``` -------------------------------------------------------------------------------- /libs/python/computer/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/cua-computer/) </h1> </div> **cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments. 
### Get started with Computer <div align="center"> <img src="https://raw.githubusercontent.com/trycua/cua/main/img/computer.png"/> </div> ```python from computer import Computer computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4") try: await computer.run() screenshot = await computer.interface.screenshot() with open("screenshot.png", "wb") as f: f.write(screenshot) await computer.interface.move_cursor(100, 100) await computer.interface.left_click() await computer.interface.right_click(300, 300) await computer.interface.double_click(400, 400) await computer.interface.type("Hello, World!") await computer.interface.press_key("enter") await computer.interface.set_clipboard("Test clipboard") content = await computer.interface.copy_to_clipboard() print(f"Clipboard content: {content}") finally: await computer.stop() ``` ## Install To install the Computer-Use Interface (CUI): ```bash pip install "cua-computer[all]" ``` The `cua-computer` PyPI package automatically pulls the latest executable version of Lume through [pylume](https://github.com/trycua/pylume).
## Run Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI): - [Computer-Use Interface (CUI)](https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb) ## Docs - [Computers](https://trycua.com/docs/computer-sdk/computers) - [Commands](https://trycua.com/docs/computer-sdk/commands) - [Computer UI](https://trycua.com/docs/computer-sdk/computer-ui) - [Sandboxed Python](https://trycua.com/docs/computer-sdk/sandboxed-python) ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/cua-mcp-server/) </h1> </div> **cua-mcp-server** is an MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients. ### Get started with Agent ## Prerequisites Cua MCP Server requires [lume](https://github.com/trycua/cua/blob/main/libs/lume/README.md#install) to be installed.
## Install Download and run the installation script: ```bash curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/python/mcp-server/scripts/install_mcp_server.sh | bash ``` You can then use the script in your MCP configuration like this: ```json { "mcpServers": { "cua-agent": { "command": "/bin/bash", "args": ["~/.cua/start_mcp_server.sh"], "env": { "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022" } } } } ``` ## Development Use this configuration to develop with the cua-mcp-server directly without installation: ```json { "mcpServers": { "cua-agent": { "command": "/bin/bash", "args": ["~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"], "env": { "CUA_MODEL_NAME": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" } } } } ``` This configuration: - Uses the start_mcp_server.sh script which automatically sets up the Python path and runs the server module - Works with Claude Desktop, Cursor, or any other MCP client - Automatically uses your development code without requiring installation Just add this to your MCP client's configuration and it will use your local development version of the server. 
## Docs - [Installation](https://trycua.com/docs/libraries/mcp-server/installation) - [Configuration](https://trycua.com/docs/libraries/mcp-server/configuration) - [Usage](https://trycua.com/docs/libraries/mcp-server/usage) - [Tools](https://trycua.com/docs/libraries/mcp-server/tools) - [Client Integrations](https://trycua.com/docs/libraries/mcp-server/client-integrations) - [LLM Integrations](https://trycua.com/docs/libraries/mcp-server/llm-integrations) ``` -------------------------------------------------------------------------------- /libs/lume/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) </h1> </div> **lume** is a lightweight Command Line Interface and local API server to create, run and manage macOS and Linux virtual machines (VMs) with near-native performance on Apple Silicon, using Apple's `Virtualization.Framework`. ### Run prebuilt macOS images in just 1 step <div align="center"> <img src="../../img/cli.png" alt="lume cli"> </div> ```bash lume run macos-sequoia-vanilla:latest ``` ## Quickstart Install and run a prebuilt macOS VM in two commands: ```bash # Install Lume /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" # Pull & start a macOS image lume run macos-sequoia-vanilla:latest ``` <Callout title="Security Note"> All prebuilt images use the default password `lume`. Change this immediately after your first login using the `passwd` command. 
</Callout> **System Requirements**: - Apple Silicon Mac (M1, M2, M3, etc.) - macOS 13.0 or later - At least 8GB of RAM (16GB recommended) - At least 50GB of free disk space ## Development To get set up with Lume for development, read [these instructions](Development.md). ## Docs - [Installation](https://trycua.com/docs/libraries/lume/installation) - [Prebuilt Images](https://trycua.com/docs/libraries/lume/prebuilt-images) - [CLI Reference](https://trycua.com/docs/libraries/lume/cli-reference) - [HTTP API](https://trycua.com/docs/libraries/lume/http-api) - [FAQ](https://trycua.com/docs/libraries/lume/faq) ## Contributing We welcome and greatly appreciate contributions to lume! Whether you're improving documentation, adding new features, fixing bugs, or adding new VM images, your efforts help make lume better for everyone. For detailed instructions on how to contribute, please refer to our [Contributing Guidelines](CONTRIBUTING.md). Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get assistance. ## License lume is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details. ## Trademarks Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonical are registered trademarks of Canonical Ltd. This project is not affiliated with, endorsed by, or sponsored by Apple Inc. or Canonical Ltd. ``` -------------------------------------------------------------------------------- /libs/kasm/README.md: -------------------------------------------------------------------------------- ```markdown # CUA Ubuntu Container Containerized virtual desktop for Computer-Using Agents (CUA). Utilizes Kasm's MIT-licensed Ubuntu XFCE container as a base with computer-server pre-installed. 
## Features - Ubuntu 22.04 (Jammy) with XFCE desktop environment - Pre-installed computer-server for remote computer control - VNC access for visual desktop interaction - Python 3.11 with necessary libraries - Screen capture tools (gnome-screenshot, wmctrl, ffmpeg) - Clipboard utilities (xclip, socat) ## Usage ### Building the Container ```bash docker build -t cua-ubuntu:latest . ``` ### Pushing to Registry ```bash # Tag for Docker Hub (replace 'trycua' with your Docker Hub username) docker tag cua-ubuntu:latest trycua/cua-ubuntu:latest # Login to Docker Hub docker login # Push to Docker Hub docker push trycua/cua-ubuntu:latest ``` ### Running the Container Manually ```bash docker run --rm -it --shm-size=512m -p 6901:6901 -p 8000:8000 -e VNCOPTIONS=-disableBasicAuth cua-ubuntu:latest ``` - **VNC Access**: Available at `http://localhost:6901` - **Computer Server API**: Available at `http://localhost:8000` ### Using with CUA Docker Provider This container is designed to work with the CUA Docker provider for automated container management: ```python from computer.providers.factory import VMProviderFactory # Create docker provider provider = VMProviderFactory.create_provider( provider_type="docker", image="cua-ubuntu:latest", port=8000, # computer-server API port noVNC_port=6901 # VNC port ) # Run a container async with provider: vm_info = await provider.run_vm( image="cua-ubuntu:latest", name="my-cua-container", run_opts={ "memory": "4GB", "cpu": 2, "vnc_port": 6901, "api_port": 8000 } ) ``` ## Container Configuration ### Ports - **6901**: VNC web interface (noVNC) - **8000**: Computer-server API endpoint ### Environment Variables - `VNC_PW`: VNC password (default: "password") - `DISPLAY`: X11 display (set to ":0") ### Volumes - `/home/kasm-user/storage`: Persistent storage mount point - `/home/kasm-user/shared`: Shared folder mount point ## Creating Filesystem Snapshots You can create a filesystem snapshot of the container at any time: ```bash docker commit 
<container_id> cua-ubuntu-snapshot:latest ``` Then run the snapshot: ```bash docker run --rm -it --shm-size=512m -p 6901:6901 -p 8080:8080 -e VNCOPTIONS=-disableBasicAuth cua-ubuntu-snapshot:latest ``` Memory snapshots are available using the experimental `docker checkpoint` command. [Docker Checkpoint Documentation](https://docs.docker.com/reference/cli/docker/checkpoint/) ## Integration with CUA System This container integrates seamlessly with the CUA computer provider system: - **Automatic Management**: Use the Docker provider for lifecycle management - **Resource Control**: Configure memory, CPU, and storage limits - **Network Access**: Automatic port mapping and IP detection - **Storage Persistence**: Mount host directories for persistent data - **Monitoring**: Real-time container status and health checking ``` -------------------------------------------------------------------------------- /libs/python/som/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/cua-computer/) </h1> </div> **Som** (Set-of-Mark) is a visual grounding component for the Computer-Use Agent (CUA) framework powering Cua, for detecting and analyzing UI elements in screenshots. Optimized for macOS Silicon with Metal Performance Shaders (MPS), it combines YOLO-based icon detection with EasyOCR text recognition to provide comprehensive UI element analysis. 
## Features - Optimized for Apple Silicon with MPS acceleration - Icon detection using YOLO with multi-scale processing - Text recognition using EasyOCR (GPU-accelerated) - Automatic hardware detection (MPS → CUDA → CPU) - Smart detection parameters tuned for UI elements - Detailed visualization with numbered annotations - Performance benchmarking tools ## System Requirements - **Recommended**: macOS with Apple Silicon - Uses Metal Performance Shaders (MPS) - Multi-scale detection enabled - ~0.4s average detection time - **Supported**: Any Python 3.11+ environment - Falls back to CPU if no GPU available - Single-scale detection on CPU - ~1.3s average detection time ## Installation ```bash # Using PDM (recommended) pdm install # Using pip pip install -e . ``` ## Quick Start ```python from som import OmniParser from PIL import Image # Initialize parser parser = OmniParser() # Process an image image = Image.open("screenshot.png") result = parser.parse( image, box_threshold=0.3, # Confidence threshold iou_threshold=0.1, # Overlap threshold use_ocr=True # Enable text detection ) # Access results for elem in result.elements: if elem.type == "icon": print(f"Icon: confidence={elem.confidence:.3f}, bbox={elem.bbox.coordinates}") else: # text print(f"Text: '{elem.content}', confidence={elem.confidence:.3f}") ``` ## Docs - [Configuration](https://trycua.com/docs/libraries/som/configuration) ## Development ### Test Data - Place test screenshots in `examples/test_data/` - Not tracked in git to keep repository size manageable - Default test image: `test_screen.png` (1920x1080) ### Running Tests ```bash # Run benchmark with no OCR python examples/omniparser_examples.py examples/test_data/test_screen.png --runs 5 --ocr none # Run benchmark with OCR python examples/omniparser_examples.py examples/test_data/test_screen.png --runs 5 --ocr easyocr ``` ## License MIT License - See LICENSE file for details. 
``` -------------------------------------------------------------------------------- /libs/lumier/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) </h1> </div> macOS and Linux virtual machines in a Docker container. <div align="center"> <video src="https://github.com/user-attachments/assets/2ecca01c-cb6f-4c35-a5a7-69bc58bd94e2" width="800" controls></video> </div> ## What is Lumier? **Lumier** is an interface for running macOS virtual machines with minimal setup. It uses Docker as a packaging system to deliver a pre-configured environment that connects to the `lume` virtualization service running on your host machine. With Lumier, you get: - A ready-to-use macOS or Linux virtual machine in minutes - Browser-based VNC access to your VM - Easy file sharing between your host and VM - Simple configuration through environment variables ## Requirements Before using Lumier, make sure you have: 1. **Docker for Apple Silicon** - download it [here](https://desktop.docker.com/mac/main/arm64/Docker.dmg) and follow the installation instructions. 2. **Lume** - This is the virtualization CLI that powers Lumier. 
Install it with this command: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` ## Getting Started ```bash # Run the container with temporary storage (using pre-built image from Docker Hub) docker run -it --rm \ --name macos-vm \ -p 8006:8006 \ -e VM_NAME=macos-vm \ -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ -e CPU_CORES=4 \ -e RAM_SIZE=8192 \ trycua/lumier:latest ``` After running the command above, you can access your macOS VM through a web browser (e.g., http://localhost:8006). > **Note:** With the basic setup above, your VM will be reset when you stop the container (ephemeral mode). This means any changes you make inside the macOS VM will be lost. See [the documentation](https://trycua.com/docs/libraries/lumier/docker) for how to save your VM state. ## Docs - [Installation](https://trycua.com/docs/libraries/lumier/installation) - [Docker](https://trycua.com/docs/libraries/lumier/docker) - [Docker Compose](https://trycua.com/docs/libraries/lumier/docker-compose) - [Building Lumier](https://trycua.com/docs/libraries/lumier/building-lumier) ## Credits This project was inspired by [dockur/windows](https://github.com/dockur/windows) and [dockur/macos](https://github.com/dockur/macos), which pioneered the approach of running Windows and macOS VMs in Docker containers. Main differences with dockur/macos: - Lumier is specifically designed for macOS virtualization - Lumier supports Apple Silicon (M1/M2/M3/M4) while dockur/macos only supports Intel - Lumier uses the Apple Virtualization Framework (Vz) through the `lume` CLI to create true virtual machines, while dockur relies on KVM. 
- Image specification is different, with Lumier and Lume relying on the Apple Vz spec (disk.img and nvram.bin) ``` -------------------------------------------------------------------------------- /libs/python/agent/README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <h1> <div class="image-wrapper" style="display: inline-block;"> <picture> <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;"> <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;"> <img alt="Shows my svg"> </picture> </div> [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) [](https://pypi.org/project/cua-computer/) </h1> </div> **cua-agent** is a general Computer-Use framework with liteLLM integration for running agentic workflows on macOS, Windows, and Linux sandboxes. It provides a unified interface for computer-use agents across multiple LLM providers with an advanced callback system for extensibility. 
## Features - **Safe Computer-Use/Tool-Use**: Using Computer SDK for sandboxed desktops - **Multi-Agent Support**: Anthropic Claude, OpenAI computer-use-preview, UI-TARS, Omniparser + any LLM - **Multi-API Support**: Take advantage of liteLLM supporting 100+ LLMs / model APIs, including local models (`huggingface-local/`, `ollama_chat/`, `mlx/`) - **Cross-Platform**: Works on Windows, macOS, and Linux with cloud and local computer instances - **Extensible Callbacks**: Built-in support for image retention, cache control, PII anonymization, budget limits, and trajectory tracking ## Install ```bash pip install "cua-agent[all]" ``` ## Quick Start ```python import asyncio import os from agent import ComputerAgent from computer import Computer async def main(): # Set up computer instance async with Computer( os_type="linux", provider_type="cloud", name=os.getenv("CUA_CONTAINER_NAME"), api_key=os.getenv("CUA_API_KEY") ) as computer: # Create agent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], only_n_most_recent_images=3, trajectory_dir="trajectories", max_trajectory_budget=5.0 # $5 budget limit ) # Run agent messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] async for result in agent.run(messages): for item in result["output"]: if item["type"] == "message": print(item["content"][0]["text"]) if __name__ == "__main__": asyncio.run(main()) ``` ## Docs - [Agent Loops](https://trycua.com/docs/agent-sdk/agent-loops) - [Supported Agents](https://trycua.com/docs/agent-sdk/supported-agents) - [Supported Models](https://trycua.com/docs/agent-sdk/supported-models) - [Chat History](https://trycua.com/docs/agent-sdk/chat-history) - [Callbacks](https://trycua.com/docs/agent-sdk/callbacks) - [Custom Tools](https://trycua.com/docs/agent-sdk/custom-tools) - [Custom Computer Handlers](https://trycua.com/docs/agent-sdk/custom-computer-handlers) - [Prompt Caching](https://trycua.com/docs/agent-sdk/prompt-caching) - 
[Usage Tracking](https://trycua.com/docs/agent-sdk/usage-tracking) - [Benchmarks](https://trycua.com/docs/agent-sdk/benchmarks) ## License MIT License - see LICENSE file for details. ``` -------------------------------------------------------------------------------- /libs/typescript/agent/README.md: -------------------------------------------------------------------------------- ```markdown # @trycua/agent TypeScript SDK for CUA agent interaction. Connect to CUA agent proxies via HTTP/HTTPS or peer-to-peer (WebRTC) connections. ## Installation ```bash npm install @trycua/agent # or pnpm add @trycua/agent # or yarn add @trycua/agent ``` ## Usage ### Basic Usage ```typescript import AgentClient from "@trycua/agent"; // Connect to local HTTP server const client = new AgentClient("https://localhost:8000"); // Connect to a cloud container (port 8443 over HTTPS) const cloud = new AgentClient( "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443", { apiKey: process.env.NEXT_PUBLIC_CUA_API_KEY || "" } ); // Connect to peer const peerClient = new AgentClient("peer://my-agent-proxy"); // Send a simple text request const response = await client.responses.create({ model: "anthropic/claude-3-5-sonnet-20241022", input: "Write a one-sentence bedtime story about a unicorn.", // Optional per-request env overrides env: { OPENAI_API_KEY: "sk-..." } }); console.log(response.output); ``` ### Multi-modal Requests ```typescript const response = await client.responses.create({ model: "anthropic/claude-3-5-sonnet-20241022", input: [ { role: "user", content: [ { type: "input_text", text: "What is in this image?" }, { type: "input_image", image_url: "https://example.com/image.jpg" } ] } ], env: { OPENROUTER_API_KEY: "sk-..." 
} }); ``` ### Advanced Configuration ```typescript const client = new AgentClient("https://localhost:8000", { timeout: 60000, // 60 second timeout retries: 5, // 5 retry attempts apiKey: "cua_...", // sent as X-API-Key header when using HTTP/HTTPS }); const response = await client.responses.create({ model: "anthropic/claude-3-5-sonnet-20241022", input: "Hello, world!", agent_kwargs: { save_trajectory: true, verbosity: 20 }, computer_kwargs: { os_type: "linux", provider_type: "cloud" }, // Per-request env overrides env: { ANTHROPIC_API_KEY: "sk-...", OPENROUTER_API_KEY: "sk-..." } }); ``` ### Health Check ```typescript const health = await client.health(); console.log(health.status); // 'healthy', 'unhealthy', 'unreachable', 'connected', 'disconnected' ``` ### Cleanup ```typescript // Clean up peer connections when done await client.disconnect(); ``` ## API Reference ### AgentClient #### Constructor ```typescript new AgentClient(url: string, options?: AgentClientOptions) ``` - `url`: Connection URL. Supports `http://`, `https://`, or `peer://` protocols - `options`: Optional configuration object #### Methods ##### responses.create(request: AgentRequest): Promise<AgentResponse> Send a request to the agent and get a response. ##### health(): Promise<{status: string}> Check the health/connection status of the agent. ##### disconnect(): Promise<void> Clean up resources and close connections. 
### Types #### AgentRequest ```typescript interface AgentRequest { model: string; input: string | AgentMessage[]; agent_kwargs?: { save_trajectory?: boolean; verbosity?: number; [key: string]: any; }; computer_kwargs?: { os_type?: string; provider_type?: string; [key: string]: any; }; // Optional per-request environment overrides env?: Record<string, string>; } ``` #### AgentResponse ```typescript interface AgentResponse { output: AgentMessage[]; usage: Usage; } interface Usage { prompt_tokens: number; completion_tokens: number; total_tokens: number; response_cost: number; } ``` The `output` array contains the conversation history including: - User messages - Agent reasoning/thinking - Computer actions and their results - Final agent responses The `usage` object provides token counts and cost information for the request. ## Connection Types ### HTTP/HTTPS Connect to a CUA agent proxy server: ```typescript // Local const client = new AgentClient("https://my-agent-server.com:8000", { apiKey: "cua_..." }); // Cloud container (port 8443) const cloud = new AgentClient( "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443", { apiKey: "cua_..." } ); ``` Notes: - The client sends the API key as `X-API-Key` for HTTP/HTTPS connections. - Cloud containers listen on `:8443` with HTTPS. ### Peer-to-Peer (WebRTC) Connect directly to another peer using WebRTC: ```typescript const client = new AgentClient("peer://agent-proxy-peer-id"); ``` The client uses PeerJS with default configuration for peer connections. ## License MIT ``` -------------------------------------------------------------------------------- /.github/scripts/tests/README.md: -------------------------------------------------------------------------------- ```markdown # Tests for .github/scripts This directory contains comprehensive tests for the GitHub workflow scripts using Python's built-in testing framework. 
## Requirements **No external dependencies required!** This test suite uses: - `unittest` - Python's built-in testing framework - `tomllib` - Python 3.11+ built-in TOML parser For Python < 3.11, the `toml` package is used as a fallback. ## Running Tests ### Run all tests ```bash cd .github/scripts/tests python3 -m unittest discover -v ``` ### Run a specific test file ```bash python3 -m unittest test_get_pyproject_version -v ``` ### Run a specific test class ```bash python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion -v ``` ### Run a specific test method ```bash python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion.test_matching_versions -v ``` ### Run tests directly from the test file ```bash python3 test_get_pyproject_version.py ``` ## Test Structure ### test_get_pyproject_version.py Comprehensive tests for `get_pyproject_version.py` covering: - ✅ **Version matching**: Tests successful version validation - ✅ **Version mismatch**: Tests error handling when versions don't match - ✅ **Missing version**: Tests handling of pyproject.toml without version field - ✅ **Missing project section**: Tests handling of pyproject.toml without project section - ✅ **File not found**: Tests handling of non-existent files - ✅ **Malformed TOML**: Tests handling of invalid TOML syntax - ✅ **Argument validation**: Tests proper argument count validation - ✅ **Semantic versioning**: Tests various semantic version formats - ✅ **Pre-release tags**: Tests versions with alpha, beta, rc tags - ✅ **Build metadata**: Tests versions with build metadata - ✅ **Edge cases**: Tests empty versions and other edge cases **Total Tests**: 17+ test cases covering all functionality ## Best Practices Implemented 1. **Fixture Management**: Uses `setUp()` and `tearDown()` for clean test isolation 2. **Helper Methods**: Provides reusable helpers for creating test fixtures 3. **Temporary Files**: Uses `tempfile` for file creation with proper cleanup 4. 
**Comprehensive Coverage**: Tests happy paths, error conditions, and edge cases 5. **Clear Documentation**: Each test has a descriptive docstring 6. **Output Capture**: Uses `unittest.mock.patch` and `StringIO` to test stdout/stderr 7. **Exit Code Validation**: Properly tests script exit codes with `assertRaises(SystemExit)` 8. **Type Hints**: Uses type hints in helper methods for clarity 9. **PEP 8 Compliance**: Follows Python style guidelines 10. **Zero External Dependencies**: Uses only Python standard library ## Continuous Integration These tests can be integrated into GitHub Actions workflows with no additional dependencies: ```yaml - name: Run .github scripts tests run: | cd .github/scripts/tests python3 -m unittest discover -v ``` ## Test Output Example ``` test_empty_version_string (test_get_pyproject_version.TestGetPyprojectVersion) Test handling of empty version string. ... ok test_file_not_found (test_get_pyproject_version.TestGetPyprojectVersion) Test handling of non-existent pyproject.toml file. ... ok test_malformed_toml (test_get_pyproject_version.TestGetPyprojectVersion) Test handling of malformed TOML file. ... ok test_matching_versions (test_get_pyproject_version.TestGetPyprojectVersion) Test that matching versions result in success. ... ok test_missing_project_section (test_get_pyproject_version.TestGetPyprojectVersion) Test handling of pyproject.toml without a project section. ... ok test_missing_version_field (test_get_pyproject_version.TestGetPyprojectVersion) Test handling of pyproject.toml without a version field. ... ok test_no_arguments (test_get_pyproject_version.TestGetPyprojectVersion) Test that providing no arguments results in usage error. ... ok test_semantic_version_0_0_1 (test_get_pyproject_version.TestGetPyprojectVersion) Test semantic version 0.0.1. ... ok test_semantic_version_1_0_0 (test_get_pyproject_version.TestGetPyprojectVersion) Test semantic version 1.0.0. ... 
ok test_semantic_version_10_20_30 (test_get_pyproject_version.TestGetPyprojectVersion) Test semantic version 10.20.30. ... ok test_semantic_version_alpha (test_get_pyproject_version.TestGetPyprojectVersion) Test semantic version with alpha tag. ... ok test_semantic_version_beta (test_get_pyproject_version.TestGetPyprojectVersion) Test semantic version with beta tag. ... ok test_semantic_version_rc_with_build (test_get_pyproject_version.TestGetPyprojectVersion) Test semantic version with rc and build metadata. ... ok test_too_few_arguments (test_get_pyproject_version.TestGetPyprojectVersion) Test that providing too few arguments results in usage error. ... ok test_too_many_arguments (test_get_pyproject_version.TestGetPyprojectVersion) Test that providing too many arguments results in usage error. ... ok test_version_mismatch (test_get_pyproject_version.TestGetPyprojectVersion) Test that mismatched versions result in failure with appropriate error message. ... ok test_version_with_build_metadata (test_get_pyproject_version.TestGetPyprojectVersion) Test matching versions with build metadata. ... ok test_version_with_prerelease_tags (test_get_pyproject_version.TestGetPyprojectVersion) Test matching versions with pre-release tags like alpha, beta, rc. ... 
ok ---------------------------------------------------------------------- Ran 18 tests in 0.XXXs OK ``` ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown <div align="center"> <picture> <source media="(prefers-color-scheme: dark)" alt="Cua logo" height="150" srcset="img/logo_white.png"> <source media="(prefers-color-scheme: light)" alt="Cua logo" height="150" srcset="img/logo_black.png"> <img alt="Cua logo" height="150" src="img/logo_black.png"> </picture> [](#) [](#) [](#) [](https://discord.com/invite/mVnXXpdE85) <br> <a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a> </div> > We’re hosting the **Computer-Use Agents SOTA Challenge** at [Hack the North](https://hackthenorth.com) and online! >> **Track A (On-site @ UWaterloo)**: Reserved for participants accepted to Hack the North. 🏆 Prize: **YC interview guaranteed**. >> **Track B (Remote)**: Open to everyone worldwide. 🏆 Prize: **Cash award**. >>> 👉 Sign up here: [trycua.com/hackathon](https://www.trycua.com/hackathon) **cua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud. 
<div align="center"> <video src="https://github.com/user-attachments/assets/c619b4ea-bb8e-4382-860e-f3757e36af20" width="600" controls></video> </div> With the Computer SDK, you can: - automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions) - create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [cua cloud](https://www.trycua.com/) With the Agent SDK, you can: - run computer-use models with a [consistent schema](https://docs.trycua.com/docs/agent-sdk/message-format) - benchmark on OSWorld-Verified, SheetBench-V2, and more [with a single line of code using HUD](https://docs.trycua.com/docs/agent-sdk/integrations/hud) ([Notebook](https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb)) - combine UI grounding models with any LLM using [composed agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) - use new UI agent models and UI grounding models from the Model Zoo below with just a model string (e.g., `ComputerAgent(model="openai/computer-use-preview")`) - use API or local inference by changing a prefix (e.g., `openai/`, `openrouter/`, `ollama/`, `huggingface-local/`, `mlx/`, [etc.](https://docs.litellm.ai/docs/providers)) ### CUA Model Zoo 🐨 | [All-in-one CUAs](https://docs.trycua.com/docs/agent-sdk/supported-agents/computer-use-agents) | [UI Grounding Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) | [UI Planning Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) | |---|---|---| | `anthropic/claude-sonnet-4-5-20250929` | `huggingface-local/xlangai/OpenCUA-{7B,32B}` | any all-in-one CUA | | `openai/computer-use-preview` | `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}` | any VLM (using liteLLM, requires `tools` parameter) | | `openrouter/z-ai/glm-4.5v` | `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}` | any LLM 
(using liteLLM, requires `moondream3+` prefix ) | | `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` | any all-in-one CUA | | | `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` | | | `moondream3+{ui planning}` (supports text-only models) | | | `omniparser+{ui planning}` | | | | `{ui grounding}+{ui planning}` | | | - `human/human` → [Human-in-the-Loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop) Missing a model? [Raise a feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)! <br/> # Quick Start - [Get started with a Computer-Use Agent UI](https://docs.trycua.com/docs/quickstart-ui) - [Get started with the Computer-Use Agent CLI](https://docs.trycua.com/docs/quickstart-cli) - [Get started with the Python SDKs](https://docs.trycua.com/docs/quickstart-devs) <br/> # Usage ([Docs](https://docs.trycua.com/docs)) ```bash pip install cua-agent[all] ``` ```python from agent import ComputerAgent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], max_trajectory_budget=5.0 ) messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] async for result in agent.run(messages): for item in result["output"]: if item["type"] == "message": print(item["content"][0]["text"]) ``` ### Output format (OpenAI Agent Responses Format): ```json { "output": [ # user input { "role": "user", "content": "go to trycua on gh" }, # first agent turn adds the model output to the history { "summary": [ { "text": "Searching Firefox for Trycua GitHub", "type": "summary_text" } ], "type": "reasoning" }, { "action": { "text": "Trycua GitHub", "type": "type" }, "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", "status": "completed", "type": "computer_call" }, # second agent turn adds the computer output to the history { "type": "computer_call_output", 
"call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", "output": { "type": "input_image", "image_url": "data:image/png;base64,..." } }, # final agent turn adds the agent output text to the history { "type": "message", "role": "assistant", "content": [ { "text": "Success! The Trycua GitHub page has been opened.", "type": "output_text" } ] } ], "usage": { "prompt_tokens": 150, "completion_tokens": 75, "total_tokens": 225, "response_cost": 0.01, } } ``` # Computer ([Docs](https://docs.trycua.com/docs/computer-sdk/computers)) ```bash pip install cua-computer[all] ``` ```python from computer import Computer async with Computer( os_type="linux", provider_type="cloud", name="your-sandbox-name", api_key="your-api-key" ) as computer: # Take screenshot screenshot = await computer.interface.screenshot() # Click and type await computer.interface.left_click(100, 100) await computer.interface.type("Hello!") ``` # Resources - [How to use the MCP Server with Claude Desktop or other MCP clients](./libs/python/mcp-server/README.md) - One of the easiest ways to get started with Cua - [How to use OpenAI Computer-Use, Anthropic, OmniParser, or UI-TARS for your Computer-Use Agent](./libs/python/agent/README.md) - [How to use Lume CLI for managing desktops](./libs/lume/README.md) - [Training Computer-Use Models: Collecting Human Trajectories with Cua (Part 1)](https://www.trycua.com/blog/training-computer-use-models-trajectories-1) ## Modules | Module | Description | Installation | |--------|-------------|---------------| | [**Lume**](./libs/lume/README.md) | VM management for macOS/Linux using Apple's Virtualization.Framework | `curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh \| bash` | | [**Lumier**](./libs/lumier/README.md) | Docker interface for macOS and Linux VMs | `docker pull trycua/lumier:latest` | | [**Computer (Python)**](./libs/python/computer/README.md) | Python Interface for controlling virtual machines | `pip install "cua-computer[all]"` | | 
[**Computer (Typescript)**](./libs/typescript/computer/README.md) | Typescript Interface for controlling virtual machines | `npm install @trycua/computer` | | [**Agent**](./libs/python/agent/README.md) | AI agent framework for automating tasks | `pip install "cua-agent[all]"` | | [**MCP Server**](./libs/python/mcp-server/README.md) | MCP server for using CUA with Claude Desktop | `pip install cua-mcp-server` | | [**SOM**](./libs/python/som/README.md) | Set-of-Mark library for Agent | `pip install cua-som` | | [**Computer Server**](./libs/python/computer-server/README.md) | Server component for Computer | `pip install cua-computer-server` | | [**Core (Python)**](./libs/python/core/README.md) | Python Core utilities | `pip install cua-core` | | [**Core (Typescript)**](./libs/typescript/core/README.md) | Typescript Core utilities | `npm install @trycua/core` | ## Community Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas, get assistance, or share your demos! ## License Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE.md) file for details. Portions of this project, specifically components adapted from Kasm Technologies Inc., are also licensed under the MIT License. See [libs/kasm/LICENSE](libs/kasm/LICENSE) for details. Microsoft's OmniParser, which is used in this project, is licensed under the Creative Commons Attribution 4.0 International License (CC-BY-4.0). See the [OmniParser LICENSE](https://github.com/microsoft/OmniParser/blob/master/LICENSE) for details. ### Third-Party Licenses and Optional Components Some optional extras for this project depend on third-party packages that are licensed under terms different from the MIT License. - The optional "omni" extra (installed via `pip install "cua-agent[omni]"`) installs the `cua-som` module, which includes `ultralytics` and is licensed under the AGPL-3.0.
When you choose to install and use such optional extras, your use, modification, and distribution of those third-party components are governed by their respective licenses (e.g., AGPL-3.0 for `ultralytics`). ## Contributing We welcome contributions to Cua! Please refer to our [Contributing Guidelines](CONTRIBUTING.md) for details. ## Trademarks Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonical are registered trademarks of Canonical Ltd. Microsoft is a registered trademark of Microsoft Corporation. This project is not affiliated with, endorsed by, or sponsored by Apple Inc., Canonical Ltd., Microsoft Corporation, or Kasm Technologies. ## Stargazers Thank you to all our supporters! [](https://starchart.cc/trycua/cua) ## Sponsors Thank you to all our [GitHub Sponsors](https://github.com/sponsors/trycua)! <img width="300" alt="coderabbit-cli" src="https://github.com/user-attachments/assets/23a98e38-7897-4043-8ef7-eb990520dccc" /> ``` -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- ```markdown MIT License Copyright (c) 2025 Cua AI, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ``` -------------------------------------------------------------------------------- /libs/lume/CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown # Contributing to lume We deeply appreciate your interest in contributing to lume! Whether you're reporting bugs, suggesting enhancements, improving docs, or submitting pull requests, your contributions help improve the project for everyone. ## Reporting Bugs If you've encountered a bug in the project, we encourage you to report it. Please follow these steps: 1. **Check the Issue Tracker**: Before submitting a new bug report, please check our issue tracker to see if the bug has already been reported. 2. **Create a New Issue**: If the bug hasn't been reported, create a new issue with: - A clear title and detailed description - Steps to reproduce the issue - Expected vs actual behavior - Your environment (macOS version, lume version) - Any relevant logs or error messages 3. **Label Your Issue**: Label your issue as a `bug` to help maintainers identify it quickly. ## Suggesting Enhancements We're always looking for suggestions to make lume better. If you have an idea: 1. **Check Existing Issues**: See if someone else has already suggested something similar. 2. **Create a New Issue**: If your enhancement is new, create an issue describing: - The problem your enhancement solves - How your enhancement would work - Any potential implementation details - Why this enhancement would benefit lume users ## Documentation Documentation improvements are always welcome. 
You can: - Fix typos or unclear explanations - Add examples and use cases - Improve API documentation - Add tutorials or guides For detailed instructions on setting up your development environment and submitting code contributions, please see our [Development.md](docs/Development.md) guide. Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions. ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown # Contributing to cua We deeply appreciate your interest in contributing to cua! Whether you're reporting bugs, suggesting enhancements, improving docs, or submitting pull requests, your contributions help improve the project for everyone. ## Reporting Bugs If you've encountered a bug in the project, we encourage you to report it. Please follow these steps: 1. **Check the Issue Tracker**: Before submitting a new bug report, please check our issue tracker to see if the bug has already been reported. 2. **Create a New Issue**: If the bug hasn't been reported, create a new issue with: - A clear title and detailed description - Steps to reproduce the issue - Expected vs actual behavior - Your environment (OS version, cua version) - Any relevant logs or error messages 3. **Label Your Issue**: Label your issue as a `bug` to help maintainers identify it quickly. ## Suggesting Enhancements We're always looking for suggestions to make cua better. If you have an idea: 1. **Check Existing Issues**: See if someone else has already suggested something similar. 2.
**Create a New Issue**: If your enhancement is new, create an issue describing: - The problem your enhancement solves - How your enhancement would work - Any potential implementation details - Why this enhancement would benefit cua users ## Code Formatting We follow strict code formatting guidelines to ensure consistency across the codebase. Before submitting any code: 1. **Review Our Format Guide**: Please review our [Code Formatting Standards](Development.md#code-formatting-standards) section in the Getting Started guide. 2. **Configure Your IDE**: We recommend using the workspace settings provided in `.vscode/` for automatic formatting. 3. **Run Formatting Tools**: Always run the formatting tools before submitting a PR: ```bash # For Python code pdm run black . pdm run ruff check --fix . ``` 4. **Validate Your Code**: Ensure your code passes all checks: ```bash pdm run mypy . ``` ## Documentation Documentation improvements are always welcome. You can: - Fix typos or unclear explanations - Add examples and use cases - Improve API documentation - Add tutorials or guides For detailed instructions on setting up your development environment and submitting code contributions, please see our [Developer-Guide](Development.md). Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions.
``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/diorama/__init__.py: -------------------------------------------------------------------------------- ```python ``` -------------------------------------------------------------------------------- /libs/typescript/core/src/telemetry/clients/index.ts: -------------------------------------------------------------------------------- ```typescript export * from './posthog'; ``` -------------------------------------------------------------------------------- /libs/python/computer/poetry.toml: -------------------------------------------------------------------------------- ```toml [virtualenvs] in-project = true ``` -------------------------------------------------------------------------------- /libs/python/core/poetry.toml: -------------------------------------------------------------------------------- ```toml [virtualenvs] in-project = true ``` -------------------------------------------------------------------------------- /libs/python/som/poetry.toml: -------------------------------------------------------------------------------- ```toml [virtualenvs] in-project = true ``` -------------------------------------------------------------------------------- /.github/scripts/tests/__init__.py: -------------------------------------------------------------------------------- ```python """Tests for .github/scripts.""" ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/ui/__init__.py: -------------------------------------------------------------------------------- ```python """UI modules for the Computer Interface.""" ``` -------------------------------------------------------------------------------- /libs/typescript/pnpm-workspace.yaml: -------------------------------------------------------------------------------- ```yaml packages: - "computer" - "core" - "agent" ``` 
-------------------------------------------------------------------------------- /libs/typescript/computer/src/computer/providers/index.ts: -------------------------------------------------------------------------------- ```typescript export * from './base'; export * from './cloud'; ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/integrations/meta.json: -------------------------------------------------------------------------------- ```json { "title": "Integrations", "pages": ["hud"] } ``` -------------------------------------------------------------------------------- /libs/python/computer-server/examples/__init__.py: -------------------------------------------------------------------------------- ```python """ Examples package for the CUA Computer API. """ ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/computer/index.ts: -------------------------------------------------------------------------------- ```typescript export { BaseComputer, CloudComputer } from './providers'; ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/models/__init__.py: -------------------------------------------------------------------------------- ```python from .base import ModelProtocol __all__ = ["ModelProtocol"] ``` -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- ``` [pytest] asyncio_mode = auto markers = asyncio: asyncio mark ``` -------------------------------------------------------------------------------- /docs/postcss.config.mjs: -------------------------------------------------------------------------------- ``` export default { plugins: { '@tailwindcss/postcss': {}, }, }; ``` 
-------------------------------------------------------------------------------- /libs/python/agent/agent/ui/__main__.py: -------------------------------------------------------------------------------- ```python from .gradio import launch_ui if __name__ == "__main__": launch_ui() ``` -------------------------------------------------------------------------------- /libs/python/core/core/__init__.py: -------------------------------------------------------------------------------- ```python """Core functionality shared across Cua components.""" __version__ = "0.1.8" ``` -------------------------------------------------------------------------------- /libs/typescript/core/vitest.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig } from 'vitest/config'; export default defineConfig({}); ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/meta.json: -------------------------------------------------------------------------------- ```json { "pages": [ "installation", "docker", "docker-compose", "building-lumier" ] } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lume/meta.json: -------------------------------------------------------------------------------- ```json { "pages": [ "installation", "prebuilt-images", "cli-reference", "http-api", "faq" ] } ``` -------------------------------------------------------------------------------- /docs/src/app/global.css: -------------------------------------------------------------------------------- ```css @import 'tailwindcss'; @import 'fumadocs-ui/css/neutral.css'; @import 'fumadocs-ui/css/preset.css'; ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Options/FormatOption.swift: -------------------------------------------------------------------------------- 
```swift import ArgumentParser enum FormatOption: String, ExpressibleByArgument { case json case text } ``` -------------------------------------------------------------------------------- /libs/lume/src/ContainerRegistry/ImageList.swift: -------------------------------------------------------------------------------- ```swift public struct ImageList: Codable { public let local: [String] public let remote: [String] } ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/ui/gradio/__init__.py: -------------------------------------------------------------------------------- ```python """Gradio UI for Computer UI.""" import gradio as gr from typing import Optional from .app import create_gradio_ui ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/setup.ts: -------------------------------------------------------------------------------- ```typescript import { afterAll, afterEach, beforeAll } from 'vitest'; beforeAll(() => {}); afterAll(() => {}); afterEach(() => {}); ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/meta.json: -------------------------------------------------------------------------------- ```json { "pages": [ "installation", "configuration", "usage", "tools", "client-integrations", "llm-integrations" ] } ``` -------------------------------------------------------------------------------- /libs/typescript/agent/vitest.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig } from 'vitest/config' export default defineConfig({ test: { environment: 'happy-dom', }, }) ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/ui/__init__.py: -------------------------------------------------------------------------------- ```python """ 
UI components for agent """ from .gradio import launch_ui, create_gradio_ui __all__ = ["launch_ui", "create_gradio_ui"] ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/mcp_server/__main__.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python """Entry point for the MCP server module.""" from .server import main if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/cloud/__init__.py: -------------------------------------------------------------------------------- ```python """CloudProvider module for interacting with cloud-based virtual machines.""" from .provider import CloudProvider __all__ = ["CloudProvider"] ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/ui/gradio/__init__.py: -------------------------------------------------------------------------------- ```python """ Gradio UI for agent """ from .app import launch_ui from .ui_components import create_gradio_ui __all__ = ["launch_ui", "create_gradio_ui"] ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/meta.json: -------------------------------------------------------------------------------- ```json { "pages": [ "introduction", "screenspot-v2", "screenspot-pro", "interactive", "osworld-verified" ] } ``` -------------------------------------------------------------------------------- /libs/typescript/core/tsdown.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig } from 'tsdown'; export default defineConfig([ { entry: ['./src/index.ts'], platform: 'node', dts: true, }, ]); ``` -------------------------------------------------------------------------------- 
/libs/typescript/computer/src/types.ts: -------------------------------------------------------------------------------- ```typescript export enum OSType { MACOS = 'macos', WINDOWS = 'windows', LINUX = 'linux', } export interface ScreenSize { width: number; height: number; } ``` -------------------------------------------------------------------------------- /docs/src/app/api/search/route.ts: -------------------------------------------------------------------------------- ```typescript import { source } from '@/lib/source'; import { createFromSource } from 'fumadocs-core/search/server'; export const { GET } = createFromSource(source); ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/String.swift: -------------------------------------------------------------------------------- ```swift import Foundation extension String { func padding(_ toLength: Int) -> String { return self.padding(toLength: toLength, withPad: " ", startingAt: 0) } } ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/index.ts: -------------------------------------------------------------------------------- ```typescript // Export classes export { CloudComputer as Computer } from './computer'; //todo: figure out what types to export and how to do that // export { OSType } from './types'; ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/CommandUtils.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation func completeVMName(_ arguments: [String]) -> [String] { (try? Home().getAllVMDirectories().map { $0.directory.name }) ?? 
[] } ``` -------------------------------------------------------------------------------- /libs/typescript/computer/vitest.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig } from 'vitest/config'; export default defineConfig({ test: { setupFiles: ['./tests/setup.ts'], environment: 'node', globals: true, }, }); ``` -------------------------------------------------------------------------------- /libs/typescript/core/src/index.ts: -------------------------------------------------------------------------------- ```typescript /** * This module provides the core telemetry functionality for CUA libraries. * * It provides a low-overhead way to collect anonymous usage data. */ export * from './telemetry'; ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tsdown.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig } from 'tsdown'; export default defineConfig([ { entry: ['./src/index.ts'], platform: 'node', dts: true, external: ['child_process', 'util'], }, ]); ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/lume/__init__.py: -------------------------------------------------------------------------------- ```python """Lume VM provider implementation.""" try: from .provider import LumeProvider HAS_LUME = True __all__ = ["LumeProvider"] except ImportError: HAS_LUME = False __all__ = [] ``` -------------------------------------------------------------------------------- /libs/python/pylume/__init__.py: -------------------------------------------------------------------------------- ```python """ PyLume Python SDK - A client library for managing macOS VMs with PyLume. 
""" from pylume.pylume import * from pylume.models import * from pylume.exceptions import * __version__ = "0.1.0" ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/__init__.py: -------------------------------------------------------------------------------- ```python """Provider implementations for different VM backends.""" # Import specific providers only when needed to avoid circular imports __all__ = [] # Let each provider module handle its own exports ``` -------------------------------------------------------------------------------- /docs/content/docs/computer-sdk/meta.json: -------------------------------------------------------------------------------- ```json { "title": "Computer SDK", "description": "Build computer-using agents with the Computer SDK", "pages": [ "computers", "commands", "computer-ui", "sandboxed-python" ] } ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/meta.json: -------------------------------------------------------------------------------- ```json { "title": "Callbacks", "description": "Extending agents with callback hooks and built-in handlers", "pages": [ "agent-lifecycle", "trajectories", "logging", "cost-saving", "pii-anonymization" ] } ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-agents/meta.json: -------------------------------------------------------------------------------- ```json { "title": "Supported Agents", "description": "Models and configurations supported by the Agent SDK", "pages": [ "computer-use-agents", "grounding-models", "composed-agents", "human-in-the-loop" ] } ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/lumier/__init__.py: -------------------------------------------------------------------------------- 
```python """Lumier VM provider implementation.""" try: # Use the same import approach as in the Lume provider from .provider import LumierProvider HAS_LUMIER = True except ImportError: HAS_LUMIER = False ``` -------------------------------------------------------------------------------- /libs/typescript/core/src/telemetry/index.ts: -------------------------------------------------------------------------------- ```typescript /** * This module provides the core telemetry functionality for CUA libraries. * * It provides a low-overhead way to collect anonymous usage data. */ export { PostHogTelemetryClient as Telemetry } from './clients'; ``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/__main__.py: -------------------------------------------------------------------------------- ```python """ Main entry point for running the Computer Server as a module. This allows the server to be started with `python -m computer_server`. """ import sys from .cli import main if __name__ == "__main__": sys.exit(main()) ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/tools.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Tools --- ## Available Tools The MCP server exposes the following tools to Claude: 1. `run_cua_task` - Run a single Computer-Use Agent task with the given instruction 2. 
`run_multi_cua_tasks` - Run multiple tasks in sequence ``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/diorama/base.py: -------------------------------------------------------------------------------- ```python class BaseDioramaHandler: """Base Diorama handler for unsupported OSes.""" async def diorama_cmd(self, action: str, arguments: dict = None) -> dict: return {"success": False, "error": "Diorama is not supported on this OS yet."} ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/winsandbox/__init__.py: -------------------------------------------------------------------------------- ```python """Windows Sandbox provider for CUA Computer.""" try: import winsandbox HAS_WINSANDBOX = True except ImportError: HAS_WINSANDBOX = False from .provider import WinSandboxProvider __all__ = ["WinSandboxProvider", "HAS_WINSANDBOX"] ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/interface/__init__.py: -------------------------------------------------------------------------------- ```python """ Interface package for Computer SDK. """ from .factory import InterfaceFactory from .base import BaseComputerInterface from .macos import MacOSComputerInterface __all__ = [ "InterfaceFactory", "BaseComputerInterface", "MacOSComputerInterface", ] ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx: -------------------------------------------------------------------------------- ```markdown --- title: OSWorld-Verified description: Benchmark ComputerAgent on OSWorld tasks using HUD --- OSWorld-Verified is a curated subset of OSWorld tasks that can be run using the HUD framework. Use [ComputerAgent with HUD](../integrations/hud) to benchmark on these tasks. 
``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/start-computer-server.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Wait for X server to be ready echo "Waiting for X server to start..." while ! xdpyinfo -display :1 >/dev/null 2>&1; do sleep 1 done echo "X server is ready" # Start computer-server export DISPLAY=:1 python3 -m computer_server --port ${API_PORT:-8000} ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/ui/__main__.py: -------------------------------------------------------------------------------- ```python """ Main entry point for computer.ui module. This allows running the computer UI with: python -m computer.ui Instead of: python -m computer.ui.gradio.app """ from .gradio.app import create_gradio_ui if __name__ == "__main__": app = create_gradio_ui() app.launch() ``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/start-novnc.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Give VNC a moment to start (supervisor starts it with priority 10, this is priority 20) echo "Waiting for VNC server to start..." 
sleep 5 # Start noVNC cd /opt/noVNC /opt/noVNC/utils/novnc_proxy \ --vnc localhost:${VNC_PORT:-5901} \ --listen ${NOVNC_PORT:-6901} ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/adapters/__init__.py: -------------------------------------------------------------------------------- ```python """ Adapters package for agent - Custom LLM adapters for LiteLLM """ from .huggingfacelocal_adapter import HuggingFaceLocalAdapter from .human_adapter import HumanAdapter from .mlxvlm_adapter import MLXVLMAdapter __all__ = [ "HuggingFaceLocalAdapter", "HumanAdapter", "MLXVLMAdapter", ] ``` -------------------------------------------------------------------------------- /docs/src/app/llms.txt/route.ts: -------------------------------------------------------------------------------- ```typescript import { source } from '@/lib/source'; import { getLLMText } from '@/lib/llms'; // cached forever export const revalidate = false; export async function GET() { const scan = source.getPages().map(getLLMText); const scanned = await Promise.all(scan); return new Response(scanned.join('\n\n')); } ``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/xstartup.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Start D-Bus if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then eval $(dbus-launch --sh-syntax --exit-with-session) fi # Start XFCE startxfce4 & # Wait for XFCE to start sleep 2 # Disable screensaver and power management xset s off xset -dpms xset s noblank # Wait for the session wait ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/interface/index.ts: -------------------------------------------------------------------------------- ```typescript export { BaseComputerInterface } from './base'; export type { MouseButton, CursorPosition, AccessibilityNode } 
from './base'; export { InterfaceFactory } from './factory'; export { MacOSComputerInterface } from './macos'; export { LinuxComputerInterface } from './linux'; export { WindowsComputerInterface } from './windows'; ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: MCP Server description: Reference for the current version of the MCP Server library. pypi: cua-mcp-server github: - https://github.com/trycua/cua/tree/main/libs/python/mcp-server --- **cua-mcp-server** is an MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients. ``` -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- ```json { "name": "Cua - OSS", "build": { "dockerfile": "../Dockerfile" }, "containerEnv": { "DISPLAY": "", "PYLUME_HOST": "host.docker.internal" }, "forwardPorts": [7860], "portsAttributes": { "7860": { "label": "Cua web client (Gradio)", "onAutoForward": "silent" } }, "postCreateCommand": "/bin/bash .devcontainer/post-install.sh" } ``` -------------------------------------------------------------------------------- /libs/python/core/core/telemetry/__init__.py: -------------------------------------------------------------------------------- ```python """This module provides the core telemetry functionality for CUA libraries. It provides a low-overhead way to collect anonymous usage data.
""" from core.telemetry.posthog import ( record_event, is_telemetry_enabled, destroy_telemetry_client, ) __all__ = [ "record_event", "is_telemetry_enabled", "destroy_telemetry_client", ] ``` -------------------------------------------------------------------------------- /libs/typescript/agent/tsdown.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig } from "tsdown"; export default defineConfig({ entry: ["src/index.ts"], format: ["module"], platform: "browser", dts: true, clean: true, // Remove if we don't need to support including the library via '<script/>' tags. // noExternal bundles this list of libraries within the final 'dist' noExternal: ['peerjs'] }); ``` -------------------------------------------------------------------------------- /docs/content/docs/meta.json: -------------------------------------------------------------------------------- ```json { "title": "Home", "description": "Documentation Home", "root": true, "defaultOpen": true, "pages": [ "index", "quickstart-devs", "quickstart-cli", "telemetry", "---[BookCopy]Computer Playbook---", "...computer-sdk", "---[BookCopy]Agent Playbook---", "...agent-sdk", "---[CodeXml]API Reference---", "...libraries" ] } ``` -------------------------------------------------------------------------------- /docs/src/app/(home)/layout.tsx: -------------------------------------------------------------------------------- ```typescript import { baseOptions } from '@/app/layout.config'; import { source } from '@/lib/source'; import { DocsLayout } from 'fumadocs-ui/layouts/docs'; import type { ReactNode } from 'react'; export default function Layout({ children }: { children: ReactNode }) { return ( <DocsLayout tree={source.pageTree} {...baseOptions}> {children} </DocsLayout> ); } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/core/index.mdx: 
-------------------------------------------------------------------------------- ```markdown --- title: Core description: Reference for the current version of the Core library. pypi: cua-core npm: '@trycua/core' github: - https://github.com/trycua/cua/tree/main/libs/python/core - https://github.com/trycua/cua/tree/main/libs/typescript/core --- ## Overview The Core library provides foundational utilities and shared functionality across the CUA ecosystem. ``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/__init__.py: -------------------------------------------------------------------------------- ```python """ Computer API package. Provides a server interface for the Computer API. """ from __future__ import annotations __version__: str = "0.1.0" # Explicitly export Server for static type checkers from .server import Server as Server # noqa: F401 __all__ = ["Server", "run_cli"] def run_cli() -> None: """Entry point for CLI""" from .cli import main main() ``` -------------------------------------------------------------------------------- /libs/python/computer-server/run_server.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python """ Entrypoint script for the Computer Server. This script provides a simple way to start the Computer Server from the command line or using a launch configuration in an IDE. 
Usage: python run_server.py [--host HOST] [--port PORT] [--log-level LEVEL] """ import sys from computer_server.cli import main if __name__ == "__main__": sys.exit(main()) ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/computer/cloud.test.ts: -------------------------------------------------------------------------------- ```typescript import { describe, expect, it } from 'vitest'; import { Computer } from '../../src'; import { OSType } from '../../src/types'; describe('Computer Cloud', () => { it('Should create computer instance', () => { const cloud = new Computer({ apiKey: 'asdf', name: 's-linux-1234', osType: OSType.LINUX, }); expect(cloud).toBeInstanceOf(Computer); }); }); ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/docker/__init__.py: -------------------------------------------------------------------------------- ```python """Docker provider for running containers with computer-server.""" from .provider import DockerProvider # Check if Docker is available try: import subprocess subprocess.run(["docker", "--version"], capture_output=True, check=True) HAS_DOCKER = True except (subprocess.SubprocessError, FileNotFoundError): HAS_DOCKER = False __all__ = ["DockerProvider", "HAS_DOCKER"] ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/interface/linux.ts: -------------------------------------------------------------------------------- ```typescript /** * Linux computer interface implementation. */ import { MacOSComputerInterface } from './macos'; /** * Linux interface implementation. * Since the cloud provider uses the same WebSocket protocol for all OS types, * we can reuse the macOS implementation. 
*/ export class LinuxComputerInterface extends MacOSComputerInterface { // Linux uses the same WebSocket interface as macOS for cloud provider } ``` -------------------------------------------------------------------------------- /libs/python/som/som/__init__.py: -------------------------------------------------------------------------------- ```python """SOM - Computer Vision and OCR library for detecting and analyzing UI elements.""" __version__ = "0.1.0" from .detect import OmniParser from .models import ( BoundingBox, UIElement, IconElement, TextElement, ParserMetadata, ParseResult ) __all__ = [ "OmniParser", "BoundingBox", "UIElement", "IconElement", "TextElement", "ParserMetadata", "ParseResult" ] ``` -------------------------------------------------------------------------------- /libs/python/som/tests/test_omniparser.py: -------------------------------------------------------------------------------- ```python # """Basic tests for the omniparser package.""" # import pytest # from omniparser import IconDetector # def test_icon_detector_import(): # """Test that we can import the IconDetector class.""" # assert IconDetector is not None # def test_icon_detector_init(): # """Test that we can create an IconDetector instance.""" # detector = IconDetector(force_cpu=True) # assert detector is not None ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/interface/linux.py: -------------------------------------------------------------------------------- ```python from typing import Optional from .generic import GenericComputerInterface class LinuxComputerInterface(GenericComputerInterface): """Interface for Linux.""" def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.linux") ``` 
-------------------------------------------------------------------------------- /libs/lume/src/Utils/ProcessRunner.swift: -------------------------------------------------------------------------------- ```swift import Foundation /// Protocol for process execution protocol ProcessRunner { func run(executable: String, arguments: [String]) throws } class DefaultProcessRunner: ProcessRunner { func run(executable: String, arguments: [String]) throws { let process = Process() process.executableURL = URL(fileURLWithPath: executable) process.arguments = arguments try process.run() } } ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/interface/windows.ts: -------------------------------------------------------------------------------- ```typescript /** * Windows computer interface implementation. */ import { MacOSComputerInterface } from './macos'; /** * Windows interface implementation. * Since the cloud provider uses the same WebSocket protocol for all OS types, * we can reuse the macOS implementation. 
*/ export class WindowsComputerInterface extends MacOSComputerInterface { // Windows uses the same WebSocket interface as macOS for cloud provider } ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/interface/windows.py: -------------------------------------------------------------------------------- ```python from typing import Optional from .generic import GenericComputerInterface class WindowsComputerInterface(GenericComputerInterface): """Interface for Windows.""" def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.windows") ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lume/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Lume description: Reference for the current version of the Lume CLI. github: - https://github.com/trycua/cua/tree/main/libs/lume --- Lume is a lightweight Command Line Interface and local API server for creating, running and managing **macOS and Linux virtual machines** with near-native performance on Apple Silicon, using Apple's [Virtualization.Framework](https://developer.apple.com/documentation/virtualization). 
``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/meta.json: -------------------------------------------------------------------------------- ```json { "title": "Agent SDK", "description": "Build computer-using agents with the Agent SDK", "pages": [ "agent-loops", "supported-agents", "supported-model-providers", "chat-history", "message-format", "customizing-computeragent", "callbacks", "custom-tools", "custom-computer-handlers", "prompt-caching", "usage-tracking", "benchmarks", "migration-guide", "integrations" ] } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Prune.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Prune: AsyncParsableCommand { static let configuration: CommandConfiguration = CommandConfiguration( commandName: "prune", abstract: "Remove cached images" ) init() { } @MainActor func run() async throws { let manager = LumeController() try await manager.pruneImages() print("Successfully removed cached images") } } ``` -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- ```json { "include": [ "**/*.py" ], "exclude": [ "**/node_modules/**", "**/__pycache__/**", "**/.*/**", "**/venv/**", "**/.venv/**", "**/dist/**", "**/build/**", ".pdm-build/**", "**/.git/**", "examples/**", "notebooks/**", "logs/**", "screenshots/**" ], "typeCheckingMode": "basic", "useLibraryCodeForTypes": true, "reportMissingImports": false, "reportMissingModuleSource": false } ``` -------------------------------------------------------------------------------- /libs/typescript/core/tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "esnext", "lib": 
["es2023"], "moduleDetection": "force", "module": "preserve", "moduleResolution": "bundler", "resolveJsonModule": true, "types": ["node"], "strict": true, "noUnusedLocals": true, "declaration": true, "emitDeclarationOnly": true, "esModuleInterop": true, "isolatedModules": true, "verbatimModuleSyntax": true, "skipLibCheck": true }, "include": ["src"] } ``` -------------------------------------------------------------------------------- /libs/lumier/src/config/constants.sh: -------------------------------------------------------------------------------- ```bash #!/usr/bin/env bash # Port configuration TUNNEL_PORT=8080 VNC_PORT=8006 # Host configuration TUNNEL_HOST="host.docker.internal" # Default VM configuration DEFAULT_RAM_SIZE="8192" DEFAULT_CPU_CORES="4" DEFAULT_DISK_SIZE="100" DEFAULT_VM_NAME="lumier" DEFAULT_VM_VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest" # Paths NOVNC_PATH="/opt/noVNC" LIFECYCLE_HOOKS_DIR="/run/hooks" # VM connection details HOST_USER="lume" HOST_PASSWORD="lume" SSH_RETRY_ATTEMPTS=20 SSH_RETRY_INTERVAL=5 ``` -------------------------------------------------------------------------------- /.devcontainer/post-install.sh: -------------------------------------------------------------------------------- ```bash #!/usr/bin/env bash WORKSPACE="/workspaces/cua" # Setup .env.local echo "PYTHON_BIN=python" > /workspaces/cua/.env.local # Run /scripts/build.sh ./scripts/build.sh # --- # Build is complete. Show user a clear message to open the workspace manually. # --- cat << 'EOM' ============================================ 🚀 Build complete! 👉 Next steps: 1. Open '.vscode/py.code-workspace' 2. Press 'Open Workspace' Happy coding! 
============================================ EOM ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/ProgressLogger.swift: -------------------------------------------------------------------------------- ```swift import Foundation struct ProgressLogger { private var lastLoggedProgress: Double = 0.0 private let threshold: Double init(threshold: Double = 0.05) { self.threshold = threshold } mutating func logProgress(current: Double, context: String) { if current - lastLoggedProgress >= threshold { lastLoggedProgress = current let percentage = Int(current * 100) Logger.info("\(context) Progress: \(percentage)%") } } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/CommandRegistry.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser enum CommandRegistry { static var allCommands: [ParsableCommand.Type] { [ Create.self, Pull.self, Push.self, Images.self, Clone.self, Get.self, Set.self, List.self, Run.self, Stop.self, IPSW.self, Serve.self, Delete.self, Prune.self, Config.self, Logs.self, ] } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/IPSW.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct IPSW: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Get macOS restore image IPSW URL", discussion: "Download IPSW file manually, then use in create command with --ipsw" ) init() { } @MainActor func run() async throws { let vmController = LumeController() let url = try await vmController.getLatestIPSWURL() print(url.absoluteString) } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Images.swift: 
-------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Images: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "List available macOS images from local cache" ) @Option(help: "Organization to list from. Defaults to trycua") var organization: String = "trycua" init() {} @MainActor func run() async throws { let vmController = LumeController() _ = try await vmController.getImages(organization: organization) } } ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/package.json: -------------------------------------------------------------------------------- ```json { "name": "computer-example-ts", "version": "1.0.0", "description": "", "type": "module", "main": "index.js", "scripts": { "dev": "tsx watch src/index.ts", "start": "tsx src/index.ts" }, "keywords": [], "author": "", "license": "MIT", "packageManager": "[email protected]", "dependencies": { "@trycua/computer": "^0.1.3", "dotenv": "^16.5.0", "openai": "^5.7.0" }, "devDependencies": { "@types/node": "^22.15.33", "tsx": "^4.20.3", "typescript": "^5.8.3" } } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/loops/__init__.py: -------------------------------------------------------------------------------- ```python """ Agent loops for agent """ # Import the loops to register them from . import anthropic from . import openai from . import uitars from . import omniparser from . import gta1 from . import composed_grounded from . import glm45v from . import opencua from . import internvl from . import holo from . 
import moondream3 __all__ = [ "anthropic", "openai", "uitars", "omniparser", "gta1", "composed_grounded", "glm45v", "opencua", "internvl", "holo", "moondream3", ] ``` -------------------------------------------------------------------------------- /docs/src/mdx-components.tsx: -------------------------------------------------------------------------------- ```typescript import defaultMdxComponents from 'fumadocs-ui/mdx'; import * as TabsComponents from 'fumadocs-ui/components/tabs'; import type { MDXComponents } from 'mdx/types'; import { Mermaid } from './components/mermaid'; import IOU from './components/iou'; // use this function to get MDX components, you will need it for rendering MDX export function getMDXComponents(components?: MDXComponents): MDXComponents { return { ...defaultMdxComponents, Mermaid, IOU, ...TabsComponents, ...components, }; } ``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/resize-display.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # Dynamic display resolution script # Can be called to change the VNC display resolution RESOLUTION=${1:-1920x1080} # Wait for display to be ready for i in {1..10}; do if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then break fi sleep 1 done # Change resolution using xrandr DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \ DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null || \ echo "Failed to set resolution to $RESOLUTION" echo "Display resolution set to: $RESOLUTION" ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/configuration.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Configuration --- The server is configured using environment variables (can be set in the Claude Desktop config): | Variable | Description | Default | 
|----------|-------------|---------| | `CUA_MODEL_NAME` | Model string (e.g., "anthropic/claude-3-5-sonnet-20241022", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-3-5-sonnet-20241022 | | `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/__main__.py: -------------------------------------------------------------------------------- ```python """ Entry point for running agent CLI module. Usage: python -m agent.cli <model_string> """ import sys import asyncio from .cli import main if __name__ == "__main__": # Check if 'cli' is specified as the module if len(sys.argv) > 1 and sys.argv[1] == "cli": # Remove 'cli' from arguments and run CLI sys.argv.pop(1) asyncio.run(main()) else: print("Usage: python -m agent.cli <model_string>") print("Example: python -m agent.cli openai/computer-use-preview") sys.exit(1) ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "esnext", "lib": ["es2023"], "moduleDetection": "force", "module": "preserve", "moduleResolution": "bundler", "allowImportingTsExtensions": true, "resolveJsonModule": true, "types": ["node"], "allowSyntheticDefaultImports": true, "strict": true, "noUnusedLocals": true, "declaration": true, "emitDeclarationOnly": true, "esModuleInterop": true, "isolatedModules": true, "verbatimModuleSyntax": true, "skipLibCheck": true }, "include": ["src"] } ``` -------------------------------------------------------------------------------- /docs/src/app/llms.mdx/[[...slug]]/route.ts: -------------------------------------------------------------------------------- ```typescript import { type 
NextRequest, NextResponse } from 'next/server'; import { getLLMText } from '@/lib/llms'; import { source } from '@/lib/source'; import { notFound } from 'next/navigation'; export const revalidate = false; export async function GET( _req: NextRequest, { params }: { params: Promise<{ slug?: string[] }> } ) { const { slug } = await params; const page = source.getPage(slug); if (!page) notFound(); return new NextResponse(await getLLMText(page)); } export function generateStaticParams() { return source.generateParams(); } ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/screenspot-v2.mdx: -------------------------------------------------------------------------------- ```markdown --- title: ScreenSpot-v2 description: Standard resolution GUI grounding benchmark --- ScreenSpot-v2 is a benchmark for evaluating click prediction accuracy on standard resolution GUI screenshots. ## Usage ```bash # Run the benchmark cd libs/python/agent/benchmarks python ss-v2.py # Run with custom sample limit python ss-v2.py --samples 100 ``` ## Results | Model | Accuracy | Failure Rate | Samples | |-------|----------|--------------|---------| | Coming Soon | - | - | - | Results will be populated after running benchmarks with various models. 
``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/start-vnc.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Clean up any existing VNC lock files rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 # Start VNC server without password authentication vncserver :1 \ -geometry ${VNC_RESOLUTION:-1920x1080} \ -depth ${VNC_COL_DEPTH:-24} \ -rfbport ${VNC_PORT:-5901} \ -localhost no \ -SecurityTypes None \ -AlwaysShared \ -AcceptPointerEvents \ -AcceptKeyEvents \ -AcceptCutText \ -SendCutText \ -xstartup /usr/local/bin/xstartup.sh \ --I-KNOW-THIS-IS-INSECURE # Keep the process running tail -f /home/cua/.vnc/*.log ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/screenspot-pro.mdx: -------------------------------------------------------------------------------- ```markdown --- title: ScreenSpot-Pro description: High-resolution GUI grounding benchmark --- ScreenSpot-Pro is a benchmark for evaluating click prediction accuracy on high-resolution GUI screenshots with complex layouts. ## Usage ```bash # Run the benchmark cd libs/python/agent/benchmarks python ss-pro.py # Run with custom sample limit python ss-pro.py --samples 50 ``` ## Results | Model | Accuracy | Failure Rate | Samples | |-------|----------|--------------|---------| | Coming Soon | - | - | - | Results will be populated after running benchmarks with various models. 
``` -------------------------------------------------------------------------------- /libs/python/mcp-server/mcp_server/__init__.py: -------------------------------------------------------------------------------- ```python """MCP Server for Computer-Use Agent (CUA).""" import sys import os # Add detailed debugging at import time with open("/tmp/mcp_server_debug.log", "w") as f: f.write(f"Python executable: {sys.executable}\n") f.write(f"Python version: {sys.version}\n") f.write(f"Working directory: {os.getcwd()}\n") f.write(f"Python path:\n{chr(10).join(sys.path)}\n") f.write(f"Environment variables:\n") for key, value in os.environ.items(): f.write(f"{key}={value}\n") from .server import server, main __version__ = "0.1.0" __all__ = ["server", "main"] ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/scripts/start_mcp_server.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Set the CUA repository path based on script location SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." 
&> /dev/null && pwd )" PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python" # Set Python path to include all necessary libraries export PYTHONPATH="${CUA_REPO_DIR}/libs/python/mcp-server:${CUA_REPO_DIR}/libs/python/agent:${CUA_REPO_DIR}/libs/python/computer:${CUA_REPO_DIR}/libs/python/core:${CUA_REPO_DIR}/libs/python/pylume" # Run the MCP server directly as a module $PYTHON_PATH -m mcp_server.server ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/usage.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Usage --- ## Usage Once configured, you can simply ask Claude to perform computer tasks: - "Open Chrome and go to github.com" - "Create a folder called 'Projects' on my desktop" - "Find all PDFs in my Downloads folder" - "Take a screenshot and highlight the error message" Claude will automatically use your CUA agent to perform these tasks. ### First-time Usage Notes **API Keys**: Ensure you have valid API keys: - Add your Anthropic API key, or other model provider API key in the Claude Desktop config (as shown above) - Or set it as an environment variable in your shell profile ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "esnext", "lib": [ "es2023" ], "moduleDetection": "force", "module": "preserve", "moduleResolution": "bundler", "allowImportingTsExtensions": true, "resolveJsonModule": true, "types": [ "node" ], "allowSyntheticDefaultImports": true, "strict": true, "noUnusedLocals": true, "declaration": true, "emitDeclarationOnly": true, "esModuleInterop": true, "isolatedModules": true, "verbatimModuleSyntax": true, "skipLibCheck": true, "outDir": "build", }, "include": [ "src" ] } ``` 
-------------------------------------------------------------------------------- /libs/lume/src/Commands/Stop.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Stop: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Stop a virtual machine" ) @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) var name: String @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() { } @MainActor func run() async throws { let vmController = LumeController() try await vmController.stopVM(name: name, storage: storage) } } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer-server/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Computer Server description: Reference for the current version of the Computer Server library. pypi: cua-computer-server github: - https://github.com/trycua/cua/tree/main/libs/python/computer-server --- <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_server_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.</Callout> The Computer Server API reference documentation is currently under development. ## Overview The Computer Server provides WebSocket and REST API endpoints for remote computer control and automation.
``` -------------------------------------------------------------------------------- /libs/typescript/agent/src/index.ts: -------------------------------------------------------------------------------- ```typescript // Export the main AgentClient class as default export { AgentClient as default } from './client.js'; // Also export as named export for flexibility export { AgentClient } from './client.js'; // Export types for TypeScript users export type { AgentRequest, AgentResponse, AgentMessage, UserMessage, AssistantMessage, ReasoningMessage, ComputerCallMessage, ComputerCallOutputMessage, OutputContent, SummaryContent, InputContent, ComputerAction, ClickAction, TypeAction, KeyPressAction, ScrollAction, WaitAction, Usage, ConnectionType, AgentClientOptions, } from './types'; ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/computer/types.ts: -------------------------------------------------------------------------------- ```typescript import type { OSType, ScreenSize } from '../types'; /** * Display configuration for the computer. */ export interface Display extends ScreenSize { scale_factor?: number; } /** * Computer configuration model. 
*/ export interface BaseComputerConfig { /** * The VM name * @default "" */ name: string; /** * The operating system type ('macos', 'windows', or 'linux') * @default "macos" */ osType: OSType; } export interface CloudComputerConfig extends BaseComputerConfig { /** * Optional API key for cloud providers */ apiKey: string; } export enum VMProviderType { CLOUD = 'cloud', } ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/interactive.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Interactive Tool description: Real-time testing and visualization tool for GUI grounding models --- This tool allows you to test multiple models interactively by providing natural language instructions. It automatically captures screenshots and tests all configured models sequentially, providing immediate feedback and visual results. ## Usage ```bash # Start the interactive tool cd libs/python/agent/benchmarks python interactive.py ``` ## Commands - **Type instruction**: Screenshot + test all models - **`screenshot`**: Take screenshot without prediction - **`models`**: List available models - **`quit`/`exit`**: Exit the tool ``` -------------------------------------------------------------------------------- /examples/agent_ui_examples.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Simple example script for the Computer-Use Agent Gradio UI. This script launches the advanced Gradio UI for the Computer-Use Agent with full model selection and configuration options. It can be run directly from the command line. 
""" from utils import load_dotenv_files load_dotenv_files() # Import the create_gradio_ui function from agent.ui.gradio.ui_components import create_gradio_ui if __name__ == "__main__": print("Launching Computer-Use Agent Gradio UI with advanced features...") app = create_gradio_ui() app.launch( share=False, server_name="0.0.0.0", server_port=7860, ) ``` -------------------------------------------------------------------------------- /docs/src/app/layout.tsx: -------------------------------------------------------------------------------- ```typescript import './global.css'; import { RootProvider } from 'fumadocs-ui/provider'; import { Inter } from 'next/font/google'; import type { ReactNode } from 'react'; const inter = Inter({ subsets: ['latin'], }); export default function Layout({ children }: { children: ReactNode }) { return ( <html lang="en" className={inter.className} suppressHydrationWarning> <head> <link rel="icon" href="/docs/favicon.ico" sizes="any" /> </head> <body className="flex min-h-screen flex-col"> <RootProvider search={{ options: { api: '/docs/api/search' } }}> {children} </RootProvider> </body> </html> ); } ``` -------------------------------------------------------------------------------- /docs/src/lib/llms.ts: -------------------------------------------------------------------------------- ```typescript import { remark } from 'remark'; import remarkGfm from 'remark-gfm'; import remarkMdx from 'remark-mdx'; import { remarkInclude } from 'fumadocs-mdx/config'; import { source } from '@/lib/source'; import type { InferPageType } from 'fumadocs-core/source'; const processor = remark() .use(remarkMdx) // needed for Fumadocs MDX .use(remarkInclude) .use(remarkGfm); export async function getLLMText(page: InferPageType<typeof source>) { const processed = await processor.process({ path: page.data._file.absolutePath, value: page.data.content, }); return `# ${page.data.title} URL: ${page.url} ${page.data.description} ${processed.value}`; } ``` 
-------------------------------------------------------------------------------- /libs/python/computer/computer/interface/macos.py: -------------------------------------------------------------------------------- ```python from .generic import GenericComputerInterface from typing import Optional class MacOSComputerInterface(GenericComputerInterface): """Interface for macOS.""" def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.macos") async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict: """Send a diorama command to the server (macOS only).""" return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}}) ``` -------------------------------------------------------------------------------- /libs/typescript/agent/tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "esnext", "lib": [ "es2023" ], "moduleDetection": "force", "module": "preserve", "moduleResolution": "bundler", "allowImportingTsExtensions": true, "resolveJsonModule": true, "types": [ "node" ], "allowSyntheticDefaultImports": true, "strict": true, "noUnusedLocals": true, "declaration": true, "emitDeclarationOnly": true, "esModuleInterop": true, "isolatedModules": true, "verbatimModuleSyntax": true, "skipLibCheck": true }, "include": [ "src" ] } ``` -------------------------------------------------------------------------------- /libs/lume/src/Virtualization/ImageLoaderFactory.swift: -------------------------------------------------------------------------------- ```swift import Foundation /// Protocol defining a factory for creating image loaders based on the image type protocol ImageLoaderFactory { /// Creates an appropriate ImageLoader based on the image path or type func 
createImageLoader() -> ImageLoader } /// Default implementation of ImageLoaderFactory that creates appropriate loaders based on image type final class DefaultImageLoaderFactory: ImageLoaderFactory { func createImageLoader() -> ImageLoader { // For now, we only support Darwin images // In the future, this can be extended to support other OS types // by analyzing the image path or having explicit OS type parameter return DarwinImageLoader() } } ``` -------------------------------------------------------------------------------- /.github/workflows/test-validation-script.yml: -------------------------------------------------------------------------------- ```yaml name: Test validation script on: pull_request: paths: - '.github/scripts/**' - '.github/workflows/test-scripts.yml' push: branches: - main paths: - '.github/scripts/**' - '.github/workflows/test-scripts.yml' jobs: test: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip pip install pytest toml - name: Run tests run: | cd .github/scripts pytest tests/ -v ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/agent/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Agent description: Reference for the current version of the Agent library. pypi: cua-agent github: - https://github.com/trycua/cua/tree/main/libs/python/agent --- The Agent library provides the ComputerAgent class and tools for building AI agents that automate workflows on Cua Computers. ## Agent Loops See the [Agent Loops](../agent-sdk/agent-loops) documentation for how agents process information and take actions.
## Chat History See the [Chat History](../agent-sdk/chat-history) documentation for managing conversational context and turn-by-turn interactions. ## Callbacks See the [Callbacks](../agent-sdk/callbacks) documentation for extending and customizing agent behavior with custom hooks. ``` -------------------------------------------------------------------------------- /docs/source.config.ts: -------------------------------------------------------------------------------- ```typescript import { defineConfig, defineDocs, frontmatterSchema, metaSchema, } from 'fumadocs-mdx/config'; import { z } from 'zod'; // You can customise Zod schemas for frontmatter and `meta.json` here // see https://fumadocs.vercel.app/docs/mdx/collections#define-docs export const docs = defineDocs({ docs: { schema: frontmatterSchema.extend({ pypi: z.string().optional(), npm: z.string().optional(), github: z.array(z.string()).optional(), macos: z.boolean().default(false), windows: z.boolean().default(false), linux: z.boolean().default(false), }), }, meta: { schema: metaSchema, }, }); export default defineConfig({ mdxOptions: { // MDX options }, }); ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/installation.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Installation --- Before using Lumier, make sure you have: 1. **Docker for Apple Silicon** - download it [here](https://desktop.docker.com/mac/main/arm64/Docker.dmg) and follow the installation instructions. 2. **Lume** - This is the virtualization CLI that powers Lumier. Install it with this command: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` After installation, Lume runs as a background service and listens on port 7777. This service allows Lumier to create and manage virtual machines. 
If port 7777 is already in use on your system, you can specify a different port with the `--port` option when running the `install.sh` script. ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/callbacks/__init__.py: -------------------------------------------------------------------------------- ```python """ Callback system for ComputerAgent preprocessing and postprocessing hooks. """ from .base import AsyncCallbackHandler from .image_retention import ImageRetentionCallback from .logging import LoggingCallback from .trajectory_saver import TrajectorySaverCallback from .budget_manager import BudgetManagerCallback from .telemetry import TelemetryCallback from .operator_validator import OperatorNormalizerCallback from .prompt_instructions import PromptInstructionsCallback __all__ = [ "AsyncCallbackHandler", "ImageRetentionCallback", "LoggingCallback", "TrajectorySaverCallback", "BudgetManagerCallback", "TelemetryCallback", "OperatorNormalizerCallback", "PromptInstructionsCallback", ] ``` -------------------------------------------------------------------------------- /.github/workflows/docker-publish-xfce.yml: -------------------------------------------------------------------------------- ```yaml name: Build and Publish CUA XFCE Container on: push: branches: - main tags: - "docker-xfce-v*.*.*" paths: - "libs/xfce/**" - ".github/workflows/docker-publish-xfce.yml" - ".github/workflows/docker-reusable-publish.yml" pull_request: paths: - "libs/xfce/**" - ".github/workflows/docker-publish-xfce.yml" - ".github/workflows/docker-reusable-publish.yml" jobs: publish: uses: ./.github/workflows/docker-reusable-publish.yml with: image_name: cua-xfce context_dir: libs/xfce dockerfile_path: Dockerfile tag_prefix: docker-xfce-v docker_hub_org: trycua secrets: DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} ``` -------------------------------------------------------------------------------- 
/.github/workflows/docker-publish-kasm.yml: -------------------------------------------------------------------------------- ```yaml name: Build and Publish CUA Ubuntu Container on: push: branches: - main tags: - "docker-kasm-v*.*.*" paths: - "libs/kasm/**" - ".github/workflows/docker-publish-kasm.yml" - ".github/workflows/docker-reusable-publish.yml" pull_request: paths: - "libs/kasm/**" - ".github/workflows/docker-publish-kasm.yml" - ".github/workflows/docker-reusable-publish.yml" jobs: publish: uses: ./.github/workflows/docker-reusable-publish.yml with: image_name: cua-ubuntu context_dir: libs/kasm dockerfile_path: Dockerfile tag_prefix: docker-kasm-v docker_hub_org: trycua secrets: DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend" [project] name = "cua-mcp-server" description = "MCP Server for Computer-Use Agent (CUA)" readme = "README.md" requires-python = ">=3.11" version = "0.1.0" authors = [ {name = "TryCua", email = "[email protected]"} ] dependencies = [ "mcp>=1.6.0,<2.0.0", "cua-agent[all]>=0.4.0,<0.5.0", "cua-computer>=0.4.0,<0.5.0", ] [project.scripts] cua-mcp-server = "mcp_server.server:main" [tool.pdm] distribution = true [tool.pdm.dev-dependencies] dev = [ "black>=23.9.1", "ruff>=0.0.292", ] [tool.black] line-length = 100 target-version = ["py311"] [tool.ruff] line-length = 100 target-version = "py311" select = ["E", "F", "B", "I"] fix = true ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-model-providers/index.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Supported Model Providers --- ## Supported Models ### Anthropic Claude 
(Computer Use API) ```python model="anthropic/claude-3-5-sonnet-20241022" model="anthropic/claude-3-7-sonnet-20250219" model="anthropic/claude-opus-4-20250514" model="anthropic/claude-sonnet-4-20250514" ``` ### OpenAI Computer Use Preview ```python model="openai/computer-use-preview" ``` ### UI-TARS (Local or Huggingface Inference) ```python model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" model="ollama_chat/0000/ui-tars-1.5-7b" ``` ### Omniparser + Any LLM ```python model="omniparser+ollama_chat/mistral-small3.2" model="omniparser+vertex_ai/gemini-pro" model="omniparser+anthropic/claude-3-5-sonnet-20241022" model="omniparser+openai/gpt-4o" ``` ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/NetworkUtils.swift: -------------------------------------------------------------------------------- ```swift import Foundation enum NetworkUtils { /// Checks if an IP address is reachable by sending a ping /// - Parameter ipAddress: The IP address to check /// - Returns: true if the IP is reachable, false otherwise static func isReachable(ipAddress: String) -> Bool { let process = Process() process.executableURL = URL(fileURLWithPath: "/sbin/ping") process.arguments = ["-c", "1", "-t", "1", ipAddress] let pipe = Pipe() process.standardOutput = pipe process.standardError = pipe do { try process.run() process.waitUntilExit() return process.terminationStatus == 0 } catch { return false } } } ``` -------------------------------------------------------------------------------- /.github/workflows/ci-lume.yml: -------------------------------------------------------------------------------- ```yaml name: lume on: push: branches: - "main" pull_request: {} concurrency: group: lume-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true # Runner images: https://github.com/actions/runner-images jobs: test: name: Test runs-on: macos-15 steps: - uses: actions/checkout@v4 - run: uname -a - run: sudo xcode-select -s 
/Applications/Xcode_16.app # Swift 6.0 - run: swift test working-directory: ./libs/lume build: name: Release build runs-on: macos-15 steps: - uses: actions/checkout@v4 - run: uname -a - run: sudo xcode-select -s /Applications/Xcode_16.app # Swift 6.0 - run: swift build --configuration release working-directory: ./libs/lume ``` -------------------------------------------------------------------------------- /libs/xfce/src/supervisor/supervisord.conf: -------------------------------------------------------------------------------- ``` [supervisord] nodaemon=true user=root logfile=/var/log/supervisor/supervisord.log pidfile=/var/run/supervisord.pid childlogdir=/var/log/supervisor [program:vncserver] command=/usr/local/bin/start-vnc.sh user=cua autorestart=true stdout_logfile=/var/log/supervisor/vncserver.log stderr_logfile=/var/log/supervisor/vncserver.error.log priority=10 [program:novnc] command=/usr/local/bin/start-novnc.sh user=cua autorestart=true stdout_logfile=/var/log/supervisor/novnc.log stderr_logfile=/var/log/supervisor/novnc.error.log priority=20 [program:computer-server] command=/usr/local/bin/start-computer-server.sh user=cua autorestart=true stdout_logfile=/var/log/supervisor/computer-server.log stderr_logfile=/var/log/supervisor/computer-server.error.log priority=30 ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/human_tool/__init__.py: -------------------------------------------------------------------------------- ```python """ Human-in-the-Loop Completion Tool This package provides a human-in-the-loop completion system that allows AI agents to request human assistance for complex decisions or responses. 
Components: - server.py: FastAPI server with completion queue management - ui.py: Gradio UI for human interaction - __main__.py: Combined server and UI application Usage: # Run the server and UI python -m agent.human_tool # Or run components separately python -m agent.human_tool.server # API server only python -m agent.human_tool.ui # UI only """ from .server import CompletionQueue, completion_queue from .ui import HumanCompletionUI, create_ui __all__ = [ "CompletionQueue", "completion_queue", "HumanCompletionUI", "create_ui" ] ``` -------------------------------------------------------------------------------- /docs/tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "baseUrl": ".", "target": "ESNext", "lib": [ "dom", "dom.iterable", "esnext" ], "allowJs": true, "skipLibCheck": true, "strict": true, "forceConsistentCasingInFileNames": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "paths": { "@/.source": [ "./.source/index.ts" ], "@/*": [ "./src/*" ] }, "plugins": [ { "name": "next" } ] }, "include": [ "next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts" ], "exclude": [ "node_modules" ] } ``` -------------------------------------------------------------------------------- /libs/lume/src/VM/VMDisplayResolution.swift: -------------------------------------------------------------------------------- ```swift import Foundation import ArgumentParser struct VMDisplayResolution: Codable, ExpressibleByArgument { let width: Int let height: Int init?(string: String) { let components = string.components(separatedBy: "x") guard components.count == 2, let width = Int(components[0]), let height = Int(components[1]), width > 0, height > 0 else { return nil } self.width = width self.height = height } var string: String { "\(width)x\(height)" } 
init?(argument: String) { guard let resolution = VMDisplayResolution(string: argument) else { return nil } self = resolution } } ``` -------------------------------------------------------------------------------- /libs/typescript/package.json: -------------------------------------------------------------------------------- ```json { "name": "cua-ts", "version": "1.0.0", "description": "The cua typescript libs.", "keywords": [], "author": "cua", "license": "MIT", "scripts": { "lint": "biome check", "lint:fix": "biome check --fix", "build:core": "pnpm --filter @trycua/core build", "build:computer": "pnpm --filter @trycua/computer build", "build": "pnpm build:core && pnpm build:computer", "test:core": "pnpm --filter @trycua/core test", "test:computer": "pnpm --filter @trycua/computer test", "test": "pnpm -r test", "typecheck": "pnpm -r typecheck" }, "packageManager": "[email protected]", "devDependencies": { "@biomejs/biome": "^1.9.4" }, "pnpm": { "onlyBuiltDependencies": [ "@biomejs/biome", "esbuild", "protobufjs", "sharp", "unrs-resolver" ] } } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Get.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Get: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Get detailed information about a virtual machine" ) @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) var name: String @Option(name: [.long, .customShort("f")], help: "Output format (json|text)") var format: FormatOption = .text @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? 
init() { } @MainActor func run() async throws { let vmController = LumeController() let vm = try vmController.get(name: name, storage: storage) try VMDetailsPrinter.printStatus([vm.details], format: self.format) } } ``` -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- ```yaml # These are supported funding model platforms github: trycua patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry polar: # Replace with a single Polar username buy_me_a_coffee: # Replace with a single Buy Me a Coffee username thanks_dev: # Replace with a single thanks.dev username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/llm-integrations.mdx: -------------------------------------------------------------------------------- ```markdown --- title: LLM Integrations --- ## LiteLLM Integration This MCP server features comprehensive liteLLM integration, allowing you to use any supported LLM provider with a simple model string configuration. 
- **Unified Configuration**: Use a single `CUA_MODEL_NAME` environment variable with a model string - **Automatic Provider Detection**: The agent automatically detects the provider and capabilities from the model string - **Extensive Provider Support**: Works with Anthropic, OpenAI, local models, and any LiteLLM-compatible provider ### Model String Examples: - **Anthropic**: `"anthropic/claude-3-5-sonnet-20241022"` - **OpenAI**: `"openai/computer-use-preview"` - **UI-TARS**: `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"` - **Omni + Any LiteLLM**: `"omniparser+litellm/gpt-4o"`, `"omniparser+litellm/claude-3-haiku"`, `"omniparser+ollama_chat/gemma3"` ``` -------------------------------------------------------------------------------- /examples/computer_ui_examples.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Simple example script for the Computer Interface Gradio UI. This script launches the advanced Gradio UI for the Computer Interface with full model selection and configuration options. It can be run directly from the command line.
""" from utils import load_dotenv_files load_dotenv_files() # Import the create_gradio_ui function from computer.ui.gradio.app import create_gradio_ui if __name__ == "__main__": print("Launching Computer Interface Gradio UI with advanced features...") app = create_gradio_ui() app.launch( share=False, server_name="0.0.0.0", server_port=7860, ) # Optional: Using the saved dataset # import datasets # from computer.ui.utils import convert_to_unsloth # ds = datasets.load_dataset("ddupont/highquality-cua-demonstrations") # ds = convert_to_unsloth(ds) ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/Logger.swift: -------------------------------------------------------------------------------- ```swift import Foundation struct Logger { typealias Metadata = [String: String] enum Level: String { case info case error case debug } static func info(_ message: String, metadata: Metadata = [:]) { log(.info, message, metadata) } static func error(_ message: String, metadata: Metadata = [:]) { log(.error, message, metadata) } static func debug(_ message: String, metadata: Metadata = [:]) { log(.debug, message, metadata) } private static func log(_ level: Level, _ message: String, _ metadata: Metadata) { let timestamp = ISO8601DateFormatter().string(from: Date()) let metadataString = metadata.isEmpty ?
"" : " " + metadata.map { "\($0.key)=\($0.value)" }.joined(separator: " ") print("[\(timestamp)] \(level.rawValue.uppercased()): \(message)\(metadataString)") } } ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/linux.test.ts: -------------------------------------------------------------------------------- ```typescript import { describe, expect, it } from 'vitest'; import { LinuxComputerInterface } from '../../src/interface/linux.ts'; import { MacOSComputerInterface } from '../../src/interface/macos.ts'; describe('LinuxComputerInterface', () => { const testParams = { ipAddress: 'test.cua.com', // Placeholder hostname (not an RFC 5737 TEST-NET address); this test only constructs the interface username: 'testuser', password: 'testpass', apiKey: 'test-api-key', vmName: 'test-vm', }; describe('Inheritance', () => { it('should extend MacOSComputerInterface', () => { const linuxInterface = new LinuxComputerInterface( testParams.ipAddress, testParams.username, testParams.password, testParams.apiKey, testParams.vmName ); expect(linuxInterface).toBeInstanceOf(MacOSComputerInterface); expect(linuxInterface).toBeInstanceOf(LinuxComputerInterface); }); }); }); ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Clone.swift: -------------------------------------------------------------------------------- ```swift import ArgumentParser import Foundation struct Clone: AsyncParsableCommand { static let configuration = CommandConfiguration( abstract: "Clone an existing virtual machine" ) @Argument(help: "Name of the source virtual machine", completion: .custom(completeVMName)) var name: String @Argument(help: "Name for the cloned virtual machine") var newName: String @Option(name: .customLong("source-storage"), help: "Source VM storage location") var sourceStorage: String? @Option(name: .customLong("dest-storage"), help: "Destination VM storage location") var destStorage: String?
init() {} @MainActor func run() async throws { let vmController = LumeController() try vmController.clone( name: name, newName: newName, sourceLocation: sourceStorage, destLocation: destStorage ) } } ``` -------------------------------------------------------------------------------- /docs/next.config.mjs: -------------------------------------------------------------------------------- ``` import { createMDX } from 'fumadocs-mdx/next'; const withMDX = createMDX(); /** @type {import('next').NextConfig} */ const config = { reactStrictMode: true, trailingSlash: false, basePath: '/docs', assetPrefix: '/docs', async rewrites() { return [ { source: '/:path*.mdx', destination: '/llms.mdx/:path*', }, ]; }, async redirects() { return [ { source: '/', destination: '/docs', basePath: false, // Important: this bypasses the basePath permanent: false, }, ]; }, images: { dangerouslyAllowSVG: true, remotePatterns: [ { protocol: 'https', hostname: 'img.shields.io', }, { protocol: 'https', hostname: 'starchart.cc', }, { protocol: 'https', hostname: 'github.com', }, ], }, }; export default withMDX(config); ```