This is page 2 of 20. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ └── py.code-workspace ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── windows-sandbox.md ├── CONTRIBUTING.md ├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ 
│ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ │ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ 
│ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift │ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ 
└── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ │ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── 
tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── xfce4-session.xml ├── LICENSE.md ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /.devcontainer/post-install.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | 3 | WORKSPACE="/workspaces/cua" 4 | 5 | # Setup .env.local 6 | echo "PYTHON_BIN=python" > /workspaces/cua/.env.local 7 | 8 | # Run /scripts/build.sh 9 | ./scripts/build.sh 10 | 11 | # --- 12 | # Build is complete. Show user a clear message to open the workspace manually. 13 | # --- 14 | 15 | cat << 'EOM' 16 | 17 | ============================================ 18 | 🚀 Build complete! 19 | 20 | 👉 Next steps: 21 | 22 | 1. Open '.vscode/py.code-workspace' 23 | 2. Press 'Open Workspace' 24 | 25 | Happy coding! 
26 | ============================================ 27 | 28 | EOM 29 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/ProgressLogger.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | struct ProgressLogger { 4 | private var lastLoggedProgress: Double = 0.0 5 | private let threshold: Double 6 | 7 | init(threshold: Double = 0.05) { 8 | self.threshold = threshold 9 | } 10 | 11 | mutating func logProgress(current: Double, context: String) { 12 | if current - lastLoggedProgress >= threshold { 13 | lastLoggedProgress = current 14 | let percentage = Int(current * 100) 15 | Logger.info("\(context) Progress: \(percentage)%") 16 | } 17 | } 18 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/CommandRegistry.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | 3 | enum CommandRegistry { 4 | static var allCommands: [ParsableCommand.Type] { 5 | [ 6 | Create.self, 7 | Pull.self, 8 | Push.self, 9 | Images.self, 10 | Clone.self, 11 | Get.self, 12 | Set.self, 13 | List.self, 14 | Run.self, 15 | Stop.self, 16 | IPSW.self, 17 | Serve.self, 18 | Delete.self, 19 | Prune.self, 20 | Config.self, 21 | Logs.self, 22 | ] 23 | } 24 | } 25 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/IPSW.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct IPSW: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Get macOS restore image IPSW URL", 7 | discussion: "Download IPSW file manually, then use in create command with --ipsw" 8 | ) 9 | 10 | init() { 11 | 12 | } 13 | 14 | @MainActor 15 | func run() async throws { 16 | let vmController = LumeController() 17 | let url = try await vmController.getLatestIPSWURL() 18 | print(url.absoluteString) 19 | } 20 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Images.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Images: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "List available macOS images from local cache" 7 | ) 8 | 9 | @Option(help: "Organization to list from. 
Defaults to trycua") 10 | var organization: String = "trycua" 11 | 12 | init() {} 13 | 14 | @MainActor 15 | func run() async throws { 16 | let vmController = LumeController() 17 | _ = try await vmController.getImages(organization: organization) 18 | } 19 | } 20 | ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "computer-example-ts", 3 | "version": "1.0.0", 4 | "description": "", 5 | "type": "module", 6 | "main": "index.js", 7 | "scripts": { 8 | "dev": "tsx watch src/index.ts", 9 | "start": "tsx src/index.ts" 10 | }, 11 | "keywords": [], 12 | "author": "", 13 | "license": "MIT", 14 | "packageManager": "[email protected]", 15 | "dependencies": { 16 | "@trycua/computer": "^0.1.3", 17 | "dotenv": "^16.5.0", 18 | "openai": "^5.7.0" 19 | }, 20 | "devDependencies": { 21 | "@types/node": "^22.15.33", 22 | "tsx": "^4.20.3", 23 | "typescript": "^5.8.3" 24 | } 25 | } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/loops/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Agent loops for agent 3 | """ 4 | 5 | # Import the loops to register them 6 | from . import anthropic 7 | from . import openai 8 | from . import uitars 9 | from . import omniparser 10 | from . import gta1 11 | from . import composed_grounded 12 | from . import glm45v 13 | from . import opencua 14 | from . import internvl 15 | from . import holo 16 | from . import moondream3 17 | 18 | __all__ = [ 19 | "anthropic", 20 | "openai", 21 | "uitars", 22 | "omniparser", 23 | "gta1", 24 | "composed_grounded", 25 | "glm45v", 26 | "opencua", 27 | "internvl", 28 | "holo", 29 | "moondream3", 30 | ] ``` -------------------------------------------------------------------------------- /docs/src/mdx-components.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import defaultMdxComponents from 'fumadocs-ui/mdx'; 2 | import * as TabsComponents from 'fumadocs-ui/components/tabs'; 3 | import type { MDXComponents } from 'mdx/types'; 4 | import { Mermaid } from './components/mermaid'; 5 | import IOU from './components/iou'; 6 | 7 | // use this function to get MDX components, you will need it for rendering MDX 8 | export function getMDXComponents(components?: MDXComponents): MDXComponents { 9 | return { 10 | ...defaultMdxComponents, 11 | Mermaid, 12 | IOU, 13 | ...TabsComponents, 14 | ...components, 15 | }; 16 | } 17 | ``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/resize-display.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | # Dynamic display resolution script 3 | # Can be called to change the VNC display resolution 4 | 5 | RESOLUTION=${1:-1920x1080} 6 | 7 | # Wait for display to be ready 8 | for i in {1..10}; do 9 | if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then 10 | break 11 | fi 12 | sleep 1 13 | done 14 | 15 | # Change resolution using xrandr 16 | DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \ 17 | DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null || \ 18 | echo "Failed to set resolution to $RESOLUTION" 19 | 20 | echo "Display resolution set to: $RESOLUTION" 21 | ``` -------------------------------------------------------------------------------- 
/docs/content/docs/libraries/mcp-server/configuration.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Configuration 3 | --- 4 | 5 | The server is configured using environment variables (can be set in the Claude Desktop config): 6 | 7 | | Variable | Description | Default | 8 | |----------|-------------|---------| 9 | | `CUA_MODEL_NAME` | Model string (e.g., "anthropic/claude-3-5-sonnet-20241022", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-3-5-sonnet-20241022 | 10 | | `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 | 11 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/__main__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Entry point for running agent CLI module. 3 | 4 | Usage: 5 | python -m agent.cli <model_string> 6 | """ 7 | 8 | import sys 9 | import asyncio 10 | from .cli import main 11 | 12 | if __name__ == "__main__": 13 | # Check if 'cli' is specified as the module 14 | if len(sys.argv) > 1 and sys.argv[1] == "cli": 15 | # Remove 'cli' from arguments and run CLI 16 | sys.argv.pop(1) 17 | asyncio.run(main()) 18 | else: 19 | print("Usage: python -m agent.cli <model_string>") 20 | print("Example: python -m agent.cli openai/computer-use-preview") 21 | sys.exit(1) 22 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "lib": ["es2023"], 5 | "moduleDetection": "force", 6 | "module": "preserve", 7 | "moduleResolution": "bundler", 8 | "allowImportingTsExtensions": true, 9 | "resolveJsonModule": true, 10 | "types": ["node"], 11 | "allowSyntheticDefaultImports": true, 12 | "strict": true, 13 | "noUnusedLocals": true, 14 | "declaration": true, 15 | "emitDeclarationOnly": true, 16 | "esModuleInterop": true, 17 | "isolatedModules": true, 18 | "verbatimModuleSyntax": true, 19 | "skipLibCheck": true 20 | }, 21 | "include": ["src"] 22 | } 23 | ``` -------------------------------------------------------------------------------- /docs/src/app/llms.mdx/[[...slug]]/route.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { type NextRequest, NextResponse } from 'next/server'; 2 | import { getLLMText } from '@/lib/llms'; 3 | import { source } from '@/lib/source'; 4 | import { notFound } from 'next/navigation'; 5 | 6 | export const revalidate = false; 7 | 8 | export async function GET( 9 | _req: NextRequest, 10 | { params }: { params: Promise<{ slug?: string[] }> } 11 | ) { 12 | const { slug } = await params; 13 | const page = source.getPage(slug); 14 | if (!page) notFound(); 15 | 16 | return new NextResponse(await getLLMText(page)); 17 | } 18 | 19 | export function generateStaticParams() { 20 | return source.generateParams(); 21 | } 22 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/screenspot-v2.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: ScreenSpot-v2 3 | description: Standard resolution GUI grounding benchmark 4 | --- 5 | 6 | ScreenSpot-v2 is a 
benchmark for evaluating click prediction accuracy on standard resolution GUI screenshots. 7 | 8 | ## Usage 9 | 10 | ```bash 11 | # Run the benchmark 12 | cd libs/python/agent/benchmarks 13 | python ss-v2.py 14 | 15 | # Run with custom sample limit 16 | python ss-v2.py --samples 100 17 | ``` 18 | 19 | ## Results 20 | 21 | | Model | Accuracy | Failure Rate | Samples | 22 | |-------|----------|--------------|---------| 23 | | Coming Soon | - | - | - | 24 | 25 | Results will be populated after running benchmarks with various models. 26 | ``` -------------------------------------------------------------------------------- /libs/xfce/src/scripts/start-vnc.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | set -e 3 | 4 | # Clean up any existing VNC lock files 5 | rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 6 | 7 | # Start VNC server without password authentication 8 | vncserver :1 \ 9 | -geometry ${VNC_RESOLUTION:-1920x1080} \ 10 | -depth ${VNC_COL_DEPTH:-24} \ 11 | -rfbport ${VNC_PORT:-5901} \ 12 | -localhost no \ 13 | -SecurityTypes None \ 14 | -AlwaysShared \ 15 | -AcceptPointerEvents \ 16 | -AcceptKeyEvents \ 17 | -AcceptCutText \ 18 | -SendCutText \ 19 | -xstartup /usr/local/bin/xstartup.sh \ 20 | --I-KNOW-THIS-IS-INSECURE 21 | 22 | # Keep the process running 23 | tail -f /home/cua/.vnc/*.log 24 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/screenspot-pro.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: ScreenSpot-Pro 3 | description: High-resolution GUI grounding benchmark 4 | --- 5 | 6 | ScreenSpot-Pro is a benchmark for evaluating click prediction accuracy on high-resolution GUI screenshots with complex layouts. 7 | 8 | ## Usage 9 | 10 | ```bash 11 | # Run the benchmark 12 | cd libs/python/agent/benchmarks 13 | python ss-pro.py 14 | 15 | # Run with custom sample limit 16 | python ss-pro.py --samples 50 17 | ``` 18 | 19 | ## Results 20 | 21 | | Model | Accuracy | Failure Rate | Samples | 22 | |-------|----------|--------------|---------| 23 | | Coming Soon | - | - | - | 24 | 25 | Results will be populated after running benchmarks with various models.
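As a point of reference for what is being measured: in ScreenSpot-style evaluations a prediction is normally counted as correct when the predicted click point lands inside the target element's bounding box. The sketch below illustrates that convention only; the coordinates are made up and it is not part of the benchmark harness.

```python
# Illustrative only: click-in-bounding-box accuracy, the usual ScreenSpot-style metric.
# The points and boxes below are hypothetical examples, not benchmark data.
def is_hit(x: float, y: float, bbox: tuple[float, float, float, float]) -> bool:
    """Return True if the predicted click (x, y) falls inside the target bbox."""
    left, top, right, bottom = bbox
    return left <= x <= right and top <= y <= bottom

samples = [
    ((1203, 512), (1180, 490, 1260, 530)),  # (predicted point, ground-truth bbox)
    ((88, 44), (300, 200, 360, 240)),
]
accuracy = sum(is_hit(x, y, bbox) for (x, y), bbox in samples) / len(samples)
print(f"accuracy: {accuracy:.2%}")  # 50.00% on these made-up samples
```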
26 | ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/mcp_server/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """MCP Server for Computer-Use Agent (CUA).""" 2 | 3 | import sys 4 | import os 5 | 6 | # Add detailed debugging at import time 7 | with open("/tmp/mcp_server_debug.log", "w") as f: 8 | f.write(f"Python executable: {sys.executable}\n") 9 | f.write(f"Python version: {sys.version}\n") 10 | f.write(f"Working directory: {os.getcwd()}\n") 11 | f.write(f"Python path:\n{chr(10).join(sys.path)}\n") 12 | f.write(f"Environment variables:\n") 13 | for key, value in os.environ.items(): 14 | f.write(f"{key}={value}\n") 15 | 16 | from .server import server, main 17 | 18 | __version__ = "0.1.0" 19 | __all__ = ["server", "main"] 20 | ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/scripts/start_mcp_server.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Set the CUA repository path based on script location 6 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 7 | CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )" 8 | PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python" 9 | 10 | # Set Python path to include all necessary libraries 11 | export PYTHONPATH="${CUA_REPO_DIR}/libs/python/mcp-server:${CUA_REPO_DIR}/libs/python/agent:${CUA_REPO_DIR}/libs/python/computer:${CUA_REPO_DIR}/libs/python/core:${CUA_REPO_DIR}/libs/python/pylume" 12 | 13 | # Run the MCP server directly as a module 14 | $PYTHON_PATH -m mcp_server.server ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/usage.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Usage 3 | --- 4 | 5 | ## Usage 6 | 7 | Once configured, you can simply ask Claude to perform computer tasks: 8 | 9 | - "Open Chrome and go to github.com" 10 | - "Create a folder called 'Projects' on my desktop" 11 | - "Find all PDFs in my Downloads folder" 12 | - "Take a screenshot and highlight the error message" 13 | 14 | Claude will automatically use your CUA agent to perform these tasks. 
15 | 16 | ### First-time Usage Notes 17 | 18 | **API Keys**: Ensure you have valid API keys: 19 | - Add your Anthropic API key, or other model provider API key in the Claude Desktop config (as shown above) 20 | - Or set it as an environment variable in your shell profile 21 | ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "lib": [ 5 | "es2023" 6 | ], 7 | "moduleDetection": "force", 8 | "module": "preserve", 9 | "moduleResolution": "bundler", 10 | "allowImportingTsExtensions": true, 11 | "resolveJsonModule": true, 12 | "types": [ 13 | "node" 14 | ], 15 | "allowSyntheticDefaultImports": true, 16 | "strict": true, 17 | "noUnusedLocals": true, 18 | "declaration": true, 19 | "emitDeclarationOnly": true, 20 | "esModuleInterop": true, 21 | "isolatedModules": true, 22 | "verbatimModuleSyntax": true, 23 | "skipLibCheck": true, 24 | "outDir": "build", 25 | }, 26 | "include": [ 27 | "src" 28 | ] 29 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Stop.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Stop: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Stop a virtual machine" 7 | ) 8 | 9 | @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) 10 | var name: String 11 | 12 | @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") 13 | var storage: String? 14 | 15 | init() { 16 | } 17 | 18 | @MainActor 19 | func run() async throws { 20 | let vmController = LumeController() 21 | try await vmController.stopVM(name: name, storage: storage) 22 | } 23 | } 24 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer-server/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Computer Server 3 | description: Reference for the current version of the Computer Server library. 4 | pypi: cua-computer-server 5 | github: 6 | - https://github.com/trycua/cua/tree/main/libs/python/computer-server 7 | --- 8 | 9 | <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_server_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.</Callout> 10 | 11 | The Computer Server API reference documentation is currently under development. 12 | 13 | ## Overview 14 | 15 | The Computer Server provides WebSocket and REST API endpoints for remote computer control and automation.
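Until the full reference lands, the following is a minimal sketch of talking to a locally running Computer Server over WebSocket. The port (8000), the `/ws` path, and the `{"command": ..., "params": ...}` message shape are assumptions made for illustration here; treat the Commands and WebSocket API pages as the authoritative protocol reference.

```python
# Minimal sketch only -- the endpoint, port, and message format are assumptions,
# not a verbatim protocol reference.
import asyncio
import json

import websockets  # pip install websockets


async def take_screenshot() -> None:
    # Assumes a Computer Server is already running locally on its default port.
    async with websockets.connect("ws://localhost:8000/ws") as ws:
        await ws.send(json.dumps({"command": "screenshot", "params": {}}))
        reply = json.loads(await ws.recv())
        print(reply.keys())


asyncio.run(take_screenshot())
```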
16 | ``` -------------------------------------------------------------------------------- /libs/typescript/agent/src/index.ts: -------------------------------------------------------------------------------- ```typescript 1 | // Export the main AgentClient class as default 2 | export { AgentClient as default } from './client.js'; 3 | 4 | // Also export as named export for flexibility 5 | export { AgentClient } from './client.js'; 6 | 7 | // Export types for TypeScript users 8 | export type { 9 | AgentRequest, 10 | AgentResponse, 11 | AgentMessage, 12 | UserMessage, 13 | AssistantMessage, 14 | ReasoningMessage, 15 | ComputerCallMessage, 16 | ComputerCallOutputMessage, 17 | OutputContent, 18 | SummaryContent, 19 | InputContent, 20 | ComputerAction, 21 | ClickAction, 22 | TypeAction, 23 | KeyPressAction, 24 | ScrollAction, 25 | WaitAction, 26 | Usage, 27 | ConnectionType, 28 | AgentClientOptions, 29 | } from './types'; 30 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/computer/types.ts: -------------------------------------------------------------------------------- ```typescript 1 | import type { OSType, ScreenSize } from '../types'; 2 | 3 | /** 4 | * Display configuration for the computer. 5 | */ 6 | export interface Display extends ScreenSize { 7 | scale_factor?: number; 8 | } 9 | 10 | /** 11 | * Computer configuration model. 12 | */ 13 | export interface BaseComputerConfig { 14 | /** 15 | * The VM name 16 | * @default "" 17 | */ 18 | name: string; 19 | 20 | /** 21 | * The operating system type ('macos', 'windows', or 'linux') 22 | * @default "macos" 23 | */ 24 | osType: OSType; 25 | } 26 | 27 | export interface CloudComputerConfig extends BaseComputerConfig { 28 | /** 29 | * Optional API key for cloud providers 30 | */ 31 | apiKey: string; 32 | } 33 | 34 | export enum VMProviderType { 35 | CLOUD = 'cloud', 36 | } 37 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/interactive.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Interactive Tool 3 | description: Real-time testing and visualization tool for GUI grounding models 4 | --- 5 | 6 | This tool allows you to test multiple models interactively by providing natural language instructions. It automatically captures screenshots and tests all configured models sequentially, providing immediate feedback and visual results. 7 | 8 | ## Usage 9 | 10 | ```bash 11 | # Start the interactive tool 12 | cd libs/python/agent/benchmarks 13 | python interactive.py 14 | ``` 15 | 16 | ## Commands 17 | 18 | - **Type instruction**: Screenshot + test all models 19 | - **`screenshot`**: Take screenshot without prediction 20 | - **`models`**: List available models 21 | - **`quit`/`exit`**: Exit the tool 22 | ``` -------------------------------------------------------------------------------- /examples/agent_ui_examples.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Simple example script for the Computer-Use Agent Gradio UI. 4 | 5 | This script launches the advanced Gradio UI for the Computer-Use Agent 6 | with full model selection and configuration options. 7 | It can be run directly from the command line. 
8 | """ 9 | 10 | 11 | from utils import load_dotenv_files 12 | 13 | load_dotenv_files() 14 | 15 | # Import the create_gradio_ui function 16 | from agent.ui.gradio.ui_components import create_gradio_ui 17 | 18 | if __name__ == "__main__": 19 | print("Launching Computer-Use Agent Gradio UI with advanced features...") 20 | app = create_gradio_ui() 21 | app.launch( 22 | share=False, 23 | server_name="0.0.0.0", 24 | server_port=7860, 25 | ) 26 | ``` -------------------------------------------------------------------------------- /docs/src/app/layout.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import './global.css'; 2 | import { RootProvider } from 'fumadocs-ui/provider'; 3 | import { Inter } from 'next/font/google'; 4 | import type { ReactNode } from 'react'; 5 | 6 | const inter = Inter({ 7 | subsets: ['latin'], 8 | }); 9 | 10 | export default function Layout({ children }: { children: ReactNode }) { 11 | return ( 12 | <html lang="en" className={inter.className} suppressHydrationWarning> 13 | <head> 14 | <link rel="icon" href="/docs/favicon.ico" sizes="any" /> 15 | </head> 16 | <body className="flex min-h-screen flex-col"> 17 | <RootProvider search={{ options: { api: '/docs/api/search' } }}> 18 | {children} 19 | </RootProvider> 20 | </body> 21 | </html> 22 | ); 23 | } 24 | ``` -------------------------------------------------------------------------------- /docs/src/lib/llms.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { remark } from 'remark'; 2 | import remarkGfm from 'remark-gfm'; 3 | import remarkMdx from 'remark-mdx'; 4 | import { remarkInclude } from 'fumadocs-mdx/config'; 5 | import { source } from '@/lib/source'; 6 | import type { InferPageType } from 'fumadocs-core/source'; 7 | 8 | const processor = remark() 9 | .use(remarkMdx) 10 | // needed for Fumadocs MDX 11 | .use(remarkInclude) 12 | .use(remarkGfm); 13 | 14 | export async function getLLMText(page: InferPageType<typeof source>) { 15 | const processed = await processor.process({ 16 | path: page.data._file.absolutePath, 17 | value: page.data.content, 18 | }); 19 | 20 | return `# ${page.data.title} 21 | URL: ${page.url} 22 | 23 | ${page.data.description} 24 | 25 | ${processed.value}`; 26 | } 27 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/interface/macos.py: -------------------------------------------------------------------------------- ```python 1 | from .generic import GenericComputerInterface 2 | from typing import Optional 3 | 4 | class MacOSComputerInterface(GenericComputerInterface): 5 | """Interface for macOS.""" 6 | 7 | def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): 8 | super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.macos") 9 | 10 | async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict: 11 | """Send a diorama command to the server (macOS only).""" 12 | return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}}) ``` -------------------------------------------------------------------------------- /libs/typescript/agent/tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "lib": [ 5 | "es2023" 6 | ], 7 | "moduleDetection": 
"force", 8 | "module": "preserve", 9 | "moduleResolution": "bundler", 10 | "allowImportingTsExtensions": true, 11 | "resolveJsonModule": true, 12 | "types": [ 13 | "node" 14 | ], 15 | "allowSyntheticDefaultImports": true, 16 | "strict": true, 17 | "noUnusedLocals": true, 18 | "declaration": true, 19 | "emitDeclarationOnly": true, 20 | "esModuleInterop": true, 21 | "isolatedModules": true, 22 | "verbatimModuleSyntax": true, 23 | "skipLibCheck": true 24 | }, 25 | "include": [ 26 | "src" 27 | ] 28 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/Virtualization/ImageLoaderFactory.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | /// Protocol defining a factory for creating image loaders based on the image type 4 | protocol ImageLoaderFactory { 5 | /// Creates an appropriate ImageLoader based on the image path or type 6 | func createImageLoader() -> ImageLoader 7 | } 8 | 9 | /// Default implementation of ImageLoaderFactory that creates appropriate loaders based on image type 10 | final class DefaultImageLoaderFactory: ImageLoaderFactory { 11 | func createImageLoader() -> ImageLoader { 12 | // For now, we only support Darwin images 13 | // In the future, this can be extended to support other OS types 14 | // by analyzing the image path or having explicit OS type parameter 15 | return DarwinImageLoader() 16 | } 17 | } ``` -------------------------------------------------------------------------------- /.github/workflows/test-validation-script.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Test valididation script 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - '.github/scripts/**' 7 | - '.github/workflows/test-scripts.yml' 8 | push: 9 | branches: 10 | - main 11 | paths: 12 | - '.github/scripts/**' 13 | - '.github/workflows/test-scripts.yml' 14 | 15 | jobs: 16 | test: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up Python 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: '3.11' 27 | 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install pytest toml 32 | 33 | - name: Run tests 34 | run: | 35 | cd .github/scripts 36 | pytest tests/ -v 37 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/agent/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Agent 3 | description: Reference for the current version of the Agent library. 4 | pypi: cua-agent 5 | github: 6 | - https://github.com/trycua/cua/tree/main/libs/python/agent 7 | --- 8 | 9 | The Agent library provides the ComputerAgent class and tools for building AI agents that automate workflows on Cua Computers. 10 | 11 | ## Agent Loops 12 | 13 | See the [Agent Loops](../agent-sdk/agent-loops) documentation for how agents process information and take actions. 14 | 15 | ## Chat History 16 | 17 | See the [Chat History](../agent-sdk/chat-history) documentation for managing conversational context and turn-by-turn interactions. 18 | 19 | ## Callbacks 20 | 21 | See the [Callbacks](../agent-sdk/callbacks) documentation for extending and customizing agent behavior with custom hooks. 
22 | ``` -------------------------------------------------------------------------------- /docs/source.config.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { 2 | defineConfig, 3 | defineDocs, 4 | frontmatterSchema, 5 | metaSchema, 6 | } from 'fumadocs-mdx/config'; 7 | import { z } from 'zod'; 8 | 9 | // You can customise Zod schemas for frontmatter and `meta.json` here 10 | // see https://fumadocs.vercel.app/docs/mdx/collections#define-docs 11 | export const docs = defineDocs({ 12 | docs: { 13 | schema: frontmatterSchema.extend({ 14 | pypi: z.string().optional(), 15 | npm: z.string().optional(), 16 | github: z.array(z.string()).optional(), 17 | macos: z.boolean().default(false), 18 | windows: z.boolean().default(false), 19 | linux: z.boolean().default(false), 20 | }), 21 | }, 22 | meta: { 23 | schema: metaSchema, 24 | }, 25 | }); 26 | 27 | export default defineConfig({ 28 | mdxOptions: { 29 | // MDX options 30 | }, 31 | }); 32 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/installation.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Installation 3 | --- 4 | 5 | Before using Lumier, make sure you have: 6 | 7 | 1. **Docker for Apple Silicon** - download it [here](https://desktop.docker.com/mac/main/arm64/Docker.dmg) and follow the installation instructions. 8 | 9 | 2. **Lume** - This is the virtualization CLI that powers Lumier. Install it with this command: 10 | ```bash 11 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" 12 | ``` 13 | 14 | After installation, Lume runs as a background service and listens on port 7777. This service allows Lumier to create and manage virtual machines. If port 7777 is already in use on your system, you can specify a different port with the `--port` option when running the `install.sh` script. ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/callbacks/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Callback system for ComputerAgent preprocessing and postprocessing hooks. 
3 | """ 4 | 5 | from .base import AsyncCallbackHandler 6 | from .image_retention import ImageRetentionCallback 7 | from .logging import LoggingCallback 8 | from .trajectory_saver import TrajectorySaverCallback 9 | from .budget_manager import BudgetManagerCallback 10 | from .telemetry import TelemetryCallback 11 | from .operator_validator import OperatorNormalizerCallback 12 | from .prompt_instructions import PromptInstructionsCallback 13 | 14 | __all__ = [ 15 | "AsyncCallbackHandler", 16 | "ImageRetentionCallback", 17 | "LoggingCallback", 18 | "TrajectorySaverCallback", 19 | "BudgetManagerCallback", 20 | "TelemetryCallback", 21 | "OperatorNormalizerCallback", 22 | "PromptInstructionsCallback", 23 | ] 24 | ``` -------------------------------------------------------------------------------- /.github/workflows/docker-publish-xfce.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Build and Publish CUA XFCE Container 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - "docker-xfce-v*.*.*" 9 | paths: 10 | - "libs/xfce/**" 11 | - ".github/workflows/docker-publish-xfce.yml" 12 | - ".github/workflows/docker-reusable-publish.yml" 13 | pull_request: 14 | paths: 15 | - "libs/xfce/**" 16 | - ".github/workflows/docker-publish-xfce.yml" 17 | - ".github/workflows/docker-reusable-publish.yml" 18 | 19 | jobs: 20 | publish: 21 | uses: ./.github/workflows/docker-reusable-publish.yml 22 | with: 23 | image_name: cua-xfce 24 | context_dir: libs/xfce 25 | dockerfile_path: Dockerfile 26 | tag_prefix: docker-xfce-v 27 | docker_hub_org: trycua 28 | secrets: 29 | DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} 30 | ``` -------------------------------------------------------------------------------- /.github/workflows/docker-publish-kasm.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Build and Publish CUA Ubuntu Container 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - "docker-kasm-v*.*.*" 9 | paths: 10 | - "libs/kasm/**" 11 | - ".github/workflows/docker-publish-kasm.yml" 12 | - ".github/workflows/docker-reusable-publish.yml" 13 | pull_request: 14 | paths: 15 | - "libs/kasm/**" 16 | - ".github/workflows/docker-publish-kasm.yml" 17 | - ".github/workflows/docker-reusable-publish.yml" 18 | 19 | jobs: 20 | publish: 21 | uses: ./.github/workflows/docker-reusable-publish.yml 22 | with: 23 | image_name: cua-ubuntu 24 | context_dir: libs/kasm 25 | dockerfile_path: Dockerfile 26 | tag_prefix: docker-kasm-v 27 | docker_hub_org: trycua 28 | secrets: 29 | DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} 30 | ``` -------------------------------------------------------------------------------- /libs/python/mcp-server/pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "cua-mcp-server" 7 | description = "MCP Server for Computer-Use Agent (CUA)" 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | version = "0.1.0" 11 | authors = [ 12 | {name = "TryCua", email = "[email protected]"} 13 | ] 14 | dependencies = [ 15 | "mcp>=1.6.0,<2.0.0", 16 | "cua-agent[all]>=0.4.0,<0.5.0", 17 | "cua-computer>=0.4.0,<0.5.0", 18 | ] 19 | 20 | [project.scripts] 21 | cua-mcp-server = "mcp_server.server:main" 22 | 23 | [tool.pdm] 24 | distribution = true 25 | 26 | [tool.pdm.dev-dependencies] 27 | dev = [ 28 | "black>=23.9.1", 29 | "ruff>=0.0.292", 
30 | ] 31 | 32 | [tool.black] 33 | line-length = 100 34 | target-version = ["py311"] 35 | 36 | [tool.ruff] 37 | line-length = 100 38 | target-version = "py311" 39 | select = ["E", "F", "B", "I"] 40 | fix = true 41 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-model-providers/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Supported Model Providers 3 | --- 4 | 5 | ## Supported Models 6 | 7 | ### Anthropic Claude (Computer Use API) 8 | ```python 9 | model="anthropic/claude-3-5-sonnet-20241022" 10 | model="anthropic/claude-3-7-sonnet-20250219" 11 | model="anthropic/claude-opus-4-20250514" 12 | model="anthropic/claude-sonnet-4-20250514" 13 | ``` 14 | 15 | ### OpenAI Computer Use Preview 16 | ```python 17 | model="openai/computer-use-preview" 18 | ``` 19 | 20 | ### UI-TARS (Local or Huggingface Inference) 21 | ```python 22 | model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" 23 | model="ollama_chat/0000/ui-tars-1.5-7b" 24 | ``` 25 | 26 | ### Omniparser + Any LLM 27 | ```python 28 | model="omniparser+ollama_chat/mistral-small3.2" 29 | model="omniparser+vertex_ai/gemini-pro" 30 | model="omniparser+anthropic/claude-3-5-sonnet-20241022" 31 | model="omniparser+openai/gpt-4o" 32 | ``` ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/NetworkUtils.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | enum NetworkUtils { 4 | /// Checks if an IP address is reachable by sending a ping 5 | /// - Parameter ipAddress: The IP address to check 6 | /// - Returns: true if the IP is reachable, false otherwise 7 | static func isReachable(ipAddress: String) -> Bool { 8 | let process = Process() 9 | process.executableURL = URL(fileURLWithPath: "/sbin/ping") 10 | process.arguments = ["-c", "1", "-t", "1", ipAddress] 11 | 12 | let pipe = Pipe() 13 | process.standardOutput = pipe 14 | process.standardError = pipe 15 | 16 | do { 17 | try process.run() 18 | process.waitUntilExit() 19 | return process.terminationStatus == 0 20 | } catch { 21 | return false 22 | } 23 | } 24 | } ``` -------------------------------------------------------------------------------- /.github/workflows/ci-lume.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: lume 2 | on: 3 | push: 4 | branches: 5 | - "main" 6 | pull_request: {} 7 | 8 | concurrency: 9 | group: lume-${{ github.workflow }}-${{ github.ref }} 10 | cancel-in-progress: true 11 | 12 | # Runner images: https://github.com/actions/runner-images 13 | 14 | jobs: 15 | test: 16 | name: Test 17 | runs-on: macos-15 18 | steps: 19 | - uses: actions/checkout@v4 20 | - run: uname -a 21 | - run: sudo xcode-select -s /Applications/Xcode_16.app # Swift 6.0 22 | - run: swift test 23 | working-directory: ./libs/lume 24 | build: 25 | name: Release build 26 | runs-on: macos-15 27 | steps: 28 | - uses: actions/checkout@v4 29 | - run: uname -a 30 | - run: sudo xcode-select -s /Applications/Xcode_16.app # Swift 6.0 31 | - run: swift build --configuration release 32 | working-directory: ./libs/lume 33 | ``` -------------------------------------------------------------------------------- /libs/xfce/src/supervisor/supervisord.conf: -------------------------------------------------------------------------------- ``` 1 | [supervisord] 2 | nodaemon=true 3 | user=root 4 | 
logfile=/var/log/supervisor/supervisord.log 5 | pidfile=/var/run/supervisord.pid 6 | childlogdir=/var/log/supervisor 7 | 8 | [program:vncserver] 9 | command=/usr/local/bin/start-vnc.sh 10 | user=cua 11 | autorestart=true 12 | stdout_logfile=/var/log/supervisor/vncserver.log 13 | stderr_logfile=/var/log/supervisor/vncserver.error.log 14 | priority=10 15 | 16 | [program:novnc] 17 | command=/usr/local/bin/start-novnc.sh 18 | user=cua 19 | autorestart=true 20 | stdout_logfile=/var/log/supervisor/novnc.log 21 | stderr_logfile=/var/log/supervisor/novnc.error.log 22 | priority=20 23 | 24 | [program:computer-server] 25 | command=/usr/local/bin/start-computer-server.sh 26 | user=cua 27 | autorestart=true 28 | stdout_logfile=/var/log/supervisor/computer-server.log 29 | stderr_logfile=/var/log/supervisor/computer-server.error.log 30 | priority=30 31 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/human_tool/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Human-in-the-Loop Completion Tool 3 | 4 | This package provides a human-in-the-loop completion system that allows 5 | AI agents to request human assistance for complex decisions or responses. 6 | 7 | Components: 8 | - server.py: FastAPI server with completion queue management 9 | - ui.py: Gradio UI for human interaction 10 | - __main__.py: Combined server and UI application 11 | 12 | Usage: 13 | # Run the server and UI 14 | python -m agent.human_tool 15 | 16 | # Or run components separately 17 | python -m agent.human_tool.server # API server only 18 | python -m agent.human_tool.ui # UI only 19 | """ 20 | 21 | from .server import CompletionQueue, completion_queue 22 | from .ui import HumanCompletionUI, create_ui 23 | 24 | __all__ = [ 25 | "CompletionQueue", 26 | "completion_queue", 27 | "HumanCompletionUI", 28 | "create_ui" 29 | ] 30 | ``` -------------------------------------------------------------------------------- /docs/tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "baseUrl": ".", 4 | "target": "ESNext", 5 | "lib": [ 6 | "dom", 7 | "dom.iterable", 8 | "esnext" 9 | ], 10 | "allowJs": true, 11 | "skipLibCheck": true, 12 | "strict": true, 13 | "forceConsistentCasingInFileNames": true, 14 | "noEmit": true, 15 | "esModuleInterop": true, 16 | "module": "esnext", 17 | "moduleResolution": "bundler", 18 | "resolveJsonModule": true, 19 | "isolatedModules": true, 20 | "jsx": "preserve", 21 | "incremental": true, 22 | "paths": { 23 | "@/.source": [ 24 | "./.source/index.ts" 25 | ], 26 | "@/*": [ 27 | "./src/*" 28 | ] 29 | }, 30 | "plugins": [ 31 | { 32 | "name": "next" 33 | } 34 | ] 35 | }, 36 | "include": [ 37 | "next-env.d.ts", 38 | "**/*.ts", 39 | "**/*.tsx", 40 | ".next/types/**/*.ts" 41 | ], 42 | "exclude": [ 43 | "node_modules" 44 | ] 45 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/VM/VMDisplayResolution.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import ArgumentParser 3 | 4 | struct VMDisplayResolution: Codable, ExpressibleByArgument { 5 | let width: Int 6 | let height: Int 7 | 8 | init?(string: String) { 9 | let components = string.components(separatedBy: "x") 10 | guard components.count == 2, 11 | let width = Int(components[0]), 12 | let height = Int(components[1]), 13 | width > 0, height > 0 
else { 14 | return nil 15 | } 16 | self.width = width 17 | self.height = height 18 | } 19 | 20 | var string: String { 21 | "\(width)x\(height)" 22 | } 23 | 24 | init?(argument: String) { 25 | guard let resolution = VMDisplayResolution(string: argument) else { return nil } 26 | self = resolution 27 | } 28 | } ``` -------------------------------------------------------------------------------- /libs/typescript/package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "cua-ts", 3 | "version": "1.0.0", 4 | "description": "The cua typescript libs.", 5 | "keywords": [], 6 | "author": "cua", 7 | "license": "MIT", 8 | "scripts": { 9 | "lint": "biome check", 10 | "lint:fix": "biome check --fix", 11 | "build:core": "pnpm --filter @trycua/core build", 12 | "build:computer": "pnpm --filter @trycua/computer build", 13 | "build": "pnpm build:core && pnpm build:computer", 14 | "test:core": "pnpm --filter @trycua/core test", 15 | "test:computer": "pnpm --filter @trycua/computer test", 16 | "test": "pnpm -r test", 17 | "typecheck": "pnpm -r typecheck" 18 | }, 19 | "packageManager": "[email protected]", 20 | "devDependencies": { 21 | "@biomejs/biome": "^1.9.4" 22 | }, 23 | "pnpm": { 24 | "onlyBuiltDependencies": [ 25 | "@biomejs/biome", 26 | "esbuild", 27 | "protobufjs", 28 | "sharp", 29 | "unrs-resolver" 30 | ] 31 | } 32 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Get.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Get: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Get detailed information about a virtual machine" 7 | ) 8 | 9 | @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) 10 | var name: String 11 | 12 | @Option(name: [.long, .customShort("f")], help: "Output format (json|text)") 13 | var format: FormatOption = .text 14 | 15 | @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") 16 | var storage: String? 
17 | 18 | init() { 19 | } 20 | 21 | @MainActor 22 | func run() async throws { 23 | let vmController = LumeController() 24 | let vm = try vmController.get(name: name, storage: storage) 25 | try VMDetailsPrinter.printStatus([vm.details], format: self.format) 26 | } 27 | } 28 | ``` -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- ```yaml 1 | # These are supported funding model platforms 2 | 3 | github: trycua 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/llm-integrations.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: LLM Integrations 3 | --- 4 | ## LiteLLM Integration 5 | 6 | This MCP server features comprehensive liteLLM integration, allowing you to use any supported LLM provider with a simple model string configuration. 7 | 8 | - **Unified Configuration**: Use a single `CUA_MODEL_NAME` environment variable with a model string 9 | - **Automatic Provider Detection**: The agent automatically detects the provider and capabilities from the model string 10 | - **Extensive Provider Support**: Works with Anthropic, OpenAI, local models, and any liteLLM-compatible provider 11 | 12 | ### Model String Examples: 13 | - **Anthropic**: `"anthropic/claude-3-5-sonnet-20241022"` 14 | - **OpenAI**: `"openai/computer-use-preview"` 15 | - **UI-TARS**: `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"` 16 | - **Omni + Any LiteLLM**: `"omniparser+litellm/gpt-4o"`, `"omniparser+litellm/claude-3-haiku"`, `"omniparser+ollama_chat/gemma3"` ``` -------------------------------------------------------------------------------- /examples/computer_ui_examples.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Simple example script for the Computer Interface Gradio UI. 4 | 5 | This script launches the advanced Gradio UI for the Computer Interface 6 | with full model selection and configuration options. 7 | It can be run directly from the command line. 
8 | """ 9 | 10 | 11 | from utils import load_dotenv_files 12 | 13 | load_dotenv_files() 14 | 15 | # Import the create_gradio_ui function 16 | from computer.ui.gradio.app import create_gradio_ui 17 | 18 | if __name__ == "__main__": 19 | print("Launching Computer Interface Gradio UI with advanced features...") 20 | app = create_gradio_ui() 21 | app.launch( 22 | share=False, 23 | server_name="0.0.0.0", 24 | server_port=7860, 25 | ) 26 | 27 | # Optional: Using the saved dataset 28 | # import datasets 29 | # from computer.ui.utils import convert_to_unsloth 30 | # ds = datasets.load_dataset("ddupont/highquality-cua-demonstrations") 31 | # ds = convert_to_unsloth(ds) ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/Logger.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | struct Logger { 4 | typealias Metadata = [String: String] 5 | 6 | enum Level: String { 7 | case info 8 | case error 9 | case debug 10 | } 11 | 12 | static func info(_ message: String, metadata: Metadata = [:]) { 13 | log(.info, message, metadata) 14 | } 15 | 16 | static func error(_ message: String, metadata: Metadata = [:]) { 17 | log(.error, message, metadata) 18 | } 19 | 20 | static func debug(_ message: String, metadata: Metadata = [:]) { 21 | log(.debug, message, metadata) 22 | } 23 | 24 | private static func log(_ level: Level, _ message: String, _ metadata: Metadata) { 25 | let timestamp = ISO8601DateFormatter().string(from: Date()) 26 | let metadataString = metadata.isEmpty ? "" : " " + metadata.map { "\($0.key)=\($0.value)" }.joined(separator: " ") 27 | print("[\(timestamp)] \(level.rawValue.uppercased()): \(message)\(metadataString)") 28 | } 29 | } ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/linux.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, expect, it } from 'vitest'; 2 | import { LinuxComputerInterface } from '../../src/interface/linux.ts'; 3 | import { MacOSComputerInterface } from '../../src/interface/macos.ts'; 4 | 5 | describe('LinuxComputerInterface', () => { 6 | const testParams = { 7 | ipAddress: 'test.cua.com', // TEST-NET-1 address (RFC 5737) - guaranteed not to be routable 8 | username: 'testuser', 9 | password: 'testpass', 10 | apiKey: 'test-api-key', 11 | vmName: 'test-vm', 12 | }; 13 | 14 | describe('Inheritance', () => { 15 | it('should extend MacOSComputerInterface', () => { 16 | const linuxInterface = new LinuxComputerInterface( 17 | testParams.ipAddress, 18 | testParams.username, 19 | testParams.password, 20 | testParams.apiKey, 21 | testParams.vmName 22 | ); 23 | 24 | expect(linuxInterface).toBeInstanceOf(MacOSComputerInterface); 25 | expect(linuxInterface).toBeInstanceOf(LinuxComputerInterface); 26 | }); 27 | }); 28 | }); 29 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Clone.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Clone: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Clone an existing virtual machine" 7 | ) 8 | 9 | @Argument(help: "Name of the source virtual machine", completion: .custom(completeVMName)) 10 | var name: String 11 | 12 | @Argument(help: "Name for the cloned virtual 
machine") 13 | var newName: String 14 | 15 | @Option(name: .customLong("source-storage"), help: "Source VM storage location") 16 | var sourceStorage: String? 17 | 18 | @Option(name: .customLong("dest-storage"), help: "Destination VM storage location") 19 | var destStorage: String? 20 | 21 | init() {} 22 | 23 | @MainActor 24 | func run() async throws { 25 | let vmController = LumeController() 26 | try vmController.clone( 27 | name: name, 28 | newName: newName, 29 | sourceLocation: sourceStorage, 30 | destLocation: destStorage 31 | ) 32 | } 33 | } 34 | ``` -------------------------------------------------------------------------------- /docs/next.config.mjs: -------------------------------------------------------------------------------- ``` 1 | import { createMDX } from 'fumadocs-mdx/next'; 2 | 3 | const withMDX = createMDX(); 4 | 5 | /** @type {import('next').NextConfig} */ 6 | const config = { 7 | reactStrictMode: true, 8 | trailingSlash: false, 9 | basePath: '/docs', 10 | assetPrefix: '/docs', 11 | async rewrites() { 12 | return [ 13 | { 14 | source: '/:path*.mdx', 15 | destination: '/llms.mdx/:path*', 16 | }, 17 | ]; 18 | }, 19 | async redirects() { 20 | return [ 21 | { 22 | source: '/', 23 | destination: '/docs', 24 | basePath: false, // Important: this bypasses the basePath 25 | permanent: false, 26 | }, 27 | ]; 28 | }, 29 | images: { 30 | dangerouslyAllowSVG: true, 31 | remotePatterns: [ 32 | { 33 | protocol: 'https', 34 | hostname: 'img.shields.io', 35 | }, 36 | { 37 | protocol: 'https', 38 | hostname: 'starchart.cc', 39 | }, 40 | { 41 | protocol: 'https', 42 | hostname: 'github.com', 43 | }, 44 | ], 45 | }, 46 | }; 47 | 48 | export default withMDX(config); 49 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/client-integrations.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Client Integrations 3 | --- 4 | 5 | ## Claude Desktop Integration 6 | 7 | To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`): 8 | 9 | For more information on MCP with Claude Desktop, see the [official MCP User Guide](https://modelcontextprotocol.io/quickstart/user). 10 | 11 | ## Cursor Integration 12 | 13 | To use with Cursor, add an MCP configuration file in one of these locations: 14 | 15 | - **Project-specific**: Create `.cursor/mcp.json` in your project directory 16 | - **Global**: Create `~/.cursor/mcp.json` in your home directory 17 | 18 | After configuration, you can simply tell Cursor's Agent to perform computer tasks by explicitly mentioning the CUA agent, such as "Use the computer control tools to open Safari." 19 | 20 | For more information on MCP with Cursor, see the [official Cursor MCP documentation](https://docs.cursor.com/context/model-context-protocol). 
``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/windows.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, expect, it } from 'vitest'; 2 | import { MacOSComputerInterface } from '../../src/interface/macos.ts'; 3 | import { WindowsComputerInterface } from '../../src/interface/windows.ts'; 4 | 5 | describe('WindowsComputerInterface', () => { 6 | const testParams = { 7 | ipAddress: '192.0.2.1', // TEST-NET-1 address (RFC 5737) - guaranteed not to be routable 8 | username: 'testuser', 9 | password: 'testpass', 10 | apiKey: 'test-api-key', 11 | vmName: 'test-vm', 12 | }; 13 | 14 | describe('Inheritance', () => { 15 | it('should extend MacOSComputerInterface', () => { 16 | const windowsInterface = new WindowsComputerInterface( 17 | testParams.ipAddress, 18 | testParams.username, 19 | testParams.password, 20 | testParams.apiKey, 21 | testParams.vmName 22 | ); 23 | 24 | expect(windowsInterface).toBeInstanceOf(MacOSComputerInterface); 25 | expect(windowsInterface).toBeInstanceOf(WindowsComputerInterface); 26 | }); 27 | }); 28 | }); 29 | ``` -------------------------------------------------------------------------------- /libs/lume/src/VM/VMFactory.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import Virtualization 3 | 4 | enum VMType: String { 5 | case darwin = "macOS" 6 | case linux = "linux" 7 | } 8 | 9 | protocol VMFactory { 10 | @MainActor 11 | func createVM( 12 | vmDirContext: VMDirContext, 13 | imageLoader: ImageLoader? 14 | ) throws -> VM 15 | } 16 | 17 | class DefaultVMFactory: VMFactory { 18 | @MainActor 19 | func createVM( 20 | vmDirContext: VMDirContext, 21 | imageLoader: ImageLoader? 
22 | ) throws -> VM { 23 | let osType = vmDirContext.config.os.lowercased() 24 | 25 | switch osType { 26 | case "macos", "darwin": 27 | guard let imageLoader = imageLoader else { 28 | throw VMError.internalError("ImageLoader required for macOS VM") 29 | } 30 | return DarwinVM(vmDirContext: vmDirContext, imageLoader: imageLoader) 31 | case "linux": 32 | return LinuxVM(vmDirContext: vmDirContext) 33 | default: 34 | throw VMError.unsupportedOS(osType) 35 | } 36 | } 37 | } ``` -------------------------------------------------------------------------------- /libs/typescript/core/tests/telemetry.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { beforeEach, describe, expect, it } from 'vitest'; 2 | import { Telemetry } from '../src/'; 3 | 4 | describe('Telemetry', () => { 5 | let telemetry: Telemetry; 6 | beforeEach(() => { 7 | process.env.CUA_TELEMETRY = ''; 8 | process.env.CUA_TELEMETRY_DISABLED = ''; 9 | telemetry = new Telemetry(); 10 | }); 11 | describe('telemetry.enabled', () => { 12 | it('should return false when CUA_TELEMETRY is off', () => { 13 | process.env.CUA_TELEMETRY = 'off'; 14 | telemetry = new Telemetry(); 15 | expect(telemetry.enabled).toBe(false); 16 | }); 17 | 18 | it('should return true when CUA_TELEMETRY is not set', () => { 19 | process.env.CUA_TELEMETRY = ''; 20 | telemetry = new Telemetry(); 21 | expect(telemetry.enabled).toBe(true); 22 | }); 23 | 24 | it('should return false if CUA_TELEMETRY_DISABLED is 1', () => { 25 | process.env.CUA_TELEMETRY_DISABLED = '1'; 26 | telemetry = new Telemetry(); 27 | expect(telemetry.enabled).toBe(false); 28 | }); 29 | }); 30 | }); 31 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Computer 3 | description: Reference for the current version of the Computer library. 4 | pypi: cua-computer 5 | npm: '@trycua/computer' 6 | github: 7 | - https://github.com/trycua/cua/tree/main/libs/python/computer 8 | - https://github.com/trycua/cua/tree/main/libs/typescript/computer 9 | --- 10 | 11 | The Computer library provides a Computer class for controlling and automating containers running the Computer Server. 12 | 13 | ## Connecting to Computers 14 | 15 | See the [Cua Computers](../computer-sdk/computers) documentation for how to connect to different computer types (cloud, local, or host desktop). 16 | 17 | ## Computer Commands 18 | 19 | See the [Commands](../computer-sdk/commands) documentation for all supported commands and interface methods (Shell, Mouse, Keyboard, File System, etc.). 20 | 21 | ## Sandboxed Python Functions 22 | 23 | See the [Sandboxed Python](../computer-sdk/sandboxed-python) documentation for running Python functions securely in isolated environments on a remote Cua Computer. ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Benchmarks 3 | description: Computer Agent SDK benchmarks for agentic GUI tasks 4 | --- 5 | 6 | The benchmark system evaluates models on GUI grounding tasks, specifically agent loop success rate and click prediction accuracy. 
It supports both: 7 | - **Computer Agent SDK providers** (using model strings like `"huggingface-local/HelloKKMe/GTA1-7B"`) 8 | - **Reference agent implementations** (custom model classes implementing the `ModelProtocol`) 9 | 10 | ## Available Benchmarks 11 | 12 | - **[ScreenSpot-v2](./benchmarks/screenspot-v2)** - Standard resolution GUI grounding 13 | - **[ScreenSpot-Pro](./benchmarks/screenspot-pro)** - High-resolution GUI grounding 14 | - **[Interactive Testing](./benchmarks/interactive)** - Real-time testing and visualization 15 | 16 | ## Quick Start 17 | 18 | ```bash 19 | # Clone the benchmark repository 20 | git clone https://github.com/trycua/cua 21 | cd libs/python/agent/benchmarks 22 | 23 | # Install dependencies 24 | pip install "cua-agent[all]" 25 | 26 | # Run a benchmark 27 | python ss-v2.py 28 | ``` 29 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Server/Responses.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | struct APIError: Codable { 4 | let message: String 5 | } 6 | 7 | // Helper struct to encode mixed-type dictionaries 8 | struct AnyEncodable: Encodable { 9 | private let value: Encodable 10 | 11 | init(_ value: Encodable) { 12 | self.value = value 13 | } 14 | 15 | func encode(to encoder: Encoder) throws { 16 | try value.encode(to: encoder) 17 | } 18 | } 19 | 20 | extension HTTPResponse { 21 | static func json<T: Encodable>(_ value: T) throws -> HTTPResponse { 22 | let data = try JSONEncoder().encode(value) 23 | return HTTPResponse( 24 | statusCode: .ok, 25 | headers: ["Content-Type": "application/json"], 26 | body: data 27 | ) 28 | } 29 | 30 | static func badRequest(message: String) -> HTTPResponse { 31 | let error = APIError(message: message) 32 | return try! HTTPResponse( 33 | statusCode: .badRequest, 34 | headers: ["Content-Type": "application/json"], 35 | body: JSONEncoder().encode(error) 36 | ) 37 | } 38 | } ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/models/base.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Base protocol for benchmark models. 3 | """ 4 | 5 | from typing import Protocol, Optional, Tuple 6 | from PIL import Image 7 | 8 | 9 | class ModelProtocol(Protocol): 10 | """Protocol for benchmark models that can predict click coordinates.""" 11 | 12 | @property 13 | def model_name(self) -> str: 14 | """Return the name of the model.""" 15 | ... 16 | 17 | async def load_model(self) -> None: 18 | """Load the model into memory.""" 19 | ... 20 | 21 | async def unload_model(self) -> None: 22 | """Unload the model from memory.""" 23 | ... 24 | 25 | async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: 26 | """ 27 | Predict click coordinates for the given image and instruction. 28 | 29 | Args: 30 | image: PIL Image to analyze 31 | instruction: Text instruction describing what to click 32 | 33 | Returns: 34 | Tuple of (x, y) coordinates or None if prediction fails 35 | """ 36 | ... 
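
# Illustrative sketch of a class satisfying ModelProtocol: it loads nothing and
# always predicts the centre of the screenshot. Real models follow the same
# shape; the name and behaviour here are placeholders, not part of the suite.
class CenterClickBaseline:
    @property
    def model_name(self) -> str:
        return "center-click-baseline"

    async def load_model(self) -> None:
        # Nothing to load for this baseline.
        return None

    async def unload_model(self) -> None:
        # Nothing to release either.
        return None

    async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]:
        # Ignore the instruction and return the geometric centre of the image.
        return (image.width // 2, image.height // 2)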
37 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/List.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct List: AsyncParsableCommand { 5 | static let configuration: CommandConfiguration = CommandConfiguration( 6 | commandName: "ls", 7 | abstract: "List virtual machines" 8 | ) 9 | 10 | @Option(name: [.long, .customShort("f")], help: "Output format (json|text)") 11 | var format: FormatOption = .text 12 | 13 | @Option(name: .long, help: "Filter by storage location name") 14 | var storage: String? 15 | 16 | init() { 17 | } 18 | 19 | @MainActor 20 | func run() async throws { 21 | let manager = LumeController() 22 | let vms = try manager.list(storage: self.storage) 23 | if vms.isEmpty && self.format == .text { 24 | if let storageName = self.storage { 25 | print("No virtual machines found in storage '\(storageName)'") 26 | } else { 27 | print("No virtual machines found") 28 | } 29 | } else { 30 | try VMDetailsPrinter.printStatus(vms, format: self.format) 31 | } 32 | } 33 | } 34 | ``` -------------------------------------------------------------------------------- /libs/lume/tests/Mocks/MockVNCService.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | @testable import lume 3 | 4 | @MainActor 5 | final class MockVNCService: VNCService { 6 | private(set) var url: String? 7 | private(set) var isRunning = false 8 | private(set) var clientOpenCount = 0 9 | private var _attachedVM: Any? 10 | private let vmDirectory: VMDirectory 11 | 12 | init(vmDirectory: VMDirectory) { 13 | self.vmDirectory = vmDirectory 14 | } 15 | 16 | nonisolated var attachedVM: String? { 17 | get async { 18 | await Task { @MainActor in 19 | _attachedVM as? String 20 | }.value 21 | } 22 | } 23 | 24 | func start(port: Int, virtualMachine: Any?) async throws { 25 | isRunning = true 26 | url = "vnc://localhost:\(port)" 27 | _attachedVM = virtualMachine 28 | } 29 | 30 | func stop() { 31 | isRunning = false 32 | url = nil 33 | _attachedVM = nil 34 | } 35 | 36 | func openClient(url: String) async throws { 37 | guard isRunning else { 38 | throw VMError.vncNotConfigured 39 | } 40 | clientOpenCount += 1 41 | } 42 | } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/docker-compose.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Docker Compose 3 | --- 4 | 5 | You can also use Docker Compose to run Lumier with a simple configuration file. 
Create a `docker-compose.yml` file with the following content: 6 | 7 | ```yaml 8 | version: '3' 9 | 10 | services: 11 | lumier: 12 | image: trycua/lumier:latest 13 | container_name: lumier-vm 14 | restart: unless-stopped 15 | ports: 16 | - "8006:8006" # Port for VNC access 17 | volumes: 18 | - ./storage:/storage # VM persistent storage 19 | - ./shared:/shared # Shared folder accessible in the VM 20 | environment: 21 | - VM_NAME=lumier-vm 22 | - VERSION=ghcr.io/trycua/macos-sequoia-cua:latest 23 | - CPU_CORES=4 24 | - RAM_SIZE=8192 25 | - HOST_STORAGE_PATH=${PWD}/storage 26 | - HOST_SHARED_PATH=${PWD}/shared 27 | stop_signal: SIGINT 28 | stop_grace_period: 2m 29 | ``` 30 | 31 | Then run Lumier using: 32 | 33 | ```bash 34 | # First create the required directories 35 | mkdir -p storage shared 36 | 37 | # Start the container 38 | docker-compose up -d 39 | 40 | # View the logs 41 | docker-compose logs -f 42 | 43 | # Stop the container when done 44 | docker-compose down 45 | ``` 46 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/supported-model-providers/local-models.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Running Models Locally 3 | --- 4 | 5 | You can run open-source LLMs and vision models on your own machine using cua, without relying on cloud APIs. This is ideal for development, privacy, or running on air-gapped systems. 6 | 7 | ## Hugging Face (transformers) 8 | 9 | Use the `huggingface-local/` prefix to run any Hugging Face model locally via the `transformers` library. This supports most text and vision models from the Hugging Face Hub. 10 | 11 | **Example:** 12 | 13 | ```python 14 | model = "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" 15 | ``` 16 | 17 | ## MLX (Apple Silicon) 18 | 19 | Use the `mlx/` prefix to run models using the `mlx-vlm` library, optimized for Apple Silicon (M1/M2/M3). This allows fast, local inference for many open-source models. 20 | 21 | **Example:** 22 | 23 | ```python 24 | model = "mlx/mlx-community/UI-TARS-1.5-7B-6bit" 25 | ``` 26 | 27 | ## Ollama 28 | 29 | Use the `ollama_chat/` prefix to run models using the `ollama` library. This allows fast, local inference for many open-source models. 
30 | 31 | **Example:** 32 | 33 | ```python 34 | model = "omniparser+ollama_chat/llama3.2:latest" 35 | ``` 36 | ``` -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "docs", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "build": "next build", 7 | "dev": "next dev --turbo -p 8090", 8 | "start": "next start -p 8090", 9 | "postinstall": "fumadocs-mdx" 10 | }, 11 | "dependencies": { 12 | "fumadocs-core": "15.5.1", 13 | "fumadocs-mdx": "11.6.7", 14 | "fumadocs-ui": "15.5.1", 15 | "lucide-react": "^0.525.0", 16 | "mermaid": "^11.8.1", 17 | "next": "15.3.3", 18 | "next-themes": "^0.4.6", 19 | "react": "^19.1.0", 20 | "react-dom": "^19.1.0", 21 | "remark": "^15.0.1", 22 | "remark-gfm": "^4.0.1", 23 | "remark-mdx": "^3.1.0", 24 | "tailwind-merge": "^3.3.1", 25 | "zod": "^3.25.76" 26 | }, 27 | "devDependencies": { 28 | "@tailwindcss/postcss": "^4.1.8", 29 | "@types/mdx": "^2.0.13", 30 | "@types/node": "22.15.28", 31 | "@types/react": "^19.1.6", 32 | "@types/react-dom": "^19.1.5", 33 | "postcss": "^8.5.4", 34 | "prettier": "^3.6.2", 35 | "tailwindcss": "^4.1.8", 36 | "typescript": "^5.8.3" 37 | }, 38 | "pnpm": { 39 | "onlyBuiltDependencies": [ 40 | "@tailwindcss/oxide", 41 | "esbuild", 42 | "sharp" 43 | ] 44 | } 45 | } ``` -------------------------------------------------------------------------------- /libs/lume/src/Main.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | @main 5 | struct Lume: AsyncParsableCommand { 6 | static var configuration: CommandConfiguration { 7 | CommandConfiguration( 8 | commandName: "lume", 9 | abstract: "A lightweight CLI and local API server to build, run and manage macOS VMs.", 10 | version: Version.current, 11 | subcommands: CommandRegistry.allCommands, 12 | helpNames: .long 13 | ) 14 | } 15 | } 16 | 17 | // MARK: - Version Management 18 | extension Lume { 19 | enum Version { 20 | static let current: String = "0.1.0" 21 | } 22 | } 23 | 24 | // MARK: - Command Execution 25 | extension Lume { 26 | public static func main() async { 27 | do { 28 | try await executeCommand() 29 | } catch { 30 | exit(withError: error) 31 | } 32 | } 33 | 34 | private static func executeCommand() async throws { 35 | var command = try parseAsRoot() 36 | 37 | if var asyncCommand = command as? AsyncParsableCommand { 38 | try await asyncCommand.run() 39 | } else { 40 | try command.run() 41 | } 42 | } 43 | } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/human_tool/__main__.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Human-in-the-Loop Completion Server and UI 4 | 5 | This module combines the FastAPI server for handling completion requests 6 | with a Gradio UI for human interaction. 
7 | """ 8 | 9 | import gradio as gr 10 | from fastapi import FastAPI 11 | from .server import app as fastapi_app 12 | from .ui import create_ui 13 | 14 | # Create the Gradio demo 15 | gradio_demo = create_ui() 16 | 17 | # Mount Gradio on FastAPI 18 | CUSTOM_PATH = "/gradio" 19 | app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH) 20 | 21 | # Add a redirect from root to Gradio UI 22 | @fastapi_app.get("/") 23 | async def redirect_to_ui(): 24 | """Redirect root to Gradio UI.""" 25 | return { 26 | "message": "Human Completion Server is running", 27 | "ui_url": "/gradio", 28 | "api_docs": "/docs" 29 | } 30 | 31 | if __name__ == "__main__": 32 | import uvicorn 33 | print("🚀 Starting Human-in-the-Loop Completion Server...") 34 | print("📊 API Server: http://localhost:8002") 35 | print("🎨 Gradio UI: http://localhost:8002/gradio") 36 | print("📚 API Docs: http://localhost:8002/docs") 37 | 38 | uvicorn.run(app, host="0.0.0.0", port=8002) 39 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/types.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Type definitions for agent 3 | """ 4 | 5 | from typing import Dict, List, Any, Optional, Callable, Protocol, Literal 6 | from pydantic import BaseModel 7 | import re 8 | from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam 9 | from collections.abc import Iterable 10 | 11 | # Agent input types 12 | Messages = str | ResponseInputParam | List[Dict[str, Any]] 13 | Tools = Optional[Iterable[ToolParam]] 14 | 15 | # Agent output types 16 | AgentResponse = ResponsesAPIResponse 17 | AgentCapability = Literal["step", "click"] 18 | 19 | # Exception types 20 | class ToolError(RuntimeError): 21 | """Base exception for tool-related errors""" 22 | pass 23 | 24 | class IllegalArgumentError(ToolError): 25 | """Exception raised when function arguments are invalid""" 26 | pass 27 | 28 | 29 | # Agent config registration 30 | class AgentConfigInfo(BaseModel): 31 | """Information about a registered agent config""" 32 | agent_class: type 33 | models_regex: str 34 | priority: int = 0 35 | 36 | def matches_model(self, model: str) -> bool: 37 | """Check if this agent config matches the given model""" 38 | return bool(re.match(self.models_regex, model)) 39 | ``` -------------------------------------------------------------------------------- /libs/python/pylume/pylume/exceptions.py: -------------------------------------------------------------------------------- ```python 1 | from typing import Optional 2 | 3 | class LumeError(Exception): 4 | """Base exception for all PyLume errors.""" 5 | pass 6 | 7 | class LumeServerError(LumeError): 8 | """Raised when there's an error with the PyLume server.""" 9 | def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None): 10 | self.status_code = status_code 11 | self.response_text = response_text 12 | super().__init__(message) 13 | 14 | class LumeConnectionError(LumeError): 15 | """Raised when there's an error connecting to the PyLume server.""" 16 | pass 17 | 18 | class LumeTimeoutError(LumeError): 19 | """Raised when a request to the PyLume server times out.""" 20 | pass 21 | 22 | class LumeNotFoundError(LumeError): 23 | """Raised when a requested resource is not found.""" 24 | pass 25 | 26 | class LumeConfigError(LumeError): 27 | """Raised when there's an error with the configuration.""" 28 | pass 29 | 30 | class LumeVMError(LumeError): 31 | 
"""Raised when there's an error with a VM operation.""" 32 | pass 33 | 34 | class LumeImageError(LumeError): 35 | """Raised when there's an error with an image operation.""" 36 | pass ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/custom-tools.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Custom Tools 3 | slug: custom-tools 4 | --- 5 | 6 | The Agent SDK supports defining custom Python tools that run securely in sandboxed environments on remote Cua Computers. This enables safe execution of user-defined functions, isolation of dependencies, and robust automation workflows. 7 | 8 | ## Custom Tools 9 | 10 | Define a custom tool for an agent: 11 | 12 | ```python 13 | def calculate(a: int, b: int) -> int: 14 | """Calculate the sum of two integers""" 15 | return a + b 16 | 17 | # Use with agent 18 | agent = ComputerAgent( 19 | model="anthropic/claude-3-5-sonnet-20241022", 20 | tools=[computer, calculate] 21 | ) 22 | ``` 23 | 24 | ## Sandboxed Tools 25 | 26 | Define a sandboxed tool: 27 | 28 | ```python 29 | from computer.helpers import sandboxed 30 | 31 | @sandboxed() 32 | def read_file(location: str) -> str: 33 | """Read contents of a file""" 34 | with open(location, 'r') as f: 35 | return f.read() 36 | ``` 37 | 38 | You can then register this as a tool for your agent: 39 | 40 | ```python 41 | from agent import ComputerAgent 42 | from computer import Computer 43 | 44 | computer = Computer(...) 45 | agent = ComputerAgent( 46 | model="anthropic/claude-3-5-sonnet-20241022", 47 | tools=[computer, read_file], 48 | ) 49 | ``` 50 | ``` -------------------------------------------------------------------------------- /libs/python/core/pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "cua-core" 7 | version = "0.1.8" 8 | description = "Core functionality for Cua including telemetry and shared utilities" 9 | readme = "README.md" 10 | authors = [ 11 | { name = "TryCua", email = "[email protected]" } 12 | ] 13 | dependencies = [ 14 | "pydantic>=2.0.0", 15 | "httpx>=0.24.0", 16 | "posthog>=3.20.0" 17 | ] 18 | requires-python = ">=3.11" 19 | 20 | [tool.pdm] 21 | distribution = true 22 | 23 | [tool.pdm.build] 24 | includes = ["core/"] 25 | source-includes = ["tests/", "README.md", "LICENSE"] 26 | 27 | [tool.black] 28 | line-length = 100 29 | target-version = ["py311"] 30 | 31 | [tool.ruff] 32 | line-length = 100 33 | target-version = "py311" 34 | select = ["E", "F", "B", "I"] 35 | fix = true 36 | 37 | [tool.ruff.format] 38 | docstring-code-format = true 39 | 40 | [tool.mypy] 41 | strict = true 42 | python_version = "3.11" 43 | ignore_missing_imports = true 44 | disallow_untyped_defs = true 45 | check_untyped_defs = true 46 | warn_return_any = true 47 | show_error_codes = true 48 | warn_unused_ignores = false 49 | 50 | [tool.pytest.ini_options] 51 | asyncio_mode = "auto" 52 | testpaths = ["tests"] 53 | python_files = "test_*.py" 54 | [dependency-groups] 55 | dev = [ 56 | "pytest>=8.3.5", 57 | ] 58 | ``` -------------------------------------------------------------------------------- /libs/xfce/src/xfce-config/xfce4-power-manager.xml: -------------------------------------------------------------------------------- ``` 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <channel name="xfce4-power-manager" 
version="1.0"> 3 | <property name="xfce4-power-manager" type="empty"> 4 | <property name="dpms-enabled" type="bool" value="false"/> 5 | <property name="dpms-on-ac-sleep" type="uint" value="0"/> 6 | <property name="dpms-on-ac-off" type="uint" value="0"/> 7 | <property name="dpms-on-battery-sleep" type="uint" value="0"/> 8 | <property name="dpms-on-battery-off" type="uint" value="0"/> 9 | <property name="blank-on-ac" type="int" value="0"/> 10 | <property name="blank-on-battery" type="int" value="0"/> 11 | <property name="lock-screen-suspend-hibernate" type="bool" value="false"/> 12 | <property name="logind-handle-lid-switch" type="bool" value="false"/> 13 | <property name="brightness-on-ac" type="uint" value="9"/> 14 | <property name="brightness-on-battery" type="uint" value="9"/> 15 | <property name="inactivity-on-ac" type="uint" value="0"/> 16 | <property name="inactivity-on-battery" type="uint" value="0"/> 17 | <property name="inactivity-sleep-mode-on-battery" type="uint" value="1"/> 18 | </property> 19 | </channel> 20 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Delete.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Delete: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Delete a virtual machine" 7 | ) 8 | 9 | @Argument(help: "Name of the virtual machine to delete", completion: .custom(completeVMName)) 10 | var name: String 11 | 12 | @Flag(name: .long, help: "Force deletion without confirmation") 13 | var force = false 14 | 15 | @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") 16 | var storage: String? 17 | 18 | init() {} 19 | 20 | @MainActor 21 | func run() async throws { 22 | if !force { 23 | print( 24 | "Are you sure you want to delete the virtual machine '\(name)'? [y/N] ", 25 | terminator: "") 26 | guard let response = readLine()?.lowercased(), 27 | response == "y" || response == "yes" 28 | else { 29 | print("Deletion cancelled") 30 | return 31 | } 32 | } 33 | 34 | let vmController = LumeController() 35 | try await vmController.delete(name: name, storage: storage) 36 | } 37 | } 38 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Pull.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Pull: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Pull a macOS image from GitHub Container Registry" 7 | ) 8 | 9 | @Argument(help: "Image to pull (format: name:tag)") 10 | var image: String 11 | 12 | @Argument( 13 | help: "Name for the VM (defaults to image name without tag)", transform: { Optional($0) }) 14 | var name: String? 15 | 16 | @Option(help: "Github Container Registry to pull from. Defaults to ghcr.io") 17 | var registry: String = "ghcr.io" 18 | 19 | @Option(help: "Organization to pull from. Defaults to trycua") 20 | var organization: String = "trycua" 21 | 22 | @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") 23 | var storage: String? 
24 | 25 | init() {} 26 | 27 | @MainActor 28 | func run() async throws { 29 | let controller = LumeController() 30 | try await controller.pullImage( 31 | image: image, 32 | name: name, 33 | registry: registry, 34 | organization: organization, 35 | storage: storage 36 | ) 37 | } 38 | } 39 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/Path.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Path: CustomStringConvertible, ExpressibleByArgument { 5 | let url: URL 6 | 7 | init(_ path: String) { 8 | url = URL(filePath: NSString(string: path).expandingTildeInPath).standardizedFileURL 9 | } 10 | 11 | init(_ url: URL) { 12 | self.url = url 13 | } 14 | 15 | init(argument: String) { 16 | self.init(argument) 17 | } 18 | 19 | func file(_ path: String) -> Path { 20 | return Path(url.appendingPathComponent(path, isDirectory: false)) 21 | } 22 | 23 | func directory(_ path: String) -> Path { 24 | return Path(url.appendingPathComponent(path, isDirectory: true)) 25 | } 26 | 27 | func exists() -> Bool { 28 | return FileManager.default.fileExists(atPath: url.standardizedFileURL.path(percentEncoded: false)) 29 | } 30 | 31 | func writable() -> Bool { 32 | return FileManager.default.isWritableFile(atPath: url.standardizedFileURL.path(percentEncoded: false)) 33 | } 34 | 35 | var name: String { 36 | return url.lastPathComponent 37 | } 38 | 39 | var path: String { 40 | return url.standardizedFileURL.path(percentEncoded: false) 41 | } 42 | 43 | var description: String { 44 | return url.path() 45 | } 46 | } 47 | ``` -------------------------------------------------------------------------------- /libs/typescript/agent/tests/client.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, it, expect } from 'vitest'; 2 | import AgentClient from '../src/index.js'; 3 | 4 | describe('AgentClient', () => { 5 | it('should create client with HTTP URL', () => { 6 | const client = new AgentClient('https://localhost:8000'); 7 | expect(client).toBeDefined(); 8 | expect(client.responses).toBeDefined(); 9 | expect(typeof client.responses.create).toBe('function'); 10 | }); 11 | 12 | it('should create client with peer URL', () => { 13 | const client = new AgentClient('peer://test-peer-id'); 14 | expect(client).toBeDefined(); 15 | expect(client.responses).toBeDefined(); 16 | expect(typeof client.responses.create).toBe('function'); 17 | }); 18 | 19 | it('should throw error for invalid URL', () => { 20 | expect(() => { 21 | new AgentClient('invalid://url'); 22 | }).toThrow('Invalid URL format'); 23 | }); 24 | 25 | it('should have health method', async () => { 26 | const client = new AgentClient('https://localhost:8000'); 27 | expect(typeof client.health).toBe('function'); 28 | }); 29 | 30 | it('should have disconnect method', async () => { 31 | const client = new AgentClient('https://localhost:8000'); 32 | expect(typeof client.disconnect).toBe('function'); 33 | }); 34 | }); 35 | ``` -------------------------------------------------------------------------------- /examples/docker_examples.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | from computer.providers.factory import VMProviderFactory 3 | from computer import Computer, VMProviderType 4 | import os 5 | 6 | async def main(): 7 | # # Create docker provider 8 | # provider = 
VMProviderFactory.create_provider( 9 | # provider_type="docker", 10 | # image="cua-ubuntu:latest", # Your CUA Ubuntu image 11 | # port=8080, 12 | # vnc_port=6901 13 | # ) 14 | 15 | # # Run a container 16 | # async with provider: 17 | # vm_info = await provider.run_vm( 18 | # image="cua-ubuntu:latest", 19 | # name="my-cua-container", 20 | # run_opts={ 21 | # "memory": "4GB", 22 | # "cpu": 2, 23 | # "vnc_port": 6901, 24 | # "api_port": 8080 25 | # } 26 | # ) 27 | # print(vm_info) 28 | 29 | computer = Computer( 30 | os_type="linux", 31 | provider_type=VMProviderType.DOCKER, 32 | name="my-cua-container", 33 | image="cua-ubuntu:latest", 34 | ) 35 | 36 | await computer.run() 37 | 38 | screenshot = await computer.interface.screenshot() 39 | with open("screenshot_docker.png", "wb") as f: 40 | f.write(screenshot) 41 | 42 | if __name__ == "__main__": 43 | asyncio.run(main()) 44 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """CUA Computer Interface for cross-platform computer control.""" 2 | 3 | import logging 4 | import sys 5 | 6 | __version__ = "0.1.0" 7 | 8 | # Initialize logging 9 | logger = logging.getLogger("computer") 10 | 11 | # Initialize telemetry when the package is imported 12 | try: 13 | # Import from core telemetry 14 | from core.telemetry import ( 15 | is_telemetry_enabled, 16 | record_event, 17 | ) 18 | 19 | # Check if telemetry is enabled 20 | if is_telemetry_enabled(): 21 | logger.info("Telemetry is enabled") 22 | 23 | # Record package initialization 24 | record_event( 25 | "module_init", 26 | { 27 | "module": "computer", 28 | "version": __version__, 29 | "python_version": sys.version, 30 | }, 31 | ) 32 | else: 33 | logger.info("Telemetry is disabled") 34 | except ImportError as e: 35 | # Telemetry not available 36 | logger.warning(f"Telemetry not available: {e}") 37 | except Exception as e: 38 | # Other issues with telemetry 39 | logger.warning(f"Error initializing telemetry: {e}") 40 | 41 | # Core components 42 | from .computer import Computer 43 | 44 | # Provider components 45 | from .providers.base import VMProviderType 46 | 47 | __all__ = ["Computer", "VMProviderType"] 48 | ``` -------------------------------------------------------------------------------- /libs/python/pylume/pylume/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | PyLume Python SDK - A client library for managing macOS VMs with PyLume. 
3 | 4 | Example: 5 | >>> from pylume import PyLume, VMConfig 6 | >>> client = PyLume() 7 | >>> config = VMConfig(name="my-vm", cpu=4, memory="8GB", disk_size="64GB") 8 | >>> client.create_vm(config) 9 | >>> client.run_vm("my-vm") 10 | """ 11 | 12 | # Import exceptions then all models 13 | from .exceptions import ( 14 | LumeConfigError, 15 | LumeConnectionError, 16 | LumeError, 17 | LumeImageError, 18 | LumeNotFoundError, 19 | LumeServerError, 20 | LumeTimeoutError, 21 | LumeVMError, 22 | ) 23 | from .models import ( 24 | CloneSpec, 25 | ImageInfo, 26 | ImageList, 27 | ImageRef, 28 | SharedDirectory, 29 | VMConfig, 30 | VMRunOpts, 31 | VMStatus, 32 | VMUpdateOpts, 33 | ) 34 | 35 | # Import main class last to avoid circular imports 36 | from .pylume import PyLume 37 | 38 | __version__ = "0.2.2" 39 | 40 | __all__ = [ 41 | "PyLume", 42 | "VMConfig", 43 | "VMStatus", 44 | "VMRunOpts", 45 | "VMUpdateOpts", 46 | "ImageRef", 47 | "CloneSpec", 48 | "SharedDirectory", 49 | "ImageList", 50 | "ImageInfo", 51 | "LumeError", 52 | "LumeServerError", 53 | "LumeConnectionError", 54 | "LumeTimeoutError", 55 | "LumeNotFoundError", 56 | "LumeConfigError", 57 | "LumeVMError", 58 | "LumeImageError", 59 | ] 60 | ``` -------------------------------------------------------------------------------- /docs/content/docs/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Home 3 | icon: House 4 | --- 5 | 6 | import { Monitor, Code, BookOpen } from 'lucide-react'; 7 | 8 | # Welcome! 9 | 10 | Cua is a framework for automating Windows, Mac, and Linux apps powered by computer-using agents (CUAs). 11 | 12 | Cua makes every stage of computer-using agent development simple: 13 | 14 | - **Development**: Use any LLM provider with liteLLM. The agent SDK makes multiple agent loop providers, trajectory tracing, caching, and budget management easy 15 | - **Containerization**: cua offers Docker containers pre-installed with everything needed for AI-powered RPA 16 | - **Deployment**: cua cloud gives you a production-ready cloud environment for your assistants 17 | 18 | <div className="grid grid-cols-1 md:grid-cols-2 gap-6 mt-8"> 19 | <Card icon={<Monitor />} href="/quickstart-ui" title="Quickstart (UI)"> 20 | Try the cua Agent UI in your browser—no coding required. 21 | </Card> 22 | <Card icon={<Code />} href="/quickstart-devs" title="Quickstart (Developers)"> 23 | Build with Python—full SDK and agent code examples. 
24 | </Card> 25 | </div> 26 | <div className="grid grid-cols-1 gap-6 mt-6"> 27 | <Card icon={<BookOpen />} href="/libraries/agent" title="API Reference"> 28 | Explore the agent SDK and APIs 29 | </Card> 30 | </div> 31 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/models.py: -------------------------------------------------------------------------------- ```python 1 | """Models for computer configuration.""" 2 | 3 | from dataclasses import dataclass 4 | from typing import Optional, Any, Dict 5 | 6 | # Import base provider interface 7 | from .providers.base import BaseVMProvider 8 | 9 | @dataclass 10 | class Display: 11 | """Display configuration.""" 12 | width: int 13 | height: int 14 | 15 | @dataclass 16 | class Image: 17 | """VM image configuration.""" 18 | image: str 19 | tag: str 20 | name: str 21 | 22 | @dataclass 23 | class Computer: 24 | """Computer configuration.""" 25 | image: str 26 | tag: str 27 | name: str 28 | display: Display 29 | memory: str 30 | cpu: str 31 | vm_provider: Optional[BaseVMProvider] = None 32 | 33 | # @property # Remove the property decorator 34 | async def get_ip(self) -> Optional[str]: 35 | """Get the IP address of the VM.""" 36 | if not self.vm_provider: 37 | return None 38 | 39 | vm = await self.vm_provider.get_vm(self.name) 40 | # Handle both object attribute and dictionary access for ip_address 41 | if vm: 42 | if isinstance(vm, dict): 43 | return vm.get("ip_address") 44 | else: 45 | # Access as attribute for object-based return values 46 | return getattr(vm, "ip_address", None) 47 | return None ``` -------------------------------------------------------------------------------- /docs/src/assets/discord-black.svg: -------------------------------------------------------------------------------- ``` 1 | <?xml version="1.0" encoding="UTF-8"?><svg id="Discord-Logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 126.644 96"><path fill="currentColor" id="Discord-Symbol-Black" d="M81.15,0c-1.2376,2.1973-2.3489,4.4704-3.3591,6.794-9.5975-1.4396-19.3718-1.4396-28.9945,0-.985-2.3236-2.1216-4.5967-3.3591-6.794-9.0166,1.5407-17.8059,4.2431-26.1405,8.0568C2.779,32.5304-1.6914,56.3725.5312,79.8863c9.6732,7.1476,20.5083,12.603,32.0505,16.0884,2.6014-3.4854,4.8998-7.1981,6.8698-11.0623-3.738-1.3891-7.3497-3.1318-10.8098-5.1523.9092-.6567,1.7932-1.3386,2.6519-1.9953,20.281,9.547,43.7696,9.547,64.0758,0,.8587.7072,1.7427,1.3891,2.6519,1.9953-3.4601,2.0457-7.0718,3.7632-10.835,5.1776,1.97,3.8642,4.2683,7.5769,6.8698,11.0623,11.5419-3.4854,22.3769-8.9156,32.0509-16.0631,2.626-27.2771-4.496-50.9172-18.817-71.8548C98.9811,4.2684,90.1918,1.5659,81.1752.0505l-.0252-.0505ZM42.2802,65.4144c-6.2383,0-11.4159-5.6575-11.4159-12.6535s4.9755-12.6788,11.3907-12.6788,11.5169,5.708,11.4159,12.6788c-.101,6.9708-5.026,12.6535-11.3907,12.6535ZM84.3576,65.4144c-6.2637,0-11.3907-5.6575-11.3907-12.6535s4.9755-12.6788,11.3907-12.6788,11.4917,5.708,11.3906,12.6788c-.101,6.9708-5.026,12.6535-11.3906,12.6535Z"/></svg> ``` -------------------------------------------------------------------------------- /libs/lume/tests/Mocks/MockVM.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | @testable import lume 4 | 5 | @MainActor 6 | class MockVM: VM { 7 | private var mockIsRunning = false 8 | 9 | override func getOSType() -> String { 10 | return "mock-os" 11 | } 12 | 13 | override func setup( 14 | ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: 
UInt64, display: String 15 | ) async throws { 16 | // Mock setup implementation 17 | vmDirContext.config.setCpuCount(cpuCount) 18 | vmDirContext.config.setMemorySize(memorySize) 19 | vmDirContext.config.setDiskSize(diskSize) 20 | vmDirContext.config.setMacAddress("00:11:22:33:44:55") 21 | try vmDirContext.saveConfig() 22 | } 23 | 24 | override func run( 25 | noDisplay: Bool, sharedDirectories: [SharedDirectory], mount: Path?, vncPort: Int = 0, 26 | recoveryMode: Bool = false, usbMassStoragePaths: [Path]? = nil 27 | ) async throws { 28 | mockIsRunning = true 29 | try await super.run( 30 | noDisplay: noDisplay, sharedDirectories: sharedDirectories, mount: mount, 31 | vncPort: vncPort, recoveryMode: recoveryMode, 32 | usbMassStoragePaths: usbMassStoragePaths 33 | ) 34 | } 35 | 36 | override func stop() async throws { 37 | mockIsRunning = false 38 | try await super.stop() 39 | } 40 | } 41 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Set.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Set: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Set new values for CPU, memory, and disk size of a virtual machine" 7 | ) 8 | 9 | @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) 10 | var name: String 11 | 12 | @Option(help: "New number of CPU cores") 13 | var cpu: Int? 14 | 15 | @Option(help: "New memory size, e.g., 8192MB or 8GB.", transform: { try parseSize($0) }) 16 | var memory: UInt64? 17 | 18 | @Option(help: "New disk size, e.g., 20480MB or 20GB.", transform: { try parseSize($0) }) 19 | var diskSize: UInt64? 20 | 21 | @Option(help: "New display resolution in format WIDTHxHEIGHT.") 22 | var display: VMDisplayResolution? 23 | 24 | @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") 25 | var storage: String? 26 | 27 | init() { 28 | } 29 | 30 | @MainActor 31 | func run() async throws { 32 | let vmController = LumeController() 33 | try vmController.updateSettings( 34 | name: name, 35 | cpu: cpu, 36 | memory: memory, 37 | diskSize: diskSize, 38 | display: display?.string, 39 | storage: storage 40 | ) 41 | } 42 | } 43 | ``` -------------------------------------------------------------------------------- /libs/lume/src/ContainerRegistry/ImagesPrinter.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | struct ImagesPrinter { 4 | private struct Column: Sendable { 5 | let header: String 6 | let width: Int 7 | let getValue: @Sendable (String) -> String 8 | } 9 | 10 | private static let columns: [Column] = [ 11 | Column(header: "name", width: 28) { $0.split(separator: ":").first.map(String.init) ?? $0 }, 12 | Column(header: "image_id", width: 16) { $0.split(separator: ":").last.map(String.init) ?? 
"-" } 13 | ] 14 | 15 | static func print(images: [String]) { 16 | if images.isEmpty { 17 | Swift.print("No images found") 18 | return 19 | } 20 | 21 | printHeader() 22 | images.sorted().forEach(printImage) 23 | } 24 | 25 | private static func printHeader() { 26 | let paddedHeaders = columns.map { $0.header.paddedToWidth($0.width) } 27 | Swift.print(paddedHeaders.joined()) 28 | } 29 | 30 | private static func printImage(_ image: String) { 31 | let paddedColumns = columns.map { column in 32 | column.getValue(image).paddedToWidth(column.width) 33 | } 34 | Swift.print(paddedColumns.joined()) 35 | } 36 | } 37 | 38 | private extension String { 39 | func paddedToWidth(_ width: Int) -> String { 40 | padding(toLength: width, withPad: " ", startingAt: 0) 41 | } 42 | } ``` -------------------------------------------------------------------------------- /docs/src/assets/discord-white.svg: -------------------------------------------------------------------------------- ``` 1 | <?xml version="1.0" encoding="UTF-8"?><svg id="Discord-Logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 126.644 96"><defs><style>.cls-1{fill:#fff;}</style></defs><path id="Discord-Symbol-White" class="cls-1" d="M81.15,0c-1.2376,2.1973-2.3489,4.4704-3.3591,6.794-9.5975-1.4396-19.3718-1.4396-28.9945,0-.985-2.3236-2.1216-4.5967-3.3591-6.794-9.0166,1.5407-17.8059,4.2431-26.1405,8.0568C2.779,32.5304-1.6914,56.3725.5312,79.8863c9.6732,7.1476,20.5083,12.603,32.0505,16.0884,2.6014-3.4854,4.8998-7.1981,6.8698-11.0623-3.738-1.3891-7.3497-3.1318-10.8098-5.1523.9092-.6567,1.7932-1.3386,2.6519-1.9953,20.281,9.547,43.7696,9.547,64.0758,0,.8587.7072,1.7427,1.3891,2.6519,1.9953-3.4601,2.0457-7.0718,3.7632-10.835,5.1776,1.97,3.8642,4.2683,7.5769,6.8698,11.0623,11.5419-3.4854,22.3769-8.9156,32.0509-16.0631,2.626-27.2771-4.496-50.9172-18.817-71.8548C98.9811,4.2684,90.1918,1.5659,81.1752.0505l-.0252-.0505ZM42.2802,65.4144c-6.2383,0-11.4159-5.6575-11.4159-12.6535s4.9755-12.6788,11.3907-12.6788,11.5169,5.708,11.4159,12.6788c-.101,6.9708-5.026,12.6535-11.3907,12.6535ZM84.3576,65.4144c-6.2637,0-11.3907-5.6575-11.3907-12.6535s4.9755-12.6788,11.3907-12.6788,11.4917,5.708,11.3906,12.6788c-.101,6.9708-5.026,12.6535-11.3906,12.6535Z"/></svg> ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/building-lumier.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Building Lumier 3 | --- 4 | 5 | If you want to customize the Lumier container or build it from source, you can follow these steps: 6 | 7 | ```bash 8 | # 1. Navigate to the Lumier directory 9 | cd libs/lumier 10 | 11 | # 2. Build the Docker image locally 12 | docker build -t lumier-custom:latest . 13 | 14 | # 3. Run your custom build 15 | docker run -it --rm \ 16 | --name lumier-vm \ 17 | -p 8006:8006 \ 18 | -e VM_NAME=lumier-vm \ 19 | -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ 20 | -e CPU_CORES=4 \ 21 | -e RAM_SIZE=8192 \ 22 | lumier-custom:latest 23 | ``` 24 | 25 | ### Customization Options 26 | 27 | The Dockerfile provides several customization points: 28 | 29 | 1. **Base image**: The container uses Debian Bullseye Slim as the base. You can modify this if needed. 30 | 2. **Installed packages**: You can add or remove packages in the apt-get install list. 31 | 3. **Hooks**: Check the `/run/hooks/` directory for scripts that run at specific points during VM lifecycle. 32 | 4. **Configuration**: Review `/run/config/constants.sh` for default settings. 
33 | 34 | After making your modifications, you can build and push your custom image to your own Docker Hub repository: 35 | 36 | ```bash 37 | # Build with a custom tag 38 | docker build -t yourusername/lumier:custom . 39 | 40 | # Push to Docker Hub (after docker login) 41 | docker push yourusername/lumier:custom 42 | ``` ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | agent - Decorator-based Computer Use Agent with liteLLM integration 3 | """ 4 | 5 | import logging 6 | import sys 7 | 8 | from .decorators import register_agent 9 | from .agent import ComputerAgent 10 | from .types import Messages, AgentResponse 11 | 12 | # Import loops to register them 13 | from . import loops 14 | 15 | __all__ = [ 16 | "register_agent", 17 | "ComputerAgent", 18 | "Messages", 19 | "AgentResponse" 20 | ] 21 | 22 | __version__ = "0.4.0" 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | # Initialize telemetry when the package is imported 27 | try: 28 | # Import from core telemetry for basic functions 29 | from core.telemetry import ( 30 | is_telemetry_enabled, 31 | record_event, 32 | ) 33 | 34 | # Check if telemetry is enabled 35 | if is_telemetry_enabled(): 36 | logger.info("Telemetry is enabled") 37 | 38 | # Record package initialization 39 | record_event( 40 | "module_init", 41 | { 42 | "module": "agent", 43 | "version": __version__, 44 | "python_version": sys.version, 45 | }, 46 | ) 47 | 48 | else: 49 | logger.info("Telemetry is disabled") 50 | except ImportError as e: 51 | # Telemetry not available 52 | logger.warning(f"Telemetry not available: {e}") 53 | except Exception as e: 54 | # Other issues with telemetry 55 | logger.warning(f"Error initializing telemetry: {e}") 56 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lumier/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Lumier 3 | description: Reference for the current version of the Lumier library. 4 | github: 5 | - https://github.com/trycua/cua/tree/main/libs/lumier 6 | --- 7 | 8 | **Lumier** is an interface for running macOS virtual machines with minimal setup. It uses Docker as a packaging system to deliver a pre-configured environment that connects to the `lume` virtualization service running on your host machine. With Lumier, you get: 9 | 10 | - A ready-to-use macOS or Linux virtual machine in minutes 11 | - Browser-based VNC access to your VM 12 | - Easy file sharing between your host and VM 13 | - Simple configuration through environment variables 14 | 15 | ## How It Works 16 | 17 | <Callout title="Note"> 18 | We're using Docker primarily as a convenient delivery mechanism, not as an isolation layer. Unlike traditional Docker containers, Lumier leverages the Apple Virtualization Framework (Apple Vz) through the `lume` CLI to create true virtual machines. 19 | </Callout> 20 | 21 | Here's what's happening behind the scenes: 22 | 23 | 1. The Docker container provides a consistent environment to run the Lumier interface 24 | 2. Lumier connects to the Lume service running on your host Mac 25 | 3. Lume uses Apple's Virtualization Framework to create a true macOS virtual machine 26 | 4. 
The VM runs with hardware acceleration using your Mac's native virtualization capabilities ``` -------------------------------------------------------------------------------- /libs/typescript/agent/package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@trycua/agent", 3 | "version": "0.1.0", 4 | "packageManager": "[email protected]", 5 | "description": "TypeScript SDK for CUA agent interaction", 6 | "type": "module", 7 | "license": "MIT", 8 | "homepage": "https://github.com/trycua/cua/tree/main/libs/typescript/agent", 9 | "bugs": { 10 | "url": "https://github.com/trycua/cua/issues" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/trycua/cua.git" 15 | }, 16 | "author": "cua", 17 | "files": [ 18 | "dist" 19 | ], 20 | "main": "./dist/index.js", 21 | "module": "./dist/index.js", 22 | "types": "./dist/index.d.ts", 23 | "exports": { 24 | ".": "./dist/index.js", 25 | "./package.json": "./package.json" 26 | }, 27 | "publishConfig": { 28 | "access": "public" 29 | }, 30 | "scripts": { 31 | "lint": "biome lint .", 32 | "lint:fix": "biome lint --fix .", 33 | "build": "tsdown", 34 | "dev": "tsdown --watch", 35 | "test": "vitest", 36 | "typecheck": "tsc --noEmit", 37 | "release": "bumpp && pnpm publish", 38 | "prepublishOnly": "pnpm run build" 39 | }, 40 | "dependencies": { 41 | "@trycua/core": "^0.1.2", 42 | "peerjs": "^1.5.4", 43 | "pino": "^9.7.0" 44 | }, 45 | "devDependencies": { 46 | "@biomejs/biome": "^1.9.4", 47 | "@types/node": "^22.15.17", 48 | "bumpp": "^10.1.0", 49 | "happy-dom": "^17.4.7", 50 | "tsdown": "^0.14.1", 51 | "typescript": "^5.7.2", 52 | "vitest": "^2.1.8" 53 | } 54 | } 55 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/index.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, expect, it } from 'vitest'; 2 | import * as InterfaceExports from '../../src/interface/index.ts'; 3 | 4 | describe('Interface Module Exports', () => { 5 | it('should export InterfaceFactory', () => { 6 | expect(InterfaceExports.InterfaceFactory).toBeDefined(); 7 | expect( 8 | InterfaceExports.InterfaceFactory.createInterfaceForOS 9 | ).toBeDefined(); 10 | }); 11 | 12 | it('should export BaseComputerInterface', () => { 13 | expect(InterfaceExports.BaseComputerInterface).toBeDefined(); 14 | }); 15 | 16 | it('should export MacOSComputerInterface', () => { 17 | expect(InterfaceExports.MacOSComputerInterface).toBeDefined(); 18 | }); 19 | 20 | it('should export LinuxComputerInterface', () => { 21 | expect(InterfaceExports.LinuxComputerInterface).toBeDefined(); 22 | }); 23 | 24 | it('should export WindowsComputerInterface', () => { 25 | expect(InterfaceExports.WindowsComputerInterface).toBeDefined(); 26 | }); 27 | 28 | it('should export all expected interfaces', () => { 29 | const expectedExports = [ 30 | 'InterfaceFactory', 31 | 'BaseComputerInterface', 32 | 'MacOSComputerInterface', 33 | 'LinuxComputerInterface', 34 | 'WindowsComputerInterface', 35 | ]; 36 | 37 | const actualExports = Object.keys(InterfaceExports); 38 | for (const exportName of expectedExports) { 39 | expect(actualExports).toContain(exportName); 40 | } 41 | }); 42 | }); 43 | ``` -------------------------------------------------------------------------------- /libs/lume/Package.swift: -------------------------------------------------------------------------------- ```swift 1 | // 
swift-tools-version: 6.0 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "lume", 8 | platforms: [ 9 | .macOS(.v14) 10 | ], 11 | dependencies: [ 12 | .package(url: "https://github.com/apple/swift-argument-parser", from: "1.3.1"), 13 | .package(url: "https://github.com/apple/swift-format.git", branch: ("release/5.10")), 14 | .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.2.0")), 15 | .package(url: "https://github.com/mhdhejazi/Dynamic", branch: "master") 16 | ], 17 | targets: [ 18 | // Targets are the basic building blocks of a package, defining a module or a test suite. 19 | // Targets can depend on other targets in this package and products from dependencies. 20 | .executableTarget( 21 | name: "lume", 22 | dependencies: [ 23 | .product(name: "ArgumentParser", package: "swift-argument-parser"), 24 | .product(name: "Atomics", package: "swift-atomics"), 25 | .product(name: "Dynamic", package: "Dynamic") 26 | ], 27 | path: "src"), 28 | .testTarget( 29 | name: "lumeTests", 30 | dependencies: [ 31 | "lume" 32 | ], 33 | path: "tests") 34 | ] 35 | ) 36 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/mcp-server/installation.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Installation 3 | --- 4 | 5 | Install the package from PyPI: 6 | 7 | ```bash 8 | pip install cua-mcp-server 9 | ``` 10 | 11 | This will install: 12 | - The MCP server 13 | - CUA agent and computer dependencies 14 | - An executable `cua-mcp-server` script in your PATH 15 | 16 | ## Easy Setup Script 17 | 18 | If you want to simplify installation, you can use this one-liner to download and run the installation script: 19 | 20 | ```bash 21 | curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/python/mcp-server/scripts/install_mcp_server.sh | bash 22 | ``` 23 | 24 | This script will: 25 | - Create the ~/.cua directory if it doesn't exist 26 | - Generate a startup script at ~/.cua/start_mcp_server.sh 27 | - Make the script executable 28 | - The startup script automatically manages Python virtual environments and installs/updates the cua-mcp-server package 29 | 30 | You can then use the script in your MCP configuration like this: 31 | 32 | ```json 33 | { 34 | "mcpServers": { 35 | "cua-agent": { 36 | "command": "/bin/bash", 37 | "args": ["~/.cua/start_mcp_server.sh"], 38 | "env": { 39 | "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022" 40 | } 41 | } 42 | } 43 | } 44 | ``` 45 | 46 | ### Troubleshooting 47 | 48 | If you get a `/bin/bash: ~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, try changing the path to the script to be absolute instead of relative. 
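For example, in the `"args"` entry of the configuration above, replace `"~/.cua/start_mcp_server.sh"` with its expanded form such as `"/Users/<your-username>/.cua/start_mcp_server.sh"` (the username is a placeholder); `~` is typically not expanded when the path is passed to `/bin/bash` as a literal argument rather than typed into a shell.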
49 | 50 | To see the logs: 51 | ``` 52 | tail -n 20 -f ~/Library/Logs/Claude/mcp*.log 53 | ``` ``` -------------------------------------------------------------------------------- /.github/workflows/npm-publish-core.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Publish @trycua/core to npm 2 | 3 | on: 4 | push: 5 | branches: main 6 | 7 | jobs: 8 | publish: 9 | permissions: 10 | id-token: write 11 | contents: read 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Use Node.js 24.x 17 | uses: actions/setup-node@v4 18 | with: 19 | node-version: "24.x" 20 | registry-url: "https://registry.npmjs.org" 21 | 22 | - name: Setup pnpm 10 23 | uses: pnpm/action-setup@v4 24 | with: 25 | version: 10 26 | 27 | - name: Check if version changed 28 | id: check-version 29 | uses: EndBug/version-check@v2 30 | with: 31 | file-name: libs/typescript/core/package.json 32 | diff-search: true 33 | 34 | - name: Install dependencies 35 | if: steps.check-version.outputs.changed == 'true' 36 | working-directory: ./libs/typescript/core 37 | run: pnpm install --frozen-lockfile 38 | 39 | - name: Build package 40 | if: steps.check-version.outputs.changed == 'true' 41 | working-directory: ./libs/typescript/core 42 | run: pnpm run build --if-present 43 | 44 | - name: Publish to npm 45 | if: steps.check-version.outputs.changed == 'true' 46 | working-directory: ./libs/typescript/core 47 | run: pnpm publish --access public --no-git-checks 48 | env: 49 | NPM_CONFIG_PROVENANCE: true 50 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 51 | ``` -------------------------------------------------------------------------------- /docs/src/components/mermaid.tsx: -------------------------------------------------------------------------------- ```typescript 1 | 'use client'; 2 | 3 | import { useEffect, useId, useRef, useState } from 'react'; 4 | import { useTheme } from 'next-themes'; 5 | 6 | export function Mermaid({ chart }: { chart: string }) { 7 | const id = useId(); 8 | const [svg, setSvg] = useState(''); 9 | const containerRef = useRef<HTMLDivElement>(null); 10 | const currentChartRef = useRef<string>(null); 11 | const { resolvedTheme } = useTheme(); 12 | 13 | useEffect(() => { 14 | if (currentChartRef.current === chart || !containerRef.current) return; 15 | const container = containerRef.current; 16 | currentChartRef.current = chart; 17 | 18 | async function renderChart() { 19 | const { default: mermaid } = await import('mermaid'); 20 | 21 | try { 22 | // configure mermaid 23 | mermaid.initialize({ 24 | startOnLoad: false, 25 | securityLevel: 'loose', 26 | fontFamily: 'inherit', 27 | themeCSS: 'margin: 1.5rem auto 0;', 28 | theme: resolvedTheme === 'dark' ? 
'dark' : 'default', 29 | }); 30 | 31 | const { svg, bindFunctions } = await mermaid.render( 32 | id, 33 | chart.replaceAll('\\n', '\n'), 34 | ); 35 | 36 | bindFunctions?.(container); 37 | setSvg(svg); 38 | } catch (error) { 39 | console.error('Error while rendering mermaid', error); 40 | } 41 | } 42 | 43 | void renderChart(); 44 | }, [chart, id, resolvedTheme]); 45 | 46 | return <div ref={containerRef} dangerouslySetInnerHTML={{ __html: svg }} />; 47 | } ``` -------------------------------------------------------------------------------- /.github/workflows/npm-publish-computer.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Publish @trycua/computer to npm 2 | 3 | on: 4 | push: 5 | branches: main 6 | 7 | jobs: 8 | publish: 9 | permissions: 10 | id-token: write 11 | contents: read 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Use Node.js 24.x 17 | uses: actions/setup-node@v4 18 | with: 19 | node-version: "24.x" 20 | registry-url: "https://registry.npmjs.org" 21 | 22 | - name: Setup pnpm 10 23 | uses: pnpm/action-setup@v4 24 | with: 25 | version: 10 26 | 27 | - name: Check if version changed 28 | id: check-version 29 | uses: EndBug/version-check@v2 30 | with: 31 | file-name: libs/typescript/computer/package.json 32 | diff-search: true 33 | 34 | - name: Install dependencies 35 | if: steps.check-version.outputs.changed == 'true' 36 | working-directory: ./libs/typescript/computer 37 | run: pnpm install --frozen-lockfile 38 | 39 | - name: Build package 40 | if: steps.check-version.outputs.changed == 'true' 41 | working-directory: ./libs/typescript/computer 42 | run: pnpm run build --if-present 43 | 44 | - name: Publish to npm 45 | if: steps.check-version.outputs.changed == 'true' 46 | working-directory: ./libs/typescript/computer 47 | run: pnpm publish --access public --no-git-checks 48 | env: 49 | NPM_CONFIG_PROVENANCE: true 50 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 51 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@trycua/computer", 3 | "version": "0.1.3", 4 | "packageManager": "[email protected]", 5 | "description": "Typescript SDK for cua computer interaction", 6 | "type": "module", 7 | "license": "MIT", 8 | "homepage": "https://github.com/trycua/cua/tree/feature/computer/typescript/libs/typescript/computer", 9 | "bugs": { 10 | "url": "https://github.com/trycua/cua/issues" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/trycua/cua.git" 15 | }, 16 | "author": "cua", 17 | "files": [ 18 | "dist" 19 | ], 20 | "main": "./dist/index.js", 21 | "module": "./dist/index.js", 22 | "types": "./dist/index.d.ts", 23 | "exports": { 24 | ".": "./dist/index.js", 25 | "./package.json": "./package.json" 26 | }, 27 | "publishConfig": { 28 | "access": "public" 29 | }, 30 | "scripts": { 31 | "lint": "biome lint .", 32 | "lint:fix": "biome lint --fix .", 33 | "build": "tsdown", 34 | "dev": "tsdown --watch", 35 | "test": "vitest", 36 | "typecheck": "tsc --noEmit", 37 | "release": "bumpp && pnpm publish", 38 | "prepublishOnly": "pnpm run build" 39 | }, 40 | "dependencies": { 41 | "@trycua/core": "^0.1.2", 42 | "pino": "^9.7.0", 43 | "ws": "^8.18.0" 44 | }, 45 | "devDependencies": { 46 | "@biomejs/biome": "^1.9.4", 47 | "@types/node": "^22.15.17", 48 | "@types/ws": "^8.18.1", 49 | "bumpp": 
"^10.1.0", 50 | "happy-dom": "^17.4.7", 51 | "tsdown": "^0.11.9", 52 | "tsx": "^4.19.4", 53 | "typescript": "^5.8.3", 54 | "vitest": "^3.1.3" 55 | } 56 | } ``` -------------------------------------------------------------------------------- /libs/python/computer/pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "cua-computer" 7 | version = "0.4.8" 8 | description = "Computer-Use Interface (CUI) framework powering Cua" 9 | readme = "README.md" 10 | authors = [ 11 | { name = "TryCua", email = "[email protected]" } 12 | ] 13 | dependencies = [ 14 | "pillow>=10.0.0", 15 | "websocket-client>=1.8.0", 16 | "websockets>=12.0", 17 | "aiohttp>=3.9.0", 18 | "cua-core>=0.1.0,<0.2.0", 19 | "pydantic>=2.11.1" 20 | ] 21 | requires-python = ">=3.11" 22 | 23 | [project.optional-dependencies] 24 | lume = [ 25 | ] 26 | lumier = [ 27 | ] 28 | ui = [ 29 | "gradio>=5.23.3", 30 | "python-dotenv>=1.0.1", 31 | "datasets>=3.6.0", 32 | ] 33 | all = [ 34 | # Include all optional dependencies 35 | "gradio>=5.23.3", 36 | "python-dotenv>=1.0.1", 37 | "datasets>=3.6.0", 38 | ] 39 | 40 | [tool.pdm] 41 | distribution = true 42 | 43 | [tool.pdm.build] 44 | includes = ["computer/"] 45 | source-includes = ["tests/", "README.md", "LICENSE"] 46 | 47 | [tool.black] 48 | line-length = 100 49 | target-version = ["py311"] 50 | 51 | [tool.ruff] 52 | line-length = 100 53 | target-version = "py311" 54 | select = ["E", "F", "B", "I"] 55 | fix = true 56 | 57 | [tool.ruff.format] 58 | docstring-code-format = true 59 | 60 | [tool.mypy] 61 | strict = true 62 | python_version = "3.11" 63 | ignore_missing_imports = true 64 | disallow_untyped_defs = true 65 | check_untyped_defs = true 66 | warn_return_any = true 67 | show_error_codes = true 68 | warn_unused_ignores = false 69 | 70 | [tool.pytest.ini_options] 71 | asyncio_mode = "auto" 72 | testpaths = ["tests"] 73 | python_files = "test_*.py" ``` -------------------------------------------------------------------------------- /libs/typescript/core/package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@trycua/core", 3 | "version": "0.1.3", 4 | "packageManager": "[email protected]", 5 | "description": "Typescript SDK for cua core.", 6 | "type": "module", 7 | "license": "MIT", 8 | "homepage": "https://github.com/trycua/cua/tree/feature/computer/typescript/libs/typescript/computer", 9 | "bugs": { 10 | "url": "https://github.com/trycua/cua/issues" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/trycua/cua.git" 15 | }, 16 | "author": "cua", 17 | "files": [ 18 | "dist" 19 | ], 20 | "main": "./dist/index.js", 21 | "module": "./dist/index.js", 22 | "types": "./dist/index.d.ts", 23 | "exports": { 24 | ".": "./dist/index.js", 25 | "./package.json": "./package.json" 26 | }, 27 | "publishConfig": { 28 | "access": "public" 29 | }, 30 | "scripts": { 31 | "lint": "biome lint .", 32 | "lint:fix": "biome lint --fix .", 33 | "build": "tsdown", 34 | "dev": "tsdown --watch", 35 | "test": "vitest", 36 | "typecheck": "tsc --noEmit", 37 | "release": "bumpp && pnpm publish", 38 | "prepublishOnly": "pnpm run build" 39 | }, 40 | "dependencies": { 41 | "@types/uuid": "^10.0.0", 42 | "pino": "^9.7.0", 43 | "posthog-node": "^5.1.1", 44 | "uuid": "^11.1.0" 45 | }, 46 | "devDependencies": { 47 | "@biomejs/biome": "^1.9.4", 48 | "@types/node": "^22.15.17", 
49 | "@types/ws": "^8.18.1", 50 | "bumpp": "^10.1.0", 51 | "happy-dom": "^17.4.7", 52 | "tsdown": "^0.11.9", 53 | "tsx": "^4.19.4", 54 | "typescript": "^5.8.3", 55 | "vitest": "^3.1.3" 56 | } 57 | } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/adapters/models/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from typing import Optional 2 | 3 | try: 4 | from transformers import AutoConfig 5 | HF_AVAILABLE = True 6 | except ImportError: 7 | HF_AVAILABLE = False 8 | 9 | from .generic import GenericHFModel 10 | from .opencua import OpenCUAModel 11 | from .qwen2_5_vl import Qwen2_5_VLModel 12 | from .internvl import InternVLModel 13 | 14 | def load_model(model_name: str, device: str = "auto", trust_remote_code: bool = False): 15 | """Factory function to load and return the right model handler instance. 16 | 17 | - If the underlying transformers config class matches OpenCUA, return OpenCUAModel 18 | - Otherwise, return GenericHFModel 19 | """ 20 | if not HF_AVAILABLE: 21 | raise ImportError( 22 | "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\"" 23 | ) 24 | cfg = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code) 25 | cls = cfg.__class__.__name__ 26 | print(f"cls: {cls}") 27 | if "OpenCUA" in cls: 28 | return OpenCUAModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) 29 | elif "Qwen2_5_VL" in cls: 30 | return Qwen2_5_VLModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) 31 | elif "InternVL" in cls: 32 | return InternVLModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) 33 | return GenericHFModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) 34 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/trajectories.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Trajectories 3 | description: Recording and viewing agent conversation trajectories 4 | --- 5 | 6 | # Trajectory Saving Callback 7 | 8 | The TrajectorySaverCallback records complete agent conversations including messages, actions, and screenshots for debugging and analysis. 
9 | 10 | ## Callbacks Example 11 | 12 | ```python 13 | from agent.callbacks import TrajectorySaverCallback 14 | 15 | agent = ComputerAgent( 16 | model="anthropic/claude-3-5-sonnet-20241022", 17 | tools=[computer], 18 | callbacks=[ 19 | TrajectorySaverCallback( 20 | trajectory_dir="my_trajectories", 21 | save_screenshots=True 22 | ) 23 | ] 24 | ) 25 | ``` 26 | 27 | ## Shorthand 28 | 29 | ```python 30 | agent = ComputerAgent( 31 | model="anthropic/claude-3-5-sonnet-20241022", 32 | trajectory_dir="trajectories", # Auto-save trajectories 33 | tools=[computer] 34 | ) 35 | ``` 36 | 37 | ## View Trajectories Online 38 | 39 | View trajectories in the browser at: 40 | **[trycua.com/trajectory-viewer](http://trycua.com/trajectory-viewer)** 41 | 42 | The viewer provides: 43 | - Interactive conversation replay 44 | - Screenshot galleries 45 | - No data collection 46 | 47 | ## Trajectory Structure 48 | 49 | Trajectories are saved with: 50 | - Complete conversation history 51 | - Usage statistics and costs 52 | - Timestamps and metadata 53 | - Screenshots and computer actions 54 | 55 | Each trajectory contains: 56 | - **metadata.json**: Run info, timestamps, usage stats (`total_tokens`, `response_cost`) 57 | - **turn_000/**: Turn-by-turn conversation history (api calls, responses, computer calls, screenshots) 58 | ``` -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- ```python 1 | """Utility functions for example scripts.""" 2 | 3 | import os 4 | import sys 5 | import signal 6 | from pathlib import Path 7 | from typing import Optional 8 | 9 | 10 | def load_env_file(path: Path) -> bool: 11 | """Load environment variables from a file. 12 | 13 | Args: 14 | path: Path to the .env file 15 | 16 | Returns: 17 | True if file was loaded successfully, False otherwise 18 | """ 19 | if not path.exists(): 20 | return False 21 | 22 | print(f"Loading environment from {path}") 23 | with open(path, "r") as f: 24 | for line in f: 25 | line = line.strip() 26 | if not line or line.startswith("#"): 27 | continue 28 | 29 | key, value = line.split("=", 1) 30 | os.environ[key] = value 31 | 32 | return True 33 | 34 | 35 | def load_dotenv_files(): 36 | """Load environment variables from .env files. 37 | 38 | Tries to load from .env.local first, then .env if .env.local doesn't exist. 39 | """ 40 | # Get the project root directory (parent of the examples directory) 41 | project_root = Path(__file__).parent.parent 42 | 43 | # Try loading .env.local first, then .env if .env.local doesn't exist 44 | env_local_path = project_root / ".env.local" 45 | env_path = project_root / ".env" 46 | 47 | # Load .env.local if it exists, otherwise try .env 48 | if not load_env_file(env_local_path): 49 | load_env_file(env_path) 50 | 51 | 52 | def handle_sigint(signum, frame): 53 | """Handle SIGINT (Ctrl+C) gracefully.""" 54 | print("\nExiting gracefully...") 55 | sys.exit(0) 56 | ``` -------------------------------------------------------------------------------- /libs/lume/Development.md: -------------------------------------------------------------------------------- ```markdown 1 | # Development Guide 2 | 3 | This guide will help you set up your development environment and understand the process for contributing code to lume. 
4 | 5 | ## Environment Setup 6 | 7 | Lume development requires: 8 | - Swift 6 or higher 9 | - Xcode 15 or higher 10 | - macOS Sequoia 15.2 or higher 11 | - (Optional) VS Code with Swift extension 12 | 13 | If you're working on Lume in the context of the Cua monorepo, we recommend using the dedicated VS Code workspace configuration: 14 | 15 | ```bash 16 | # Open VS Code workspace from the root of the monorepo 17 | code .vscode/lume.code-workspace 18 | ``` 19 | This workspace is preconfigured with Swift language support, build tasks, and debug configurations. 20 | 21 | ## Setting Up the Repository Locally 22 | 23 | 1. **Fork the Repository**: Create your own fork of lume 24 | 2. **Clone the Repository**: 25 | ```bash 26 | git clone https://github.com/trycua/lume.git 27 | cd lume 28 | ``` 29 | 3. **Install Dependencies**: 30 | ```bash 31 | swift package resolve 32 | ``` 33 | 4. **Build the Project**: 34 | ```bash 35 | swift build 36 | ``` 37 | 38 | ## Development Workflow 39 | 40 | 1. Create a new branch for your changes 41 | 2. Make your changes 42 | 3. Run the tests: `swift test` 43 | 4. Build and test your changes locally 44 | 5. Commit your changes with clear commit messages 45 | 46 | ## Submitting Pull Requests 47 | 48 | 1. Push your changes to your fork 49 | 2. Open a Pull Request with: 50 | - A clear title and description 51 | - Reference to any related issues 52 | - Screenshots or logs if relevant 53 | 3. Respond to any feedback from maintainers 54 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Serve.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Serve: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Start the VM management server" 7 | ) 8 | 9 | @Option(help: "Port to listen on") 10 | var port: UInt16 = 7777 11 | 12 | func run() async throws { 13 | let server = await Server(port: port) 14 | 15 | Logger.info("Starting server", metadata: ["port": "\(port)"]) 16 | 17 | // Using custom error handling to prevent ArgumentParser from printing additional error messages 18 | do { 19 | try await server.start() 20 | } catch let error as PortError { 21 | // For port errors, just log once with the suggestion 22 | let suggestedPort = port + 1 23 | 24 | // Create a user-friendly error message that includes the suggestion 25 | let message = """ 26 | \(error.localizedDescription) 27 | Try using a different port: lume serve --port \(suggestedPort) 28 | """ 29 | 30 | // Log the message (without the "ERROR:" prefix that ArgumentParser will add) 31 | Logger.error(message) 32 | 33 | // Exit with a custom code to prevent ArgumentParser from printing the error again 34 | Foundation.exit(1) 35 | } catch { 36 | // For other errors, log once 37 | Logger.error("Failed to start server", metadata: ["error": error.localizedDescription]) 38 | throw error 39 | } 40 | } 41 | } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/computers/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Computer handler factory and interface definitions. 3 | 4 | This module provides a factory function to create computer handlers from different 5 | computer interface types, supporting both the ComputerHandler protocol and the 6 | Computer library interface. 
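
Typical usage (illustrative; assumes `computer` is an already constructed
Computer instance or supported dict):

    handler = await make_computer_handler(computer)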
7 | """ 8 | 9 | from .base import AsyncComputerHandler 10 | from .cua import cuaComputerHandler 11 | from .custom import CustomComputerHandler 12 | from computer import Computer as cuaComputer 13 | 14 | def is_agent_computer(computer): 15 | """Check if the given computer is a ComputerHandler or CUA Computer.""" 16 | return isinstance(computer, AsyncComputerHandler) or \ 17 | isinstance(computer, cuaComputer) or \ 18 | (isinstance(computer, dict)) #and "screenshot" in computer) 19 | 20 | async def make_computer_handler(computer): 21 | """ 22 | Create a computer handler from a computer interface. 23 | 24 | Args: 25 | computer: Either a ComputerHandler instance, Computer instance, or dict of functions 26 | 27 | Returns: 28 | ComputerHandler: A computer handler instance 29 | 30 | Raises: 31 | ValueError: If the computer type is not supported 32 | """ 33 | if isinstance(computer, AsyncComputerHandler): 34 | return computer 35 | if isinstance(computer, cuaComputer): 36 | computer_handler = cuaComputerHandler(computer) 37 | await computer_handler._initialize() 38 | return computer_handler 39 | if isinstance(computer, dict): 40 | return CustomComputerHandler(computer) 41 | raise ValueError(f"Unsupported computer type: {type(computer)}") ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer-server/REST-API.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: REST API Reference 3 | description: Reference for the /cmd REST endpoint of the Computer Server. 4 | --- 5 | 6 | # REST API Reference 7 | 8 | The Computer Server exposes a single REST endpoint for command execution: 9 | 10 | - `http://localhost:8000/cmd` 11 | - `https://your-container.containers.cloud.trycua.com:8443/cmd` (cloud) 12 | 13 | ## POST /cmd 14 | 15 | - Accepts commands as JSON in the request body 16 | - Returns results as a streaming response (text/event-stream) 17 | 18 | ### Request Format 19 | ```json 20 | { 21 | "command": "<command_name>", 22 | "params": { ... } 23 | } 24 | ``` 25 | 26 | ### Required Headers (for cloud containers) 27 | - `X-Container-Name`: Name of the container (cloud only) 28 | - `X-API-Key`: API key for authentication (cloud only) 29 | 30 | ### Example Request (Python) 31 | ```python 32 | import requests 33 | 34 | url = "http://localhost:8000/cmd" 35 | body = {"command": "screenshot", "params": {}} 36 | resp = requests.post(url, json=body) 37 | print(resp.text) 38 | ``` 39 | 40 | ### Example Request (Cloud) 41 | ```python 42 | import requests 43 | 44 | url = "https://your-container.containers.cloud.trycua.com:8443/cmd" 45 | headers = { 46 | "X-Container-Name": "your-container", 47 | "X-API-Key": "your-api-key" 48 | } 49 | body = {"command": "screenshot", "params": {}} 50 | resp = requests.post(url, json=body, headers=headers) 51 | print(resp.text) 52 | ``` 53 | 54 | ### Response Format 55 | Streaming text/event-stream with JSON objects, e.g.: 56 | ``` 57 | data: {"success": true, "content": "..."} 58 | 59 | data: {"success": false, "error": "..."} 60 | ``` 61 | 62 | ### Supported Commands 63 | See [Commands Reference](./Commands) for the full list of commands and parameters. 64 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/interface/factory.ts: -------------------------------------------------------------------------------- ```typescript 1 | /** 2 | * Factory for creating computer interfaces. 
3 | */ 4 | 5 | import type { OSType } from '../types'; 6 | import type { BaseComputerInterface } from './base'; 7 | import { LinuxComputerInterface } from './linux'; 8 | import { MacOSComputerInterface } from './macos'; 9 | import { WindowsComputerInterface } from './windows'; 10 | 11 | export const InterfaceFactory = { 12 | /** 13 | * Create an interface for the specified OS. 14 | * 15 | * @param os Operating system type ('macos', 'linux', or 'windows') 16 | * @param ipAddress IP address of the computer to control 17 | * @param apiKey Optional API key for cloud authentication 18 | * @param vmName Optional VM name for cloud authentication 19 | * @returns The appropriate interface for the OS 20 | * @throws Error if the OS type is not supported 21 | */ 22 | createInterfaceForOS( 23 | os: OSType, 24 | ipAddress: string, 25 | apiKey?: string, 26 | vmName?: string 27 | ): BaseComputerInterface { 28 | switch (os) { 29 | case 'macos': 30 | return new MacOSComputerInterface( 31 | ipAddress, 32 | 'lume', 33 | 'lume', 34 | apiKey, 35 | vmName 36 | ); 37 | case 'linux': 38 | return new LinuxComputerInterface( 39 | ipAddress, 40 | 'lume', 41 | 'lume', 42 | apiKey, 43 | vmName 44 | ); 45 | case 'windows': 46 | return new WindowsComputerInterface( 47 | ipAddress, 48 | 'lume', 49 | 'lume', 50 | apiKey, 51 | vmName 52 | ); 53 | default: 54 | throw new Error(`Unsupported OS type: ${os}`); 55 | } 56 | }, 57 | }; 58 | ``` -------------------------------------------------------------------------------- /libs/kasm/Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | FROM kasmweb/core-ubuntu-jammy:1.17.0 2 | USER root 3 | 4 | ENV HOME=/home/kasm-default-profile 5 | ENV STARTUPDIR=/dockerstartup 6 | ENV INST_SCRIPTS=$STARTUPDIR/install 7 | WORKDIR $HOME 8 | 9 | ######### Customize Container Here ########### 10 | 11 | # Installing python, pip, and libraries 12 | RUN apt-get update 13 | RUN apt install -y wget build-essential libncursesw5-dev libssl-dev \ 14 | libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev libffi-dev zlib1g-dev 15 | RUN add-apt-repository ppa:deadsnakes/ppa 16 | RUN apt install -y python3.11 python3-pip python3-tk python3-dev \ 17 | gnome-screenshot wmctrl ffmpeg socat xclip 18 | 19 | RUN pip install cua-computer-server 20 | 21 | # Install Firefox 22 | ENV DEBIAN_FRONTEND=noninteractive \ 23 | INST_DIR=$STARTUPDIR/install 24 | COPY ./src/ $INST_DIR 25 | RUN bash ${INST_DIR}/ubuntu/install/firefox/install_firefox.sh 26 | 27 | # Disable SSL requirement 28 | RUN sed -i 's/require_ssl: true/require_ssl: false/g' /usr/share/kasmvnc/kasmvnc_defaults.yaml 29 | RUN sed -i 's/-sslOnly//g' /dockerstartup/vnc_startup.sh 30 | 31 | # Running the python script on startup 32 | RUN echo "/usr/bin/python3 -m computer_server" > $STARTUPDIR/custom_startup.sh \ 33 | && chmod +x $STARTUPDIR/custom_startup.sh 34 | 35 | # Enable sudo support for kasm-user 36 | RUN echo "kasm-user:password" | chpasswd 37 | RUN usermod -aG sudo kasm-user 38 | RUN echo "kasm-user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers 39 | 40 | ######### End Customizations ########### 41 | 42 | RUN chown 1000:0 $HOME 43 | RUN $STARTUPDIR/set_user_permission.sh $HOME 44 | ENV HOME=/home/kasm-user 45 | WORKDIR $HOME 46 | RUN mkdir -p $HOME && chown -R 1000:0 $HOME 47 | 48 | USER 1000 ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/callbacks/prompt_instructions.py: 
-------------------------------------------------------------------------------- ```python 1 | """ 2 | Prompt instructions callback. 3 | 4 | This callback allows simple prompt engineering by pre-pending a user 5 | instructions message to the start of the conversation before each LLM call. 6 | 7 | Usage: 8 | 9 | from agent.callbacks import PromptInstructionsCallback 10 | agent = ComputerAgent( 11 | model="openai/computer-use-preview", 12 | callbacks=[PromptInstructionsCallback("Follow these rules...")] 13 | ) 14 | 15 | """ 16 | 17 | from typing import Any, Dict, List, Optional 18 | 19 | from .base import AsyncCallbackHandler 20 | 21 | 22 | class PromptInstructionsCallback(AsyncCallbackHandler): 23 | """ 24 | Prepend a user instructions message to the message list. 25 | 26 | This is a minimal, non-invasive way to guide the agent's behavior without 27 | modifying agent loops or tools. It works with any provider/loop since it 28 | only alters the messages array before sending to the model. 29 | """ 30 | 31 | def __init__(self, instructions: Optional[str]) -> None: 32 | self.instructions = instructions 33 | 34 | async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 35 | # Pre-pend instructions message 36 | if not self.instructions: 37 | return messages 38 | 39 | # Ensure we don't duplicate if already present at the front 40 | if messages and isinstance(messages[0], dict): 41 | first = messages[0] 42 | if first.get("role") == "user" and first.get("content") == self.instructions: 43 | return messages 44 | 45 | return [ 46 | {"role": "user", "content": self.instructions}, 47 | ] + messages 48 | ``` -------------------------------------------------------------------------------- /examples/winsandbox_example.py: -------------------------------------------------------------------------------- ```python 1 | """Example of using the Windows Sandbox computer provider. 
2 | 3 | Learn more at: https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/ 4 | """ 5 | 6 | import asyncio 7 | from computer import Computer 8 | 9 | async def main(): 10 | """Test the Windows Sandbox provider.""" 11 | 12 | # Create a computer instance using Windows Sandbox 13 | computer = Computer( 14 | provider_type="winsandbox", 15 | os_type="windows", 16 | memory="4GB", 17 | # ephemeral=True, # Always true for Windows Sandbox 18 | ) 19 | 20 | try: 21 | print("Starting Windows Sandbox...") 22 | await computer.run() 23 | 24 | print("Windows Sandbox is ready!") 25 | print(f"IP Address: {await computer.get_ip()}") 26 | 27 | # Test basic functionality 28 | print("Testing basic functionality...") 29 | screenshot = await computer.interface.screenshot() 30 | print(f"Screenshot taken: {len(screenshot)} bytes") 31 | 32 | # Test running a command 33 | print("Testing command execution...") 34 | result = await computer.interface.run_command("echo Hello from Windows Sandbox!") 35 | print(f"Command output: {result.stdout}") 36 | 37 | print("Press any key to continue...") 38 | input() 39 | 40 | except Exception as e: 41 | print(f"Error: {e}") 42 | import traceback 43 | traceback.print_exc() 44 | 45 | finally: 46 | print("Stopping Windows Sandbox...") 47 | await computer.stop() 48 | print("Windows Sandbox stopped.") 49 | 50 | if __name__ == "__main__": 51 | asyncio.run(main()) 52 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/agent-lifecycle.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Agent Lifecycle 3 | description: Agent callback lifecycle and hooks 4 | --- 5 | 6 | # Callbacks 7 | 8 | Callbacks provide hooks into the agent lifecycle for extensibility. They're called in a specific order during agent execution. 9 | 10 | ## Callback Lifecycle 11 | 12 | ### 1. `on_run_start(kwargs, old_items)` 13 | Called once when agent run begins. Initialize tracking, logging, or state. 14 | 15 | ### 2. `on_run_continue(kwargs, old_items, new_items)` → bool 16 | Called before each iteration. Return `False` to stop execution (e.g., budget limits). 17 | 18 | ### 3. `on_llm_start(messages)` → messages 19 | Preprocess messages before LLM call. Use for PII anonymization, image retention. 20 | 21 | ### 4. `on_api_start(kwargs)` 22 | Called before each LLM API call. 23 | 24 | ### 5. `on_api_end(kwargs, result)` 25 | Called after each LLM API call completes. 26 | 27 | ### 6. `on_usage(usage)` 28 | Called when usage information is received from LLM. 29 | 30 | ### 7. `on_llm_end(messages)` → messages 31 | Postprocess messages after LLM call. Use for PII deanonymization. 32 | 33 | ### 8. `on_responses(kwargs, responses)` 34 | Called when responses are received from agent loop. 35 | 36 | ### 9. Response-specific hooks: 37 | - `on_text(item)` - Text messages 38 | - `on_computer_call_start(item)` - Before computer actions 39 | - `on_computer_call_end(item, result)` - After computer actions 40 | - `on_function_call_start(item)` - Before function calls 41 | - `on_function_call_end(item, result)` - After function calls 42 | - `on_screenshot(screenshot, name)` - When screenshots are taken 43 | 44 | ### 10. `on_run_end(kwargs, old_items, new_items)` 45 | Called when agent run completes. Finalize tracking, save trajectories. 
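
Putting a few of these hooks together, a minimal custom callback might look like the sketch below. It assumes the `AsyncCallbackHandler` base class from `agent.callbacks.base` (the same base used by the built-in callbacks); the class name, the counters, and the exact shape of `usage` are illustrative, not part of the shipped SDK.

```python
from typing import Any, Dict, List

from agent import ComputerAgent
from agent.callbacks.base import AsyncCallbackHandler


class RunStatsCallback(AsyncCallbackHandler):
    """Illustrative callback: counts computer actions and accumulates token usage."""

    def __init__(self) -> None:
        self.computer_calls = 0
        self.total_tokens = 0

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        # (1) Reset per-run state when the agent run begins
        self.computer_calls = 0
        self.total_tokens = 0

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        # (9) Count each computer action before it executes
        self.computer_calls += 1

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        # (6) Accumulate reported token usage; `usage` is treated as a dict here
        self.total_tokens += usage.get("total_tokens", 0)


# Attach the callback exactly like the built-in ones (assumes `computer` is defined)
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[RunStatsCallback()],
)
```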
``` -------------------------------------------------------------------------------- /libs/python/computer-server/computer_server/diorama/macos.py: -------------------------------------------------------------------------------- ```python 1 | import platform 2 | import sys 3 | import platform 4 | import inspect 5 | from computer_server.diorama.diorama import Diorama 6 | from computer_server.diorama.base import BaseDioramaHandler 7 | from typing import Optional 8 | 9 | class MacOSDioramaHandler(BaseDioramaHandler): 10 | """Handler for Diorama commands on macOS, using local diorama module.""" 11 | async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict: 12 | if platform.system().lower() != "darwin": 13 | return {"success": False, "error": "Diorama is only supported on macOS."} 14 | try: 15 | app_list = arguments.get("app_list") if arguments else None 16 | if not app_list: 17 | return {"success": False, "error": "Missing 'app_list' in arguments"} 18 | diorama = Diorama(app_list) 19 | interface = diorama.interface 20 | if not hasattr(interface, action): 21 | return {"success": False, "error": f"Unknown diorama action: {action}"} 22 | method = getattr(interface, action) 23 | # Remove app_list from arguments before calling the method 24 | filtered_arguments = dict(arguments) 25 | filtered_arguments.pop("app_list", None) 26 | if inspect.iscoroutinefunction(method): 27 | result = await method(**(filtered_arguments or {})) 28 | else: 29 | result = method(**(filtered_arguments or {})) 30 | return {"success": True, "result": result} 31 | except Exception as e: 32 | import traceback 33 | return {"success": False, "error": str(e), "trace": traceback.format_exc()} 34 | ``` -------------------------------------------------------------------------------- /docs/src/lib/source.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { docs } from '@/.source'; 2 | import { loader } from 'fumadocs-core/source'; 3 | import { icons } from 'lucide-react'; 4 | import { createElement } from 'react'; 5 | 6 | import fs from 'node:fs/promises'; 7 | import path from 'node:path'; 8 | 9 | /** 10 | * Returns available API doc versions for a given section (e.g., 'agent'). 
11 | * Each version is an object: { label, slug } 12 | * - 'Current' (index.mdx) → slug: [] 13 | * - '[version].mdx' → slug: [version] 14 | */ 15 | export async function getApiVersions( 16 | section: string 17 | ): Promise<{ label: string; slug: string[] }[]> { 18 | const dir = path.join(process.cwd(), 'content/docs/api', section); 19 | let files: string[] = []; 20 | try { 21 | files = (await fs.readdir(dir)).filter((f) => f.endsWith('.mdx')); 22 | } catch (_e) { 23 | return []; 24 | } 25 | const versions = files.map((file) => { 26 | if (file === 'index.mdx') { 27 | return { label: 'Current', slug: [] }; 28 | } 29 | const version = file.replace(/\.mdx$/, ''); 30 | return { label: version, slug: [version] }; 31 | }); 32 | // Always put 'Current' first, then others sorted descending (semver-ish) 33 | return [ 34 | ...versions.filter((v) => v.label === 'Current'), 35 | ...versions 36 | .filter((v) => v.label !== 'Current') 37 | .sort((a, b) => 38 | b.label.localeCompare(a.label, undefined, { numeric: true }) 39 | ), 40 | ]; 41 | } 42 | 43 | // See https://fumadocs.vercel.app/docs/headless/source-api for more info 44 | export const source = loader({ 45 | // it assigns a URL to your pages 46 | baseUrl: '/', 47 | source: docs.toFumadocsSource(), 48 | icon(icon) { 49 | if (!icon) return; 50 | if (icon in icons) return createElement(icons[icon as keyof typeof icons]); 51 | }, 52 | }); 53 | ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/interface/factory.py: -------------------------------------------------------------------------------- ```python 1 | """Factory for creating computer interfaces.""" 2 | 3 | from typing import Literal, Optional 4 | from .base import BaseComputerInterface 5 | 6 | class InterfaceFactory: 7 | """Factory for creating OS-specific computer interfaces.""" 8 | 9 | @staticmethod 10 | def create_interface_for_os( 11 | os: Literal['macos', 'linux', 'windows'], 12 | ip_address: str, 13 | api_key: Optional[str] = None, 14 | vm_name: Optional[str] = None 15 | ) -> BaseComputerInterface: 16 | """Create an interface for the specified OS. 
17 | 18 | Args: 19 | os: Operating system type ('macos', 'linux', or 'windows') 20 | ip_address: IP address of the computer to control 21 | api_key: Optional API key for cloud authentication 22 | vm_name: Optional VM name for cloud authentication 23 | 24 | Returns: 25 | BaseComputerInterface: The appropriate interface for the OS 26 | 27 | Raises: 28 | ValueError: If the OS type is not supported 29 | """ 30 | # Import implementations here to avoid circular imports 31 | from .macos import MacOSComputerInterface 32 | from .linux import LinuxComputerInterface 33 | from .windows import WindowsComputerInterface 34 | 35 | if os == 'macos': 36 | return MacOSComputerInterface(ip_address, api_key=api_key, vm_name=vm_name) 37 | elif os == 'linux': 38 | return LinuxComputerInterface(ip_address, api_key=api_key, vm_name=vm_name) 39 | elif os == 'windows': 40 | return WindowsComputerInterface(ip_address, api_key=api_key, vm_name=vm_name) 41 | else: 42 | raise ValueError(f"Unsupported OS type: {os}") 43 | ``` -------------------------------------------------------------------------------- /libs/lume/src/VM/VMDetails.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import Network 3 | 4 | struct DiskSize: Codable { 5 | let allocated: UInt64 6 | let total: UInt64 7 | } 8 | 9 | extension DiskSize { 10 | var formattedAllocated: String { 11 | formatBytes(allocated) 12 | } 13 | 14 | var formattedTotal: String { 15 | formatBytes(total) 16 | } 17 | 18 | private func formatBytes(_ bytes: UInt64) -> String { 19 | let units = ["B", "KB", "MB", "GB", "TB"] 20 | var size = Double(bytes) 21 | var unitIndex = 0 22 | 23 | while size >= 1024 && unitIndex < units.count - 1 { 24 | size /= 1024 25 | unitIndex += 1 26 | } 27 | 28 | return String(format: "%.1f%@", size, units[unitIndex]) 29 | } 30 | } 31 | 32 | struct VMDetails: Codable { 33 | let name: String 34 | let os: String 35 | let cpuCount: Int 36 | let memorySize: UInt64 37 | let diskSize: DiskSize 38 | let display: String 39 | let status: String 40 | let vncUrl: String? 41 | let ipAddress: String? 42 | let locationName: String 43 | let sharedDirectories: [SharedDirectory]? 44 | 45 | init( 46 | name: String, 47 | os: String, 48 | cpuCount: Int, 49 | memorySize: UInt64, 50 | diskSize: DiskSize, 51 | display: String, 52 | status: String, 53 | vncUrl: String?, 54 | ipAddress: String?, 55 | locationName: String, 56 | sharedDirectories: [SharedDirectory]? = nil 57 | ) { 58 | self.name = name 59 | self.os = os 60 | self.cpuCount = cpuCount 61 | self.memorySize = memorySize 62 | self.diskSize = diskSize 63 | self.display = display 64 | self.status = status 65 | self.vncUrl = vncUrl 66 | self.ipAddress = ipAddress 67 | self.locationName = locationName 68 | self.sharedDirectories = sharedDirectories 69 | } 70 | } 71 | ```