This is page 3 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ ├── py.code-workspace │ └── settings.json ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── 
windows-sandbox.md ├── CONTRIBUTING.md ├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── cloud-vm-management.mdx │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx 
│ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ │ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── cloud_api_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── 
som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift 
│ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── .bumpversion.cfg │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py 
│ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── gemini.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ 
│ │ └── provider.py │ │ │ │ │ ├── types.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── .bumpversion.cfg │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── CONCURRENT_SESSIONS.md │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── server.py │ │ │ │ └── session_manager.py │ │ │ ├── pdm.lock │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── .bumpversion.cfg │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── .bumpversion.cfg │ │ 
├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ │ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── 
pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── xfce4-session.xml ├── LICENSE.md ├── Makefile ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_mcp_server_session_management.py ├── test_mcp_server_streaming.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /examples/winsandbox_example.py: -------------------------------------------------------------------------------- ```python 1 | """Example of using the Windows Sandbox computer provider. 
async def main():
    """Start a Windows Sandbox computer, smoke-test it, then shut it down.

    Takes a screenshot and runs one shell command through
    ``computer.interface``, waits for the user on stdin, and always calls
    ``computer.stop()`` on the way out, even when a step failed.
    """
    # Windows Sandbox backed computer; the provider is always ephemeral,
    # so there is no ``ephemeral`` flag to pass here.
    computer = Computer(
        provider_type="winsandbox",
        os_type="windows",
        memory="4GB",
    )

    try:
        print("Starting Windows Sandbox...")
        await computer.run()
        print("Windows Sandbox is ready!")

        ip_address = await computer.get_ip()
        print(f"IP Address: {ip_address}")

        # Basic functionality: grab a screenshot and report its size.
        print("Testing basic functionality...")
        image_bytes = await computer.interface.screenshot()
        print(f"Screenshot taken: {len(image_bytes)} bytes")

        # Command execution: echo a string inside the sandbox.
        print("Testing command execution...")
        outcome = await computer.interface.run_command("echo Hello from Windows Sandbox!")
        print(f"Command output: {outcome.stdout}")

        # Keep the sandbox open until the user confirms on stdin.
        print("Press any key to continue...")
        input()

    except Exception as exc:
        # Example script: report the failure with its traceback, then fall
        # through to the cleanup below instead of re-raising.
        import traceback

        print(f"Error: {exc}")
        traceback.print_exc()

    finally:
        print("Stopping Windows Sandbox...")
        await computer.stop()
        print("Windows Sandbox stopped.")
&> /dev/null && pwd )" 7 | 8 | # --- Choose a Python interpreter (prefer repo-root venv) --- 9 | CANDIDATES=( 10 | "$CUA_REPO_DIR/.venv/bin/python" 11 | "$CUA_REPO_DIR/libs/.venv/bin/python" 12 | "$(command -v python3 || true)" 13 | "$(command -v python || true)" 14 | ) 15 | 16 | PYTHON_PATH="" 17 | for p in "${CANDIDATES[@]}"; do 18 | if [[ -n "$p" && -x "$p" ]]; then 19 | PYTHON_PATH="$p" 20 | break 21 | fi 22 | done 23 | 24 | if [[ -z "${PYTHON_PATH}" ]]; then 25 | >&2 echo "[cua-mcp] ERROR: No suitable Python found. Tried:" 26 | for p in "${CANDIDATES[@]}"; do >&2 echo " - $p"; done 27 | >&2 echo "[cua-mcp] Tip: create venv: python3 -m venv $CUA_REPO_DIR/.venv && \"$CUA_REPO_DIR/.venv/bin/pip\" install -e \"$CUA_REPO_DIR/libs/python/mcp-server\"" 28 | exit 127 29 | fi 30 | 31 | # --- Export PYTHONPATH so module imports work during dev --- 32 | export PYTHONPATH="$CUA_REPO_DIR/libs/python/mcp-server:$CUA_REPO_DIR/libs/python/agent:$CUA_REPO_DIR/libs/python/computer:$CUA_REPO_DIR/libs/python/core:$CUA_REPO_DIR/libs/python/pylume" 33 | 34 | # --- Helpful startup log for Claude's mcp.log --- 35 | >&2 echo "[cua-mcp] using python: $PYTHON_PATH" 36 | >&2 echo "[cua-mcp] repo dir : $CUA_REPO_DIR" 37 | >&2 echo "[cua-mcp] PYTHONPATH : $PYTHONPATH" 38 | if [[ -n "${CUA_MODEL_NAME:-}" ]]; then 39 | >&2 echo "[cua-mcp] CUA_MODEL_NAME=$CUA_MODEL_NAME" 40 | fi 41 | 42 | # --- Run the MCP server module --- 43 | exec "$PYTHON_PATH" -m mcp_server.server 44 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/agent-lifecycle.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Agent Lifecycle 3 | description: Agent callback lifecycle and hooks 4 | --- 5 | 6 | # Callbacks 7 | 8 | Callbacks provide hooks into the agent lifecycle for extensibility. They're called in a specific order during agent execution. 
9 | 10 | ## Callback Lifecycle 11 | 12 | ### 1. `on_run_start(kwargs, old_items)` 13 | Called once when agent run begins. Initialize tracking, logging, or state. 14 | 15 | ### 2. `on_run_continue(kwargs, old_items, new_items)` → bool 16 | Called before each iteration. Return `False` to stop execution (e.g., budget limits). 17 | 18 | ### 3. `on_llm_start(messages)` → messages 19 | Preprocess messages before LLM call. Use for PII anonymization, image retention. 20 | 21 | ### 4. `on_api_start(kwargs)` 22 | Called before each LLM API call. 23 | 24 | ### 5. `on_api_end(kwargs, result)` 25 | Called after each LLM API call completes. 26 | 27 | ### 6. `on_usage(usage)` 28 | Called when usage information is received from LLM. 29 | 30 | ### 7. `on_llm_end(messages)` → messages 31 | Postprocess messages after LLM call. Use for PII deanonymization. 32 | 33 | ### 8. `on_responses(kwargs, responses)` 34 | Called when responses are received from agent loop. 35 | 36 | ### 9. Response-specific hooks: 37 | - `on_text(item)` - Text messages 38 | - `on_computer_call_start(item)` - Before computer actions 39 | - `on_computer_call_end(item, result)` - After computer actions 40 | - `on_function_call_start(item)` - Before function calls 41 | - `on_function_call_end(item, result)` - After function calls 42 | - `on_screenshot(screenshot, name)` - When screenshots are taken 43 | 44 | ### 10. `on_run_end(kwargs, old_items, new_items)` 45 | Called when agent run completes. Finalize tracking, save trajectories. 
class MacOSDioramaHandler(BaseDioramaHandler):
    """Handler for Diorama commands on macOS, using local diorama module."""

    async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
        """Invoke ``action`` on the interface of a Diorama built from ``app_list``.

        Args:
            action: Name of the attribute to call on ``Diorama(app_list).interface``.
            arguments: Keyword arguments for the action. Must contain a
                non-empty ``"app_list"`` entry, which is used to build the
                Diorama and is removed before the action is called.

        Returns:
            ``{"success": True, "result": ...}`` when the call completes, or
            ``{"success": False, "error": ...}`` on a non-macOS platform, a
            missing ``app_list``, or an unknown action. When an exception is
            caught the dict also carries the formatted traceback under
            ``"trace"``.
        """
        if platform.system().lower() != "darwin":
            return {"success": False, "error": "Diorama is only supported on macOS."}
        try:
            app_list = arguments.get("app_list") if arguments else None
            if not app_list:
                return {"success": False, "error": "Missing 'app_list' in arguments"}

            interface = Diorama(app_list).interface
            if not hasattr(interface, action):
                return {"success": False, "error": f"Unknown diorama action: {action}"}
            method = getattr(interface, action)

            # app_list only selects the Diorama; it is not an argument of the
            # action itself, so strip it from a copy before the call.
            call_kwargs = dict(arguments)
            call_kwargs.pop("app_list", None)

            # Call first, then await whatever awaitable comes back. Checking
            # iscoroutinefunction() up front misses wrapped or partial-bound
            # async callables and would return an un-awaited coroutine.
            result = method(**call_kwargs)
            if inspect.isawaitable(result):
                result = await result
            return {"success": True, "result": result}
        except Exception as e:
            # Errors are reported in the response payload rather than raised.
            import traceback
            return {"success": False, "error": str(e), "trace": traceback.format_exc()}
/**
 * Lists the API doc versions available for a section (e.g. 'agent').
 *
 * Reads the `.mdx` files under `content/docs/api/<section>` relative to the
 * current working directory and maps each one to `{ label, slug }`:
 * - `index.mdx`     → label 'Current', slug []
 * - `<version>.mdx` → label '<version>', slug ['<version>']
 *
 * Returns an empty list when the directory cannot be read.
 */
export async function getApiVersions(
  section: string
): Promise<{ label: string; slug: string[] }[]> {
  const sectionDir = path.join(process.cwd(), 'content/docs/api', section);

  let mdxFiles: string[] = [];
  try {
    const entries = await fs.readdir(sectionDir);
    mdxFiles = entries.filter((name) => name.endsWith('.mdx'));
  } catch (_err) {
    return [];
  }

  const entriesByFile = mdxFiles.map((name) => {
    if (name !== 'index.mdx') {
      const version = name.replace(/\.mdx$/, '');
      return { label: version, slug: [version] };
    }
    return { label: 'Current', slug: [] };
  });

  // 'Current' entries lead; the rest follow in descending, numeric-aware order.
  const currentEntries = entriesByFile.filter((entry) => entry.label === 'Current');
  const olderEntries = entriesByFile
    .filter((entry) => entry.label !== 'Current')
    .sort((left, right) =>
      right.label.localeCompare(left.label, undefined, { numeric: true })
    );

  return [...currentEntries, ...olderEntries];
}

// See https://fumadocs.vercel.app/docs/headless/source-api for more info
export const source = loader({
  // base URL under which pages are assigned their URLs
  baseUrl: '/',
  source: docs.toFumadocsSource(),
  // Resolve an icon name to a lucide-react element; unknown names yield nothing.
  icon(icon) {
    if (!icon) return;
    if (icon in icons) {
      const iconComponent = icons[icon as keyof typeof icons];
      return createElement(iconComponent);
    }
  },
});
class InterfaceFactory:
    """Builds the computer interface that matches a target operating system."""

    @staticmethod
    def create_interface_for_os(
        os: Literal['macos', 'linux', 'windows'],
        ip_address: str,
        api_key: Optional[str] = None,
        vm_name: Optional[str] = None
    ) -> BaseComputerInterface:
        """Return the interface implementation registered for ``os``.

        Args:
            os: Operating system type ('macos', 'linux', or 'windows')
            ip_address: IP address of the computer to control
            api_key: Optional API key for cloud authentication
            vm_name: Optional VM name for cloud authentication

        Returns:
            BaseComputerInterface: An instance of the OS-specific interface,
            constructed with ``ip_address``, ``api_key`` and ``vm_name``.

        Raises:
            ValueError: If ``os`` is not one of the supported values.
        """
        # Implementations are imported lazily to avoid circular imports.
        from .macos import MacOSComputerInterface
        from .linux import LinuxComputerInterface
        from .windows import WindowsComputerInterface

        known_interfaces = (
            ('macos', MacOSComputerInterface),
            ('linux', LinuxComputerInterface),
            ('windows', WindowsComputerInterface),
        )
        for os_name, interface_cls in known_interfaces:
            if os == os_name:
                return interface_cls(ip_address, api_key=api_key, vm_name=vm_name)

        raise ValueError(f"Unsupported OS type: {os}")
class DioramaComputer:
    """
    A minimal Computer-like interface for Diorama, compatible with ComputerAgent.

    Exposes ``diorama``, ``interface`` and ``_initialized``, the ``run()``
    coroutine, and the async context manager protocol
    (``__aenter__`` / ``__aexit__``).
    """

    def __init__(self, diorama):
        """
        Initialize the DioramaComputer with a diorama instance.

        Args:
            diorama: The diorama instance to wrap; its ``interface`` attribute
                is re-exported as ``self.interface``.
        """
        self.diorama = diorama
        self.interface = self.diorama.interface
        self._initialized = False

    async def __aenter__(self):
        """
        Async context manager entry: mark the instance as initialized.

        No event loop handling is needed here. When asyncio drives this
        coroutine a loop is already running, so the previous "create a loop
        if none is running" fallback could not trigger; had it ever run, it
        would have installed a never-closed loop as global state.

        Returns:
            DioramaComputer: The initialized instance.
        """
        self._initialized = True
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """
        Async context manager exit; there is nothing to tear down.

        Defined so that ``async with DioramaComputer(...)`` works. Returns
        ``None`` so exceptions raised inside the ``async with`` body propagate.
        """
        return None

    async def run(self):
        """
        Run method stub for compatibility with the ComputerAgent interface.

        Performs the ``__aenter__`` initialization if it has not happened yet.

        Returns:
            DioramaComputer: The initialized instance.
        """
        if not self._initialized:
            await self.__aenter__()
        return self
9 | </Callout> 10 | 11 | ## Available Images 12 | 13 | The following pre-built images are available to download via `lume pull`: 14 | 15 | | Image | Tag | Description | Logical Size | 16 | |-------|------------|-------------|------| 17 | | `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 20GB | 18 | | `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 22GB | 19 | | `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 24GB | 20 | | `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | 21 | 22 | ## Disk Space 23 | 24 | For additional disk space, resize the VM disk after pulling the image using the `lume set <name> --disk-size <size>` command. Note that the actual disk space used by sparse images will be much lower than the logical size listed. 25 | 26 | <Callout> 27 | **Important Note (v0.2.0+):** Images are being re-uploaded with sparse file system optimizations enabled, resulting in significantly lower actual disk usage. Older images (without the `-sparse` suffix) are now **deprecated**. The last version of `lume` fully supporting the non-sparse images was `v0.1.x`. Starting from `v0.2.0`, lume will automatically pull images optimized with sparse file system support. 
28 | </Callout> ``` -------------------------------------------------------------------------------- /libs/python/pylume/pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | build-backend = "pdm.backend" 3 | requires = ["pdm-backend"] 4 | 5 | [project] 6 | authors = [{ name = "TryCua", email = "[email protected]" }] 7 | classifiers = [ 8 | "Intended Audience :: Developers", 9 | "License :: OSI Approved :: MIT License", 10 | "Operating System :: MacOS :: MacOS X", 11 | "Programming Language :: Python :: 3", 12 | "Programming Language :: Python :: 3.10", 13 | "Programming Language :: Python :: 3.11", 14 | "Programming Language :: Python :: 3.12", 15 | ] 16 | dependencies = ["pydantic>=2.11.1"] 17 | description = "Python SDK for lume - run macOS and Linux VMs on Apple Silicon" 18 | dynamic = ["version"] 19 | keywords = ["apple-silicon", "macos", "virtualization", "vm"] 20 | license = { text = "MIT" } 21 | name = "pylume" 22 | readme = "README.md" 23 | requires-python = ">=3.9" 24 | 25 | [tool.pdm.version] 26 | path = "pylume/__init__.py" 27 | source = "file" 28 | 29 | [project.urls] 30 | homepage = "https://github.com/trycua/pylume" 31 | repository = "https://github.com/trycua/pylume" 32 | 33 | [tool.pdm] 34 | distribution = true 35 | 36 | [tool.pdm.dev-dependencies] 37 | dev = [ 38 | "black>=23.0.0", 39 | "isort>=5.12.0", 40 | "pytest-asyncio>=0.23.0", 41 | "pytest>=7.0.0", 42 | ] 43 | 44 | [tool.black] 45 | line-length = 100 46 | target-version = ["py311"] 47 | 48 | [tool.ruff] 49 | fix = true 50 | line-length = 100 51 | select = ["B", "E", "F", "I"] 52 | target-version = "py311" 53 | 54 | [tool.ruff.format] 55 | docstring-code-format = true 56 | 57 | [tool.mypy] 58 | check_untyped_defs = true 59 | disallow_untyped_defs = true 60 | ignore_missing_imports = true 61 | python_version = "3.11" 62 | show_error_codes = true 63 | strict = true 64 | warn_return_any = true 65 | warn_unused_ignores = 
async def main():
    """Demo: run a function inside a Cua Container through the ``sandboxed`` decorator.

    Starts a computer, installs packages into the ``demo_venv`` virtual
    environment, opens Safari, then calls a decorated function and prints the
    value it returns.
    """
    # Initialize the computer in a Cua Container
    computer = Computer()
    await computer.run()
    
    # Install a package in a virtual environment in the container
    await computer.venv_install("demo_venv", ["requests", "macos-pyxa"])

    # Open Safari
    await computer.interface.run_command("open -a Safari")
    # Fixed pause before querying Safari (presumably to let the app finish launching)
    await asyncio.sleep(2)

    # Define a sandboxed function
    # This function will run inside the Cua Container
    # NOTE(review): no `computer=` is passed, so the decorator falls back to the
    # default computer; presumably `computer.run()` registers it - confirm.
    @sandboxed("demo_venv")
    def greet_and_print(name):
        # get .html of the current Safari tab
        # PyXA is imported inside the function body so the import happens where
        # the function executes (it is installed into demo_venv above)
        import PyXA
        safari = PyXA.Application("Safari")
        current_doc = safari.current_document
        html = current_doc.source()
        print(f"Hello from inside the container, {name}!")
        print("Safari HTML length:", len(html))
        return {"greeted": name, "safari_html_length": len(html), "safari_html_snippet": html[:200]}

    # Call the sandboxed function with a single positional argument; the
    # decorated function is awaited and its return value is printed
    result = await greet_and_print("Cua")
    print("Result from sandboxed function:", result)
name: Publish Core Package

on:
  push:
    tags:
      - "core-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  # Resolves the version to publish from the tag, the manual input, or the caller's input
  prepare:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        # Pass expression values through the environment instead of interpolating
        # them into the script text, so input values are never parsed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          GIT_REF: ${{ github.ref }}
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
          CALL_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "$GIT_REF" =~ ^refs/tags/core-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for core"
              exit 1
            fi
          elif [ "$EVENT_NAME" == "workflow_dispatch" ]; then
            # Use version from workflow dispatch
            VERSION="$DISPATCH_VERSION"
          else
            # Use version from workflow_call
            VERSION="$CALL_VERSION"
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

  # Delegates the actual build and upload to the shared reusable workflow
  publish:
    needs: prepare
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "core"
      package_dir: "libs/python/core"
      version: ${{ needs.prepare.outputs.version }}
      is_lume_package: false
      base_package_name: "cua-core"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
"pytest-asyncio>=0.23.0" 59 | ] 60 | format = [ 61 | "black>=23.0.0", 62 | "isort>=5.12.0" 63 | ] 64 | dev = [ 65 | "ruff>=0.0.241", 66 | "mypy>=0.971" 67 | ] 68 | 69 | [tool.pdm.scripts] 70 | api = "python -m computer_server" 71 | 72 | [tool.ruff] 73 | line-length = 100 74 | target-version = "py310" 75 | select = ["E", "F", "B", "I"] 76 | fix = true 77 | 78 | [tool.ruff.format] 79 | docstring-code-format = true 80 | 81 | [tool.mypy] 82 | strict = true 83 | python_version = "3.10" 84 | ignore_missing_imports = true 85 | disallow_untyped_defs = true 86 | check_untyped_defs = true 87 | warn_return_any = true 88 | show_error_codes = true 89 | warn_unused_ignores = false 90 | ``` -------------------------------------------------------------------------------- /libs/lume/src/VM/LinuxVM.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | /// Linux-specific virtual machine implementation 4 | @MainActor 5 | final class LinuxVM: VM { 6 | override init( 7 | vmDirContext: VMDirContext, 8 | virtualizationServiceFactory: @escaping (VMVirtualizationServiceContext) throws -> VMVirtualizationService = { try LinuxVirtualizationService(configuration: $0) }, 9 | vncServiceFactory: @escaping (VMDirectory) -> VNCService = { DefaultVNCService(vmDirectory: $0) } 10 | ) { 11 | super.init( 12 | vmDirContext: vmDirContext, 13 | virtualizationServiceFactory: virtualizationServiceFactory, 14 | vncServiceFactory: vncServiceFactory 15 | ) 16 | } 17 | 18 | override func getOSType() -> String { 19 | return "linux" 20 | } 21 | 22 | override func setup( 23 | ipswPath: String, 24 | cpuCount: Int, 25 | memorySize: UInt64, 26 | diskSize: UInt64, 27 | display: String 28 | ) async throws { 29 | 30 | try setDiskSize(diskSize) 31 | 32 | let service = try virtualizationServiceFactory( 33 | try createVMVirtualizationServiceContext( 34 | cpuCount: cpuCount, 35 | memorySize: memorySize, 36 | display: display 37 | ) 38 | ) 39 | guard 
let linuxService = service as? LinuxVirtualizationService else { 40 | throw VMError.internalError("Installation requires LinuxVirtualizationService") 41 | } 42 | 43 | try updateVMConfig(vmConfig: try VMConfig( 44 | os: getOSType(), 45 | cpuCount: cpuCount, 46 | memorySize: memorySize, 47 | diskSize: diskSize, 48 | macAddress: linuxService.generateMacAddress(), 49 | display: display 50 | )) 51 | 52 | // Create NVRAM store for EFI 53 | try linuxService.createNVRAM(at: vmDirContext.nvramPath) 54 | } 55 | } ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/lume/installation.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Installation 3 | description: Installation instructions for the current version of the Lume CLI. 4 | --- 5 | 6 | ## Quickstart 7 | 8 | Install and run a prebuilt macOS VM in two commands: 9 | 10 | ```bash 11 | # Install Lume 12 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" 13 | # Pull & start a macOS image 14 | lume run macos-sequoia-vanilla:latest 15 | ``` 16 | 17 | <Callout title="Security Note"> 18 | All prebuilt images use the default password `lume`. Change this immediately after your first login using the `passwd` command. 19 | </Callout> 20 | 21 | **System Requirements**: 22 | - Apple Silicon Mac (M1, M2, M3, etc.) 23 | - macOS 13.0 or later 24 | - At least 8GB of RAM (16GB recommended) 25 | - At least 50GB of free disk space 26 | 27 | ## Install with Script 28 | 29 | Install with a single command: 30 | 31 | ```bash 32 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" 33 | ``` 34 | 35 | ### Manual Start (No Background Service) 36 | By default, Lume is installed as a background service that starts automatically on login. 
/// Test double for `VMVirtualizationService`: records how often each lifecycle
/// operation is invoked and can be configured to throw instead of changing state.
@MainActor
final class MockVMVirtualizationService: VMVirtualizationService {
    private(set) var currentState: VZVirtualMachine.State = .stopped
    private(set) var startCallCount = 0
    private(set) var stopCallCount = 0
    private(set) var pauseCallCount = 0
    private(set) var resumeCallCount = 0

    /// Current simulated machine state.
    var state: VZVirtualMachine.State { currentState }

    private var _shouldFailNextOperation = false
    private var _operationError: Error = VMError.internalError("Mock operation failed")

    /// Sets whether subsequent operations throw, and which error they throw.
    nonisolated func configure(shouldFail: Bool, error: Error = VMError.internalError("Mock operation failed")) async {
        await setConfiguration(shouldFail: shouldFail, error: error)
    }

    @MainActor
    private func setConfiguration(shouldFail: Bool, error: Error) {
        _operationError = error
        _shouldFailNextOperation = shouldFail
    }

    /// Throws the configured error while failure mode is enabled.
    private func throwIfConfiguredToFail() throws {
        guard !_shouldFailNextOperation else {
            throw _operationError
        }
    }

    // Each operation counts the call first, so failed calls are recorded too.

    func start() async throws {
        startCallCount += 1
        try throwIfConfiguredToFail()
        currentState = .running
    }

    func stop() async throws {
        stopCallCount += 1
        try throwIfConfiguredToFail()
        currentState = .stopped
    }

    func pause() async throws {
        pauseCallCount += 1
        try throwIfConfiguredToFail()
        currentState = .paused
    }

    func resume() async throws {
        resumeCallCount += 1
        try throwIfConfiguredToFail()
        currentState = .running
    }

    /// Returns a placeholder value in place of a real virtual machine object.
    func getVirtualMachine() -> Any {
        "mock_vm"
    }
}
'https://github.com/trycua/cua', 42 | links: [ 43 | { 44 | url: 'https://trycua.com', 45 | text: 'cua home', 46 | type: 'icon', 47 | icon: <HomeIcon />, 48 | external: false, 49 | }, 50 | { 51 | url: 'https://discord.com/invite/mVnXXpdE85', 52 | text: 'cua discord', 53 | type: 'icon', 54 | icon: ( 55 | <> 56 | <Image 57 | width={20} 58 | height={20} 59 | alt="Discord" 60 | className="hidden dark:block opacity-70 hover:opacity-100" 61 | src={DiscordWhite} 62 | /> 63 | <Image 64 | width={20} 65 | height={20} 66 | alt="Discord" 67 | className="dark:hidden block opacity-55 hover:opacity-100" 68 | src={DiscordBlack} 69 | /> 70 | </> 71 | ), 72 | }, 73 | ], 74 | }; 75 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/usage-tracking.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Usage Tracking 3 | sidebar_position: 9 4 | description: How to track token usage and cost in ComputerAgent and agent loops. 5 | --- 6 | 7 | Tracking usage is important for monitoring costs and optimizing your agent workflows. The ComputerAgent API provides easy access to token and cost usage for every run. 
8 | 9 | ## Accessing Usage Data 10 | 11 | Whenever you run an agent loop, each result contains a `usage` dictionary with token and cost information: 12 | 13 | ```python 14 | async for result in agent.run(...): 15 | print(result["usage"]) 16 | # Example output: 17 | # { 18 | # "prompt_tokens": 150, 19 | # "completion_tokens": 75, 20 | # "total_tokens": 225, 21 | # "response_cost": 0.01, 22 | # } 23 | ``` 24 | 25 | - `prompt_tokens`: Number of tokens in the prompt 26 | - `completion_tokens`: Number of tokens in the agent's response 27 | - `total_tokens`: Total tokens used 28 | - `response_cost`: Estimated cost (USD) for this turn 29 | 30 | ## Tracking Total Usage 31 | 32 | You can accumulate usage across multiple turns: 33 | 34 | ```python 35 | total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "response_cost": 0.0} 36 | async for result in agent.run(...): 37 | for k in total_usage: 38 | total_usage[k] += result["usage"].get(k, 0) 39 | print("Total usage:", total_usage) 40 | ``` 41 | 42 | ## Using Callbacks for Usage Tracking 43 | 44 | You can also use a callback to automatically track usage. 
/// `create` subcommand: collects the VM parameters from the command line and
/// forwards them to `LumeController.create`.
struct Create: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Create a new virtual machine"
    )

    @Argument(help: "Name for the virtual machine")
    var name: String

    @Option(
        help: "Operating system to install. Defaults to macOS.",
        completion: .list(["macOS", "linux"]))
    var os: String = "macOS"

    // No custom transform here: the built-in Int parsing reports a usage error
    // for non-numeric input instead of silently substituting the default of 4.
    @Option(help: "Number of CPU cores")
    var cpu: Int = 4

    @Option(
        help: "Memory size, e.g., 8192MB or 8GB. Defaults to 8GB.", transform: { try parseSize($0) }
    )
    var memory: UInt64 = 8 * 1024 * 1024 * 1024

    @Option(
        help: "Disk size, e.g., 20480MB or 20GB. Defaults to 50GB.",
        transform: { try parseSize($0) })
    var diskSize: UInt64 = 50 * 1024 * 1024 * 1024

    @Option(help: "Display resolution in format WIDTHxHEIGHT. Defaults to 1024x768.")
    var display: VMDisplayResolution = VMDisplayResolution(string: "1024x768")!

    @Option(
        help:
            "Path to macOS restore image (IPSW), or 'latest' to download the latest supported version. Required for macOS VMs.",
        completion: .file(extensions: ["ipsw"])
    )
    var ipsw: String?

    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {
    }

    /// Creates the virtual machine with the parsed options.
    @MainActor
    func run() async throws {
        let controller = LumeController()
        try await controller.create(
            name: name,
            os: os,
            diskSize: diskSize,
            cpuCount: cpu,
            memorySize: memory,
            display: display.string,
            ipsw: ipsw,
            storage: storage
        )
    }
}
def sandboxed(venv_name: str = "default", computer: Any = "default", max_retries: int = 3):
    """
    Decorator that wraps a function to be executed remotely via computer.venv_exec

    The decorated function becomes a coroutine function: calling it returns an
    awaitable that resolves to whatever ``computer.venv_exec`` returns.

    Args:
        venv_name: Name of the virtual environment to execute in
        computer: The computer instance to use, or "default" to use the globally set default
        max_retries: Maximum number of attempts for the remote execution.
            Values below 1 are treated as 1 so the function is always tried once.

    Raises (when the decorated function is awaited):
        RuntimeError: If no computer instance is available.
        Exception: The error of the final attempt, once every attempt has failed.
    """
    # Always make at least one attempt; otherwise the call would silently
    # return None without ever executing the function.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Determine which computer instance to use
            comp = computer if computer != "default" else _default_computer

            if comp is None:
                raise RuntimeError("No computer instance available. Either specify a computer instance or call set_default_computer() first.")

            for attempt in range(1, attempts + 1):
                try:
                    return await comp.venv_exec(venv_name, func, *args, **kwargs)
                except Exception as e:
                    logger.error(f"Attempt {attempt} failed: {e}")
                    if attempt == attempts:
                        # Out of attempts: re-raise right away (original traceback
                        # intact) instead of sleeping before giving up.
                        raise
                    # Brief pause before the next attempt
                    await asyncio.sleep(1)
        return wrapper
    return decorator
async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
    """Decide whether the run may continue given the accumulated cost.

    Returns True while ``total_cost`` has not reached ``max_budget``. Once it
    has, raises BudgetExceededError when ``raise_error`` is set; otherwise
    prints a notice and returns False.
    """
    budget_reached = self.total_cost >= self.max_budget
    if not budget_reached:
        return True

    if self.raise_error:
        raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")

    print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
    return False
#!/bin/bash
#
# Installs a launcher script at ~/.cua/start_mcp_server.sh. The launcher creates
# (or reuses) a virtual environment, installs/upgrades cua-mcp-server into it and
# starts the MCP server.

set -e

# Create the ~/.cua directory if it doesn't exist
mkdir -p "$HOME/.cua"

# Create start_mcp_server.sh script in ~/.cua directory
# (quoted 'EOF' delimiter: the launcher text is written verbatim, nothing is expanded now)
cat > "$HOME/.cua/start_mcp_server.sh" << 'EOF'
#!/bin/bash

set -e

# NOTE: the MCP client reads this process's stdout as the protocol stream, so
# every setup step below sends its output to stderr instead of stdout.

# Function to check if a directory is writable
is_writable() {
    [ -w "$1" ]
}

# Function to check if a command exists (silent)
command_exists() {
    command -v "$1" >/dev/null 2>&1
}

# Find a writable directory for the virtual environment
if is_writable "$HOME"; then
    VENV_DIR="$HOME/.cua-mcp-venv"
elif is_writable "/tmp"; then
    VENV_DIR="/tmp/.cua-mcp-venv"
else
    # Try to create a directory in the current working directory
    TEMP_DIR="$(pwd)/.cua-mcp-venv"
    if is_writable "$(pwd)"; then
        VENV_DIR="$TEMP_DIR"
    else
        echo "Error: Cannot find a writable directory for the virtual environment." >&2
        exit 1
    fi
fi

# Check if Python is installed
if ! command_exists python3; then
    echo "Error: Python 3 is not installed." >&2
    exit 1
fi

# Check if pip is installed
if ! command_exists pip3; then
    echo "Error: pip3 is not installed." >&2
    exit 1
fi

# Create virtual environment if it doesn't exist
if [ ! -d "$VENV_DIR" ]; then
    # Keep stdout clean to prevent JSON parsing errors in Claude, but leave
    # diagnostics visible on stderr so a failed venv creation can be debugged
    python3 -m venv "$VENV_DIR" >&2
fi

# Activate virtual environment
source "$VENV_DIR/bin/activate"

# Always install/upgrade the latest version of cua-mcp-server
# (pip progress output goes to stderr so it cannot corrupt the protocol stream)
pip install --upgrade "cua-mcp-server" >&2

# Run the MCP server with isolation from development paths
cd "$VENV_DIR"  # Change to venv directory to avoid current directory in path

python3 -c "from mcp_server.server import main; main()"
EOF

# Make the script executable
chmod +x "$HOME/.cua/start_mcp_server.sh"

echo "MCP server startup script created at $HOME/.cua/start_mcp_server.sh"
17 | 18 | ## Usage Examples 19 | 20 | ### Direct Human Agent 21 | 22 | ```python 23 | from agent import ComputerAgent 24 | from agent.computer import computer 25 | 26 | agent = ComputerAgent( 27 | "human/human", 28 | tools=[computer] 29 | ) 30 | 31 | async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): 32 | pass 33 | ``` 34 | 35 | ### Composed with Grounding Model 36 | 37 | ```python 38 | agent = ComputerAgent( 39 | "huggingface-local/HelloKKMe/GTA1-7B+human/human", 40 | tools=[computer] 41 | ) 42 | 43 | async for _ in agent.run("Navigate to the settings page and enable dark mode"): 44 | pass 45 | ``` 46 | 47 | ## Features 48 | 49 | The human-in-the-loop interface provides: 50 | 51 | - **Interactive UI**: Web-based interface for reviewing and responding to agent requests 52 | - **Image Display**: Screenshots with click handlers for direct interaction 53 | - **Action Accordions**: Support for various computer actions (click, type, keypress, etc.) 54 | - **Tool Calls**: Full OpenAI-compatible tool call support 55 | - **Real-time Updates**: Smart polling for responsive UI updates 56 | 57 | ## Use Cases 58 | 59 | - **Evaluation**: Have humans evaluate agent performance and provide ground truth responses 60 | - **Demonstrations**: Create training data by having humans demonstrate tasks 61 | - **Interactive Control**: Take manual control when automated agents need human guidance 62 | - **Testing**: Validate agent, tool, and environment behavior manually 63 | 64 | --- 65 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Utils/Utils.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import ArgumentParser 3 | 4 | extension Collection { 5 | subscript (safe index: Index) -> Element? { 6 | indices.contains(index) ? self[index] : nil 7 | } 8 | } 9 | 10 | func resolveBinaryPath(_ name: String) -> URL? 
{ 11 | guard let path = ProcessInfo.processInfo.environment["PATH"] else { 12 | return nil 13 | } 14 | 15 | for pathComponent in path.split(separator: ":") { 16 | let url = URL(fileURLWithPath: String(pathComponent)) 17 | .appendingPathComponent(name, isDirectory: false) 18 | 19 | if FileManager.default.fileExists(atPath: url.path) { 20 | return url 21 | } 22 | } 23 | 24 | return nil 25 | } 26 | 27 | // Helper function to parse size strings 28 | func parseSize(_ input: String) throws -> UInt64 { 29 | let lowercased = input.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() 30 | let multiplier: Double 31 | let valueString: String 32 | 33 | if lowercased.hasSuffix("tb") { 34 | multiplier = 1024 * 1024 * 1024 * 1024 35 | valueString = String(lowercased.dropLast(2)) 36 | } else if lowercased.hasSuffix("gb") { 37 | multiplier = 1024 * 1024 * 1024 38 | valueString = String(lowercased.dropLast(2)) 39 | } else if lowercased.hasSuffix("mb") { 40 | multiplier = 1024 * 1024 41 | valueString = String(lowercased.dropLast(2)) 42 | } else if lowercased.hasSuffix("kb") { 43 | multiplier = 1024 44 | valueString = String(lowercased.dropLast(2)) 45 | } else { 46 | multiplier = 1024 * 1024 47 | valueString = lowercased 48 | } 49 | 50 | guard let value = Double(valueString.trimmingCharacters(in: .whitespacesAndNewlines)) else { 51 | throw ValidationError("Malformed size input: \(input). 
Could not parse numeric value.") 52 | } 53 | 54 | let bytesAsDouble = (value * multiplier).rounded() 55 | 56 | guard bytesAsDouble >= 0 && bytesAsDouble <= Double(UInt64.max) else { 57 | throw ValidationError("Calculated size out of bounds for UInt64: \(input)") 58 | } 59 | 60 | let val = UInt64(bytesAsDouble) 61 | 62 | return val 63 | } 64 | ``` -------------------------------------------------------------------------------- /.github/workflows/pypi-publish-som.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Publish SOM Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - "som-v*" 7 | workflow_dispatch: 8 | inputs: 9 | version: 10 | description: "Version to publish (without v prefix)" 11 | required: true 12 | default: "0.1.0" 13 | workflow_call: 14 | inputs: 15 | version: 16 | description: "Version to publish" 17 | required: true 18 | type: string 19 | outputs: 20 | version: 21 | description: "The version that was published" 22 | value: ${{ jobs.determine-version.outputs.version }} 23 | 24 | # Adding permissions at workflow level 25 | permissions: 26 | contents: write 27 | 28 | jobs: 29 | determine-version: 30 | runs-on: macos-latest 31 | outputs: 32 | version: ${{ steps.get-version.outputs.version }} 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - name: Determine version 37 | id: get-version 38 | run: | 39 | if [ "${{ github.event_name }}" == "push" ]; then 40 | # Extract version from tag (for package-specific tags) 41 | if [[ "${{ github.ref }}" =~ ^refs/tags/som-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then 42 | VERSION=${BASH_REMATCH[1]} 43 | else 44 | echo "Invalid tag format for som" 45 | exit 1 46 | fi 47 | elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then 48 | # Use version from workflow dispatch 49 | VERSION=${{ github.event.inputs.version }} 50 | else 51 | # Use version from workflow_call 52 | VERSION=${{ inputs.version }} 53 | fi 54 | echo "VERSION=$VERSION" 55 | echo "version=$VERSION" >> 
$GITHUB_OUTPUT 56 | 57 | publish: 58 | needs: determine-version 59 | uses: ./.github/workflows/pypi-reusable-publish.yml 60 | with: 61 | package_name: "som" 62 | package_dir: "libs/python/som" 63 | version: ${{ needs.determine-version.outputs.version }} 64 | is_lume_package: false 65 | base_package_name: "cua-som" 66 | secrets: 67 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 68 | ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | FROM python:3.12-slim 2 | 3 | # Set environment variables 4 | ENV PYTHONUNBUFFERED=1 \ 5 | PYTHONDONTWRITEBYTECODE=1 \ 6 | PIP_NO_CACHE_DIR=1 \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=1 \ 8 | PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/pylume:/app/libs/python/computer-server:/app/libs/python/mcp-server" 9 | 10 | # Install system dependencies for ARM architecture 11 | RUN apt-get update && apt-get install -y --no-install-recommends \ 12 | git \ 13 | build-essential \ 14 | libgl1-mesa-glx \ 15 | libglib2.0-0 \ 16 | libxcb-xinerama0 \ 17 | libxkbcommon-x11-0 \ 18 | cmake \ 19 | pkg-config \ 20 | curl \ 21 | iputils-ping \ 22 | net-tools \ 23 | sed \ 24 | xxd \ 25 | && apt-get clean \ 26 | && rm -rf /var/lib/apt/lists/* 27 | 28 | # Set working directory 29 | WORKDIR /app 30 | 31 | # Copy the entire project temporarily 32 | # We'll mount the real source code over this at runtime 33 | COPY . /app/ 34 | 35 | # Create a simple .env.local file for build.sh 36 | RUN echo "PYTHON_BIN=python" > /app/.env.local 37 | 38 | # Modify build.sh to skip virtual environment creation 39 | RUN sed -i 's/python -m venv .venv/echo "Skipping venv creation in Docker"/' /app/scripts/build.sh && \ 40 | sed -i 's/source .venv\/bin\/activate/echo "Skipping venv activation in Docker"/' /app/scripts/build.sh && \ 41 | sed -i 's/find . 
-type d -name ".venv" -exec rm -rf {} +/echo "Skipping .venv removal in Docker"/' /app/scripts/build.sh && \ 42 | chmod +x /app/scripts/build.sh 43 | 44 | # Run the build script to install dependencies 45 | RUN cd /app && ./scripts/build.sh 46 | 47 | # Clean up the source files now that dependencies are installed 48 | # When we run the container, we'll mount the actual source code 49 | RUN rm -rf /app/* /app/.??* 50 | 51 | # Note: This Docker image doesn't contain the lume executable (macOS-specific) 52 | # Instead, it relies on connecting to a lume server running on the host machine 53 | # via host.docker.internal:7777 54 | 55 | # Default command 56 | CMD ["bash"] ``` -------------------------------------------------------------------------------- /examples/computer-example-ts/src/helpers.ts: -------------------------------------------------------------------------------- ```typescript 1 | import type { Computer } from "@trycua/computer"; 2 | import type OpenAI from "openai"; 3 | 4 | export async function executeAction( 5 | computer: Computer, 6 | action: OpenAI.Responses.ResponseComputerToolCall["action"], 7 | ) { 8 | switch (action.type) { 9 | case "click": { 10 | const { x, y, button } = action; 11 | console.log(`Executing click at (${x}, ${y}) with button '${button}'.`); 12 | await computer.interface.moveCursor(x, y); 13 | if (button === "right") await computer.interface.rightClick(); 14 | else await computer.interface.leftClick(); 15 | break; 16 | } 17 | case "type": 18 | { 19 | const { text } = action; 20 | console.log(`Typing text: ${text}`); 21 | await computer.interface.typeText(text); 22 | } 23 | break; 24 | case "scroll": { 25 | const { x: locX, y: locY, scroll_x, scroll_y } = action; 26 | console.log( 27 | `Scrolling at (${locX}, ${locY}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y}).`, 28 | ); 29 | await computer.interface.moveCursor(locX, locY); 30 | await computer.interface.scroll(scroll_x, scroll_y); 31 | break; 32 | } 33 | case 
"keypress": { 34 | const { keys } = action; 35 | for (const key of keys) { 36 | console.log(`Pressing key: ${key}.`); 37 | // Map common key names to CUA equivalents 38 | if (key.toLowerCase() === "enter") { 39 | await computer.interface.pressKey("return"); 40 | } else if (key.toLowerCase() === "space") { 41 | await computer.interface.pressKey("space"); 42 | } else { 43 | await computer.interface.pressKey(key); 44 | } 45 | } 46 | break; 47 | } 48 | case "wait": { 49 | console.log(`Waiting for 3 seconds.`); 50 | await new Promise((resolve) => setTimeout(resolve, 3 * 1000)); 51 | break; 52 | } 53 | case "screenshot": { 54 | console.log("Taking screenshot."); 55 | // This is handled automatically in the main loop, but we can take an extra one if requested 56 | const screenshot = await computer.interface.screenshot(); 57 | return screenshot; 58 | } 59 | default: 60 | console.log(`Unrecognized action: ${action.type}`); 61 | break; 62 | } 63 | } 64 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/prompt-caching.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Prompt Caching 3 | sidebar_position: 8 4 | description: How to use prompt caching in ComputerAgent and agent loops. 5 | --- 6 | 7 | Prompt caching is a cost-saving feature offered by some LLM API providers that helps avoid reprocessing the same prompt, improving efficiency and reducing costs for repeated or long-running tasks. 8 | 9 | ## Usage 10 | 11 | The `use_prompt_caching` argument is available for `ComputerAgent` and agent loops: 12 | 13 | ```python 14 | agent = ComputerAgent( 15 | ..., 16 | use_prompt_caching=True, 17 | ) 18 | ``` 19 | 20 | - **Type:** `bool` 21 | - **Default:** `False` 22 | - **Purpose:** Use prompt caching to avoid reprocessing the same prompt. 
23 | 24 | ## Anthropic CUAs 25 | 26 | When using Anthropic-based CUAs (Claude models), setting `use_prompt_caching=True` will automatically add `{ "cache_control": { "type": "ephemeral" } }` to your messages. This enables prompt caching for the session and can speed up repeated runs with the same prompt. 27 | 28 | <Callout title="Note"> 29 | This argument is only required for Anthropic CUAs. For other providers, it is ignored. 30 | </Callout> 31 | 32 | ## OpenAI Provider 33 | 34 | With the OpenAI provider, prompt caching is handled automatically for prompts of 1024 tokens or more. You do **not** need to set `use_prompt_caching`—caching will occur for long prompts without any extra configuration. 35 | 36 | ## Example 37 | 38 | ```python 39 | from agent import ComputerAgent 40 | agent = ComputerAgent( 41 | model="anthropic/claude-3-5-sonnet-20241022", 42 | use_prompt_caching=True, 43 | ) 44 | ``` 45 | 46 | ## Implementation Details 47 | - For Anthropic: Adds `{ "cache_control": { "type": "ephemeral" } }` to messages when enabled. 48 | - For OpenAI: Caching is automatic for long prompts; the argument is ignored. 49 | 50 | ## When to Use 51 | - Enable for Anthropic CUAs if you want to avoid reprocessing the same prompt in repeated or iterative tasks. 52 | - Not needed for OpenAI models: caching is applied automatically and the argument is ignored. 
53 | 54 | ## See Also 55 | - [Agent Loops](./agent-loops) 56 | - [Migration Guide](./migration-guide) 57 | ``` -------------------------------------------------------------------------------- /tests/test_telemetry.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Required environment variables: 3 | - CUA_API_KEY: API key for Cua cloud provider 4 | """ 5 | 6 | import os 7 | import pytest 8 | from pathlib import Path 9 | import sys 10 | 11 | # Load environment variables from .env file 12 | project_root = Path(__file__).parent.parent 13 | env_file = project_root / ".env" 14 | print(f"Loading environment from: {env_file}") 15 | from dotenv import load_dotenv 16 | 17 | load_dotenv(env_file) 18 | 19 | # Add paths to sys.path if needed 20 | pythonpath = os.environ.get("PYTHONPATH", "") 21 | for path in pythonpath.split(":"): 22 | if path and path not in sys.path: 23 | sys.path.insert(0, path) # Insert at beginning to prioritize 24 | print(f"Added to sys.path: {path}") 25 | 26 | from core.telemetry import record_event, is_telemetry_enabled, destroy_telemetry_client 27 | 28 | 29 | class TestTelemetry: 30 | def setup_method(self): 31 | """Reset environment variables before each test""" 32 | os.environ.pop('CUA_TELEMETRY', None) 33 | os.environ.pop('CUA_TELEMETRY_ENABLED', None) 34 | destroy_telemetry_client() 35 | 36 | def test_telemetry_disabled_when_cua_telemetry_is_off(self): 37 | """Should return false when CUA_TELEMETRY is off""" 38 | os.environ['CUA_TELEMETRY'] = 'off' 39 | assert is_telemetry_enabled() is False 40 | 41 | def test_telemetry_enabled_when_cua_telemetry_not_set(self): 42 | """Should return true when CUA_TELEMETRY is not set""" 43 | assert is_telemetry_enabled() is True 44 | 45 | def test_telemetry_disabled_when_cua_telemetry_enabled_is_0(self): 46 | """Should return false if CUA_TELEMETRY_ENABLED is 0""" 47 | os.environ['CUA_TELEMETRY_ENABLED'] = '0' 48 | assert is_telemetry_enabled() is False 49 | 
50 | def test_send_test_event_to_posthog(self): 51 | """Should send a test event to PostHog""" 52 | # This should not raise an exception 53 | record_event('test_telemetry', {'message': 'Hello, world!'}) 54 | 55 | if __name__ == "__main__": 56 | # Run tests directly 57 | pytest.main([__file__, "-v"]) 58 | ``` -------------------------------------------------------------------------------- /libs/typescript/biome.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", 3 | "vcs": { 4 | "enabled": false, 5 | "clientKind": "git", 6 | "useIgnoreFile": false 7 | }, 8 | "files": { 9 | "ignoreUnknown": false, 10 | "include": ["core/**/*.ts", "computer/**/*.ts"], 11 | "ignore": ["dist", "node_modules"] 12 | }, 13 | "formatter": { 14 | "enabled": true, 15 | "useEditorconfig": true, 16 | "formatWithErrors": false, 17 | "indentStyle": "space", 18 | "indentWidth": 2, 19 | "lineEnding": "lf", 20 | "lineWidth": 80, 21 | "attributePosition": "auto", 22 | "bracketSpacing": true 23 | }, 24 | "organizeImports": { 25 | "enabled": true 26 | }, 27 | "linter": { 28 | "enabled": true, 29 | "rules": { 30 | "recommended": true, 31 | "style": { 32 | "useSelfClosingElements": "warn", 33 | "noUnusedTemplateLiteral": "warn", 34 | "noNonNullAssertion": "off" 35 | }, 36 | "a11y": { 37 | "useMediaCaption": "off", 38 | "useKeyWithClickEvents": "warn", 39 | "useKeyWithMouseEvents": "warn", 40 | "noSvgWithoutTitle": "off", 41 | "useButtonType": "warn", 42 | "noAutofocus": "off" 43 | }, 44 | "suspicious": { 45 | "noArrayIndexKey": "off" 46 | }, 47 | "correctness": { 48 | "noUnusedVariables": "warn", 49 | "noUnusedFunctionParameters": "warn", 50 | "noUnusedImports": "warn" 51 | }, 52 | "complexity": { 53 | "useOptionalChain": "info" 54 | }, 55 | "nursery": { 56 | "useSortedClasses": { 57 | "level": "warn", 58 | "fix": "safe", 59 | "options": { 60 | "attributes": ["className"], 61 | "functions": 
["cn"] 62 | } 63 | } 64 | } 65 | } 66 | }, 67 | "javascript": { 68 | "formatter": { 69 | "jsxQuoteStyle": "double", 70 | "quoteProperties": "asNeeded", 71 | "trailingCommas": "es5", 72 | "semicolons": "always", 73 | "arrowParentheses": "always", 74 | "bracketSameLine": false, 75 | "quoteStyle": "single", 76 | "attributePosition": "auto", 77 | "bracketSpacing": true 78 | } 79 | } 80 | } 81 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/som/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Set-of-Mark 3 | description: Reference for the current version of the Set-of-Mark library. 4 | pypi: cua-som 5 | github: 6 | - https://github.com/trycua/cua/tree/main/libs/python/som 7 | --- 8 | 9 | <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/som_examples.py" target="_blank">Python example</a> is available for this documentation.</Callout> 10 | 11 | ## Overview 12 | 13 | The SOM library provides visual element detection and interaction capabilities. It is based on the [Set-of-Mark](https://arxiv.org/abs/2310.11441) research paper and the [OmniParser](https://github.com/microsoft/OmniParser) model. 
14 | 15 | ## API Documentation 16 | 17 | ### OmniParser Class 18 | 19 | ```python 20 | class OmniParser: 21 | def __init__(self, device: str = "auto"): 22 | """Initialize the parser with automatic device detection""" 23 | 24 | def parse( 25 | self, 26 | image: PIL.Image, 27 | box_threshold: float = 0.3, 28 | iou_threshold: float = 0.1, 29 | use_ocr: bool = True, 30 | ocr_engine: str = "easyocr" 31 | ) -> ParseResult: 32 | """Parse UI elements from an image""" 33 | ``` 34 | 35 | ### ParseResult Object 36 | 37 | ```python 38 | @dataclass 39 | class ParseResult: 40 | elements: List[UIElement] # Detected elements 41 | visualized_image: PIL.Image # Annotated image 42 | processing_time: float # Time in seconds 43 | 44 | def to_dict(self) -> dict: 45 | """Convert to JSON-serializable dictionary""" 46 | 47 | def filter_by_type(self, elem_type: str) -> List[UIElement]: 48 | """Filter elements by type ('icon' or 'text')""" 49 | ``` 50 | 51 | ### UIElement 52 | 53 | ```python 54 | class UIElement(BaseModel): 55 | id: Optional[int] = Field(None) # Element ID (1-indexed) 56 | type: Literal["icon", "text"] # Element type 57 | bbox: BoundingBox # Bounding box coordinates { x1, y1, x2, y2 } 58 | interactivity: bool = Field(default=False) # Whether the element is interactive 59 | confidence: float = Field(default=1.0) # Detection confidence 60 | ``` 61 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/computer-server/WebSocket-API.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: WebSocket API Reference 3 | description: Reference for the /ws WebSocket endpoint of the Computer Server. 4 | --- 5 | 6 | # WebSocket API Reference 7 | 8 | The Computer Server exposes a WebSocket endpoint for real-time command execution and streaming results. 
9 | 10 | - `ws://localhost:8000/ws` 11 | - `wss://your-container.containers.cloud.trycua.com:8443/ws` (cloud) 12 | 13 | ### Authentication (Cloud Only) 14 | For cloud containers, you must authenticate immediately after connecting: 15 | ```json 16 | { 17 | "command": "authenticate", 18 | "params": { 19 | "container_name": "your-container", 20 | "api_key": "your-api-key" 21 | } 22 | } 23 | ``` 24 | If authentication fails, the connection is closed. 25 | 26 | ### Command Format 27 | Send JSON messages: 28 | ```json 29 | { 30 | "command": "<command_name>", 31 | "params": { ... } 32 | } 33 | ``` 34 | 35 | ### Example (Python) 36 | ```python 37 | import websockets 38 | import asyncio 39 | import json 40 | 41 | async def main(): 42 | uri = "ws://localhost:8000/ws" 43 | async with websockets.connect(uri) as ws: 44 | await ws.send(json.dumps({"command": "version", "params": {}})) 45 | response = await ws.recv() 46 | print(response) 47 | 48 | asyncio.run(main()) 49 | ``` 50 | 51 | ### Example (Cloud) 52 | ```python 53 | import websockets 54 | import asyncio 55 | import json 56 | 57 | async def main(): 58 | uri = "wss://your-container.containers.cloud.trycua.com:8443/ws" 59 | async with websockets.connect(uri) as ws: 60 | await ws.send(json.dumps({ 61 | "command": "authenticate", 62 | "params": { 63 | "container_name": "your-container", 64 | "api_key": "your-api-key" 65 | } 66 | })) 67 | auth_response = await ws.recv() 68 | print(auth_response) 69 | await ws.send(json.dumps({"command": "version", "params": {}})) 70 | response = await ws.recv() 71 | print(response) 72 | 73 | asyncio.run(main()) 74 | ``` 75 | 76 | ### Response Format 77 | Each response is a JSON object: 78 | ```json 79 | { 80 | "success": true, 81 | ... 82 | } 83 | ``` 84 | 85 | ### Supported Commands 86 | See [Commands Reference](./Commands) for the full list of commands and parameters. 
87 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | build-backend = "pdm.backend" 3 | requires = ["pdm-backend"] 4 | 5 | [project] 6 | authors = [{ name = "TryCua", email = "[email protected]" }] 7 | dependencies = [ 8 | "openai<1.100.0", 9 | "anthropic>=0.67.0", 10 | ] 11 | description = "CUA (Computer Use Agent) mono-repo" 12 | license = { text = "MIT" } 13 | name = "cua-workspace" 14 | readme = "README.md" 15 | requires-python = "<3.14,>=3.12" 16 | version = "0.1.0" 17 | 18 | [project.urls] 19 | repository = "https://github.com/trycua/cua" 20 | 21 | [dependency-groups] 22 | dev = [] 23 | examples = [] 24 | 25 | [tool.pdm] 26 | distribution = false 27 | 28 | [tool.pdm.dev-dependencies] 29 | dev = [ 30 | "-e core @ file:///${PROJECT_ROOT}/libs/python/core", 31 | "-e agent @ file:///${PROJECT_ROOT}/libs/python/agent", 32 | "-e computer @ file:///${PROJECT_ROOT}/libs/python/computer", 33 | "-e computer-server @ file:///${PROJECT_ROOT}/libs/python/computer-server", 34 | "-e cua-som @ file:///${PROJECT_ROOT}/libs/python/som", 35 | "-e mcp-server @ file:///${PROJECT_ROOT}/libs/python/mcp-server", 36 | "-e pylume @ file:///${PROJECT_ROOT}/libs/python/pylume", 37 | "black>=23.0.0", 38 | "ipykernel>=6.29.5", 39 | "jedi>=0.19.2", 40 | "jupyter>=1.0.0", 41 | "mypy>=1.10.0", 42 | "ruff>=0.9.2", 43 | "types-requests>=2.31.0", 44 | "hud-python[agent]==0.4.52" 45 | ] 46 | docs = ["mkdocs-material>=9.2.0", "mkdocs>=1.5.0"] 47 | test = [ 48 | "aioresponses>=0.7.4", 49 | "pytest-asyncio>=0.21.1", 50 | "pytest-cov>=4.1.0", 51 | "pytest-mock>=3.10.0", 52 | "pytest-xdist>=3.6.1", 53 | "pytest>=8.0.0", 54 | ] 55 | 56 | [tool.pdm.resolution] 57 | respect-source-order = true 58 | 59 | [tool.black] 60 | line-length = 100 61 | target-version = ["py311"] 62 | 63 | [tool.ruff] 64 | fix = true 65 | line-length = 100 66 | select 
= ["B", "E", "F", "I"] 67 | target-version = "py311" 68 | 69 | [tool.ruff.format] 70 | docstring-code-format = true 71 | 72 | [tool.mypy] 73 | check_untyped_defs = true 74 | disallow_untyped_defs = true 75 | ignore_missing_imports = true 76 | python_version = "3.11" 77 | show_error_codes = true 78 | strict = true 79 | warn_return_any = true 80 | warn_unused_ignores = false 81 | 82 | [tool.pytest.ini_options] 83 | asyncio_mode = "auto" 84 | python_files = "test_*.py" 85 | testpaths = ["libs/*/tests"] 86 | ``` -------------------------------------------------------------------------------- /libs/python/agent/pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "cua-agent" 7 | version = "0.4.32" 8 | description = "CUA (Computer Use) Agent for AI-driven computer interaction" 9 | readme = "README.md" 10 | authors = [ 11 | { name = "TryCua", email = "[email protected]" } 12 | ] 13 | dependencies = [ 14 | "httpx>=0.27.0", 15 | "aiohttp>=3.9.3", 16 | "asyncio", 17 | "anyio>=4.4.1", 18 | "typing-extensions>=4.12.2", 19 | "pydantic>=2.6.4", 20 | "rich>=13.7.1", 21 | "python-dotenv>=1.0.1", 22 | "cua-computer>=0.4.0,<0.5.0", 23 | "cua-core>=0.1.8,<0.2.0", 24 | "certifi>=2024.2.2", 25 | "litellm>=1.74.12" 26 | ] 27 | requires-python = ">=3.12" 28 | 29 | [project.optional-dependencies] 30 | openai = [] 31 | anthropic = [] 32 | omni = [ 33 | "cua-som>=0.1.0,<0.2.0", 34 | ] 35 | uitars = [] 36 | uitars-mlx = [ 37 | "mlx-vlm>=0.1.27; sys_platform == 'darwin'" 38 | ] 39 | uitars-hf = [ 40 | "accelerate", 41 | "torch", 42 | "transformers>=4.54.0" 43 | ] 44 | glm45v-hf = [ 45 | "accelerate", 46 | "torch", 47 | "transformers-v4.55.0-GLM-4.5V-preview" 48 | ] 49 | opencua-hf = [ 50 | "accelerate", 51 | "torch", 52 | "transformers==4.53.0", 53 | "tiktoken>=0.11.0", 54 | "blobfile>=3.0.0" 55 | ] 56 | internvl-hf = [ 57 | 
"accelerate", 58 | "torch", 59 | "transformers>=4.55.0", 60 | "einops", 61 | "timm" 62 | ] 63 | ui = [ 64 | "gradio>=5.23.3", 65 | "python-dotenv>=1.0.1", 66 | ] 67 | cli = [ 68 | "yaspin>=3.1.0", 69 | ] 70 | hud = [ 71 | "hud-python==0.4.52", 72 | ] 73 | gemini = [ 74 | "google-genai>=1.41.0", 75 | ] 76 | all = [ 77 | # uitars requirements 78 | "mlx-vlm>=0.1.27; sys_platform == 'darwin'", 79 | "accelerate", 80 | "torch", 81 | "transformers>=4.55.0", 82 | # internvl requirements, 83 | "einops", 84 | "timm", 85 | # opencua requirements 86 | "tiktoken>=0.11.0", 87 | "blobfile>=3.0.0", 88 | # ui requirements 89 | "gradio>=5.23.3", 90 | "python-dotenv>=1.0.1", 91 | # cli requirements 92 | "yaspin>=3.1.0", 93 | # hud requirements 94 | "hud-python==0.4.52", 95 | # gemini requirements 96 | "google-genai>=1.41.0", 97 | ] 98 | 99 | [tool.uv] 100 | constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"] 101 | 102 | [tool.pdm] 103 | distribution = true 104 | 105 | [tool.pdm.build] 106 | includes = ["agent/"] 107 | ``` -------------------------------------------------------------------------------- /docs/content/docs/libraries/som/configuration.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Configuration 3 | --- 4 | 5 | ### Detection Parameters 6 | 7 | #### Box Threshold (0.3) 8 | Controls the confidence threshold for accepting detections: 9 | <img src="/docs/img/som_box_threshold.png" alt="Illustration of confidence thresholds in object detection, with a high-confidence detection accepted and a low-confidence detection rejected." 
width="500px" /> 10 | - Higher values (0.3) yield more precise but fewer detections 11 | - Lower values (0.01) catch more potential icons but increase false positives 12 | - Default is 0.3 for optimal precision/recall balance 13 | 14 | #### IOU Threshold (0.1) 15 | Controls how overlapping detections are merged: 16 | <img src="/docs/img/som_iou_threshold.png" alt="Diagram showing Intersection over Union (IOU) with low overlap between two boxes kept separate and high overlap leading to merging." width="500px" /> 17 | - Lower values (0.1) more aggressively remove overlapping boxes 18 | - Higher values (0.5) allow more overlapping detections 19 | - Default is 0.1 to handle densely packed UI elements 20 | 21 | ### OCR Configuration 22 | 23 | - **Engine**: EasyOCR 24 | - Primary choice for all platforms 25 | - Fast initialization and processing 26 | - Built-in English language support 27 | - GPU acceleration when available 28 | 29 | - **Settings**: 30 | - Timeout: 5 seconds 31 | - Confidence threshold: 0.5 32 | - Paragraph mode: Disabled 33 | - Language: English only 34 | 35 | ## Performance 36 | 37 | ### Hardware Acceleration 38 | 39 | #### MPS (Metal Performance Shaders) 40 | - Multi-scale detection (640px, 1280px, 1920px) 41 | - Test-time augmentation enabled 42 | - Half-precision (FP16) 43 | - Average detection time: ~0.4s 44 | - Best for production use when available 45 | 46 | #### CPU 47 | - Single-scale detection (1280px) 48 | - Full-precision (FP32) 49 | - Average detection time: ~1.3s 50 | - Reliable fallback option 51 | 52 | ### Example Output Structure 53 | 54 | ``` 55 | examples/output/ 56 | ├── {timestamp}_no_ocr/ 57 | │ ├── annotated_images/ 58 | │ │ └── screenshot_analyzed.png 59 | │ ├── screen_details.txt 60 | │ └── summary.json 61 | └── {timestamp}_ocr/ 62 | ├── annotated_images/ 63 | │ └── screenshot_analyzed.png 64 | ├── screen_details.txt 65 | └── summary.json 66 | ``` ``` 
-------------------------------------------------------------------------------- /libs/python/computer-server/examples/usage_example.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Example showing how to use the CUA Computer API as an imported package. 4 | """ 5 | 6 | import asyncio 7 | import logging 8 | from typing import TYPE_CHECKING 9 | 10 | # For type checking only 11 | if TYPE_CHECKING: 12 | from computer_api import Server 13 | 14 | # Setup logging 15 | logging.basicConfig( 16 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 17 | ) 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | # Example 1: Synchronous usage (blocks until server is stopped) 22 | def example_sync(): 23 | """ 24 | Example of synchronous server usage. This will block until interrupted. 25 | Run with: python3 -m examples.usage_example sync 26 | """ 27 | # Import directly to avoid any confusion 28 | from computer_api.server import Server 29 | 30 | server = Server(port=8080) 31 | print("Server started at http://localhost:8080") 32 | print("Press Ctrl+C to stop the server") 33 | 34 | try: 35 | server.start() # This will block until the server is stopped 36 | except KeyboardInterrupt: 37 | print("Server stopped by user") 38 | 39 | 40 | # Example 2: Asynchronous usage 41 | async def example_async(): 42 | """ 43 | Example of asynchronous server usage. This will start the server in the background 44 | and allow other operations to run concurrently. 
45 | Run with: python3 -m examples.usage_example async 46 | """ 47 | # Import directly to avoid any confusion 48 | from computer_api.server import Server 49 | 50 | server = Server(port=8080) 51 | 52 | # Start the server in the background 53 | await server.start_async() 54 | 55 | print("Server is running in the background") 56 | print("Performing other tasks...") 57 | 58 | # Do other things while the server is running 59 | for i in range(5): 60 | print(f"Doing work iteration {i+1}/5...") 61 | await asyncio.sleep(2) 62 | 63 | print("Work complete, stopping server...") 64 | 65 | # Stop the server when done 66 | await server.stop() 67 | print("Server stopped") 68 | 69 | 70 | if __name__ == "__main__": 71 | import sys 72 | 73 | if len(sys.argv) > 1 and sys.argv[1] == "async": 74 | asyncio.run(example_async()) 75 | else: 76 | example_sync() 77 | ``` -------------------------------------------------------------------------------- /libs/lume/tests/VMVirtualizationServiceTests.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import Testing 3 | import Virtualization 4 | @testable import lume 5 | 6 | @Test("VMVirtualizationService starts correctly") 7 | func testVMVirtualizationServiceStart() async throws { 8 | let service = MockVMVirtualizationService() 9 | 10 | // Initial state 11 | #expect(await service.state == .stopped) 12 | #expect(await service.startCallCount == 0) 13 | 14 | // Start service 15 | try await service.start() 16 | #expect(await service.state == .running) 17 | #expect(await service.startCallCount == 1) 18 | } 19 | 20 | @Test("VMVirtualizationService stops correctly") 21 | func testVMVirtualizationServiceStop() async throws { 22 | let service = MockVMVirtualizationService() 23 | 24 | // Start then stop 25 | try await service.start() 26 | try await service.stop() 27 | 28 | #expect(await service.state == .stopped) 29 | #expect(await service.stopCallCount == 1) 30 | } 31 | 32 | 
@Test("VMVirtualizationService handles pause and resume") 33 | func testVMVirtualizationServicePauseResume() async throws { 34 | let service = MockVMVirtualizationService() 35 | 36 | // Start and pause 37 | try await service.start() 38 | try await service.pause() 39 | #expect(await service.state == .paused) 40 | #expect(await service.pauseCallCount == 1) 41 | 42 | // Resume 43 | try await service.resume() 44 | #expect(await service.state == .running) 45 | #expect(await service.resumeCallCount == 1) 46 | } 47 | 48 | @Test("VMVirtualizationService handles operation failures") 49 | func testVMVirtualizationServiceFailures() async throws { 50 | let service = MockVMVirtualizationService() 51 | await service.configure(shouldFail: true) 52 | 53 | // Test start failure 54 | do { 55 | try await service.start() 56 | #expect(Bool(false), "Expected start to throw") 57 | } catch let error as VMError { 58 | switch error { 59 | case .internalError(let message): 60 | #expect(message == "Mock operation failed") 61 | default: 62 | #expect(Bool(false), "Unexpected error type: \(error)") 63 | } 64 | } 65 | 66 | #expect(await service.state == .stopped) 67 | #expect(await service.startCallCount == 1) 68 | } ``` -------------------------------------------------------------------------------- /libs/python/som/pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "cua-som" 7 | version = "0.1.3" 8 | description = "Computer Vision and OCR library for detecting and analyzing UI elements" 9 | authors = [ 10 | { name = "TryCua", email = "[email protected]" } 11 | ] 12 | dependencies = [ 13 | "torch>=2.2.1", 14 | "torchvision>=0.17.1", 15 | "ultralytics>=8.1.28", 16 | "easyocr>=1.7.1", 17 | "numpy>=1.26.4", 18 | "pillow>=10.2.0", 19 | "setuptools>=75.8.1", 20 | "opencv-python-headless>=4.11.0.86", 21 | "matplotlib>=3.8.3", 22 | 
"huggingface-hub>=0.21.4", 23 | "supervision>=0.25.1", 24 | "typing-extensions>=4.9.0", 25 | "pydantic>=2.6.3" 26 | ] 27 | requires-python = ">=3.11" 28 | readme = "README.md" 29 | license = {text = "AGPL-3.0-or-later"} 30 | keywords = ["computer-vision", "ocr", "ui-analysis", "icon-detection"] 31 | classifiers = [ 32 | "Development Status :: 4 - Beta", 33 | "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", 34 | "Intended Audience :: Developers", 35 | "Programming Language :: Python :: 3", 36 | "Programming Language :: Python :: 3.11", 37 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 38 | "Topic :: Scientific/Engineering :: Image Recognition" 39 | ] 40 | 41 | [project.urls] 42 | Homepage = "https://github.com/trycua/cua" 43 | Repository = "https://github.com/trycua/cua" 44 | Documentation = "https://github.com/trycua/cua/tree/main/docs" 45 | 46 | [tool.pdm] 47 | distribution = true 48 | package-type = "library" 49 | src-layout = false 50 | 51 | [tool.pdm.build] 52 | includes = ["som/"] 53 | source-includes = ["tests/", "README.md", "LICENSE"] 54 | 55 | [tool.black] 56 | line-length = 100 57 | target-version = ["py311"] 58 | 59 | [tool.ruff] 60 | line-length = 100 61 | target-version = "py311" 62 | select = ["E", "F", "B", "I"] 63 | fix = true 64 | 65 | [tool.ruff.format] 66 | docstring-code-format = true 67 | 68 | [tool.mypy] 69 | strict = true 70 | python_version = "3.11" 71 | ignore_missing_imports = true 72 | disallow_untyped_defs = true 73 | check_untyped_defs = true 74 | warn_return_any = true 75 | show_error_codes = true 76 | warn_unused_ignores = false 77 | 78 | [tool.pytest.ini_options] 79 | asyncio_mode = "auto" 80 | testpaths = ["tests"] 81 | python_files = "test_*.py" 82 | ``` -------------------------------------------------------------------------------- /libs/lumier/src/hooks/on-logon.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | 3 
| # Arguments passed from execute_remote_script in vm.sh 4 | # $1: VNC_PASSWORD 5 | # $2: HOST_SHARED_PATH (Path inside VM where host shared dir is mounted, e.g., /Volumes/My Shared Files) 6 | 7 | VNC_PASSWORD="$1" 8 | # IMPORTANT: In the VM, the shared folder is always mounted at this fixed location 9 | HOST_SHARED_PATH="/Volumes/My Shared Files" 10 | 11 | # Set default value for VNC_DEBUG if not provided 12 | VNC_DEBUG=${VNC_DEBUG:-0} 13 | 14 | # Define the path to the user's optional on-logon script within the shared folder 15 | USER_ON_LOGON_SCRIPT_PATH="$HOST_SHARED_PATH/lifecycle/on-logon.sh" 16 | 17 | # Show basic information when debug is enabled 18 | if [ "$VNC_DEBUG" = "1" ]; then 19 | echo "[VM] Lumier lifecycle script starting" 20 | echo "[VM] Looking for user script: $USER_ON_LOGON_SCRIPT_PATH" 21 | fi 22 | 23 | # Check if the user-provided script exists 24 | if [ -f "$USER_ON_LOGON_SCRIPT_PATH" ]; then 25 | if [ "$VNC_DEBUG" = "1" ]; then 26 | echo "[VM] Found user script: $USER_ON_LOGON_SCRIPT_PATH" 27 | fi 28 | 29 | # Always show what script we're executing 30 | echo "[VM] Executing user lifecycle script" 31 | 32 | # Make script executable 33 | chmod +x "$USER_ON_LOGON_SCRIPT_PATH" 34 | 35 | # Execute the user script in a subshell with error output captured 36 | "$USER_ON_LOGON_SCRIPT_PATH" "$VNC_PASSWORD" "$HOST_SHARED_PATH" 2>&1 37 | 38 | # Capture exit code 39 | USER_SCRIPT_EXIT_CODE=$? 
40 | 41 | # Always report script execution results 42 | if [ $USER_SCRIPT_EXIT_CODE -eq 0 ]; then 43 | echo "[VM] User lifecycle script completed successfully" 44 | else 45 | echo "[VM] User lifecycle script failed with exit code: $USER_SCRIPT_EXIT_CODE" 46 | fi 47 | 48 | # Check results (only in debug mode) 49 | if [ "$VNC_DEBUG" = "1" ]; then 50 | # List any files created by the script 51 | echo "[VM] Files created by user script:" 52 | ls -la /Users/lume/Desktop/hello_*.txt 2>/dev/null || echo "[VM] No script-created files found" 53 | fi 54 | else 55 | if [ "$VNC_DEBUG" = "1" ]; then 56 | echo "[VM] No user lifecycle script found" 57 | fi 58 | fi 59 | 60 | exit 0 # Ensure the entry point script exits cleanly 61 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/cost-saving.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Cost Optimization 3 | description: Budget management and image retention for cost optimization 4 | --- 5 | 6 | # Cost Optimization Callbacks 7 | 8 | Optimize agent costs with budget management and image retention callbacks. 
9 | 10 | ## Budget Manager Callbacks Example 11 | 12 | ```python 13 | from agent.callbacks import BudgetManagerCallback 14 | 15 | agent = ComputerAgent( 16 | model="anthropic/claude-3-5-sonnet-20241022", 17 | tools=[computer], 18 | callbacks=[ 19 | BudgetManagerCallback( 20 | max_budget=5.0, # $5 limit 21 | reset_after_each_run=False, 22 | raise_error=True 23 | ) 24 | ] 25 | ) 26 | ``` 27 | 28 | ## Budget Manager Shorthand 29 | 30 | ```python 31 | # Simple budget limit 32 | agent = ComputerAgent( 33 | model="anthropic/claude-3-5-sonnet-20241022", 34 | max_trajectory_budget=5.0 # $5 limit 35 | ) 36 | ``` 37 | 38 | **Or with options:** 39 | ```python 40 | # Advanced budget configuration 41 | agent = ComputerAgent( 42 | model="anthropic/claude-3-5-sonnet-20241022", 43 | max_trajectory_budget={ 44 | "max_budget": 10.0, 45 | "raise_error": True, # Raise error when exceeded 46 | "reset_after_each_run": False # Persistent across runs 47 | } 48 | ) 49 | ``` 50 | 51 | ## Image Retention Callbacks Example 52 | 53 | ```python 54 | from agent.callbacks import ImageRetentionCallback 55 | 56 | agent = ComputerAgent( 57 | model="anthropic/claude-3-5-sonnet-20241022", 58 | tools=[computer], 59 | callbacks=[ 60 | ImageRetentionCallback(only_n_most_recent_images=3) 61 | ] 62 | ) 63 | ``` 64 | 65 | ## Image Retention Shorthand 66 | 67 | ```python 68 | agent = ComputerAgent( 69 | model="anthropic/claude-3-5-sonnet-20241022", 70 | tools=[computer], 71 | only_n_most_recent_images=3 # Auto-adds ImageRetentionCallback 72 | ) 73 | ``` 74 | 75 | ## Combined Cost Optimization 76 | 77 | ```python 78 | agent = ComputerAgent( 79 | model="anthropic/claude-3-5-sonnet-20241022", 80 | tools=[computer], 81 | max_trajectory_budget=5.0, # Budget limit 82 | only_n_most_recent_images=3, # Image retention 83 | trajectory_dir="trajectories" # Track spending 84 | ) 85 | ``` 86 | 87 | ## Budget Manager Options 88 | 89 | - `max_budget`: Dollar limit for trajectory 90 | - `reset_after_each_run`: Reset budget 
per run (default: True) 91 | - `raise_error`: Raise exception vs. graceful stop (default: False) 92 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/index.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Callbacks 3 | --- 4 | 5 | Callbacks in the Agent SDK provide hooks into the agent's lifecycle, allowing for custom functionality to be executed at various stages of an agent's run. They enable extensibility by allowing developers to integrate their own logic for tasks such as logging, cost management, and data anonymization. 6 | 7 | ## Usage 8 | 9 | You can add preprocessing and postprocessing hooks using callbacks, or write your own by subclassing `AsyncCallbackHandler`. 10 | 11 | ### Built-in Callbacks 12 | 13 | Built-in callbacks can be used as follows: 14 | 15 | ```python 16 | from agent.callbacks import ( 17 | ImageRetentionCallback, 18 | TrajectorySaverCallback, 19 | BudgetManagerCallback, 20 | LoggingCallback 21 | ) 22 | 23 | agent = ComputerAgent( 24 | model="anthropic/claude-3-5-sonnet-20241022", 25 | tools=[computer], 26 | callbacks=[ 27 | ImageRetentionCallback(only_n_most_recent_images=3), 28 | TrajectorySaverCallback(trajectory_dir="trajectories"), 29 | BudgetManagerCallback(max_budget=10.0, raise_error=True), 30 | LoggingCallback(level=logging.INFO) 31 | ] 32 | ) 33 | ``` 34 | 35 | The following built-in callbacks are available: 36 | 37 | - [BudgetManagerCallback](callbacks/cost-saving): Stops execution when budget exceeded 38 | - [LoggingCallback](callbacks/logging): Logs agent activities 39 | - **ImageRetentionCallback**: Limits recent images in context 40 | - **TrajectorySaverCallback**: Saves conversation trajectories 41 | - [PII Anonymization](callbacks/pii-anonymization) 42 | 43 | ### Custom Callbacks 44 | 45 | Create custom callbacks using knowledge of the callback lifecycle as described in [Agent
Lifecycle](callbacks/agent-lifecycle). 46 | 47 | ```python 48 | from agent.callbacks.base import AsyncCallbackHandler 49 | 50 | class CustomCallback(AsyncCallbackHandler): 51 | async def on_llm_start(self, messages): 52 | """Preprocess messages before LLM call""" 53 | # Add custom preprocessing logic 54 | return messages 55 | 56 | async def on_llm_end(self, messages): 57 | """Postprocess messages after LLM call""" 58 | # Add custom postprocessing logic 59 | return messages 60 | 61 | async def on_usage(self, usage): 62 | """Track usage information""" 63 | print(f"Tokens used: {usage.total_tokens}") 64 | ``` 65 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/computers/base.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Base computer interface protocol for agent interactions. 3 | """ 4 | 5 | from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_checkable 6 | 7 | 8 | @runtime_checkable 9 | class AsyncComputerHandler(Protocol): 10 | """Protocol defining the interface for computer interactions.""" 11 | 12 | # ==== Computer-Use-Preview Action Space ==== 13 | 14 | async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: 15 | """Get the current environment type.""" 16 | ... 17 | 18 | async def get_dimensions(self) -> tuple[int, int]: 19 | """Get screen dimensions as (width, height).""" 20 | ... 21 | 22 | async def screenshot(self) -> str: 23 | """Take a screenshot and return as base64 string.""" 24 | ... 25 | 26 | async def click(self, x: int, y: int, button: str = "left") -> None: 27 | """Click at coordinates with specified button.""" 28 | ... 29 | 30 | async def double_click(self, x: int, y: int) -> None: 31 | """Double click at coordinates.""" 32 | ... 
33 | 34 | async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: 35 | """Scroll at coordinates with specified scroll amounts.""" 36 | ... 37 | 38 | async def type(self, text: str) -> None: 39 | """Type text.""" 40 | ... 41 | 42 | async def wait(self, ms: int = 1000) -> None: 43 | """Wait for specified milliseconds.""" 44 | ... 45 | 46 | async def move(self, x: int, y: int) -> None: 47 | """Move cursor to coordinates.""" 48 | ... 49 | 50 | async def keypress(self, keys: Union[List[str], str]) -> None: 51 | """Press key combination.""" 52 | ... 53 | 54 | async def drag(self, path: List[Dict[str, int]]) -> None: 55 | """Drag along specified path.""" 56 | ... 57 | 58 | async def get_current_url(self) -> str: 59 | """Get current URL (for browser environments).""" 60 | ... 61 | 62 | # ==== Anthropic Action Space ==== 63 | 64 | async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: 65 | """Left mouse down at coordinates.""" 66 | ... 67 | 68 | async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: 69 | """Left mouse up at coordinates.""" 70 | ... 
71 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/tests/interface/factory.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, expect, it } from 'vitest'; 2 | import { InterfaceFactory } from '../../src/interface/factory.ts'; 3 | import { LinuxComputerInterface } from '../../src/interface/linux.ts'; 4 | import { MacOSComputerInterface } from '../../src/interface/macos.ts'; 5 | import { WindowsComputerInterface } from '../../src/interface/windows.ts'; 6 | import { OSType } from '../../src/types.ts'; 7 | 8 | describe('InterfaceFactory', () => { 9 | const testParams = { 10 | ipAddress: '192.168.1.100', 11 | username: 'testuser', 12 | password: 'testpass', 13 | apiKey: 'test-api-key', 14 | vmName: 'test-vm', 15 | }; 16 | 17 | describe('createInterfaceForOS', () => { 18 | it('should create MacOSComputerInterface for macOS', () => { 19 | const interface_ = InterfaceFactory.createInterfaceForOS( 20 | OSType.MACOS, 21 | testParams.ipAddress, 22 | testParams.apiKey, 23 | testParams.vmName 24 | ); 25 | 26 | expect(interface_).toBeInstanceOf(MacOSComputerInterface); 27 | }); 28 | 29 | it('should create LinuxComputerInterface for Linux', () => { 30 | const interface_ = InterfaceFactory.createInterfaceForOS( 31 | OSType.LINUX, 32 | testParams.ipAddress, 33 | testParams.apiKey, 34 | testParams.vmName 35 | ); 36 | 37 | expect(interface_).toBeInstanceOf(LinuxComputerInterface); 38 | }); 39 | 40 | it('should create WindowsComputerInterface for Windows', () => { 41 | const interface_ = InterfaceFactory.createInterfaceForOS( 42 | OSType.WINDOWS, 43 | testParams.ipAddress, 44 | testParams.apiKey, 45 | testParams.vmName 46 | ); 47 | 48 | expect(interface_).toBeInstanceOf(WindowsComputerInterface); 49 | }); 50 | 51 | it('should throw error for unsupported OS type', () => { 52 | expect(() => { 53 | InterfaceFactory.createInterfaceForOS( 54 | 
'unsupported' as OSType, 55 | testParams.ipAddress, 56 | testParams.apiKey, 57 | testParams.vmName 58 | ); 59 | }).toThrow('Unsupported OS type: unsupported'); 60 | }); 61 | 62 | it('should create interface without API key and VM name', () => { 63 | const interface_ = InterfaceFactory.createInterfaceForOS( 64 | OSType.MACOS, 65 | testParams.ipAddress 66 | ); 67 | 68 | expect(interface_).toBeInstanceOf(MacOSComputerInterface); 69 | }); 70 | }); 71 | }); 72 | ``` -------------------------------------------------------------------------------- /.github/scripts/get_pyproject_version.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Verifies that the version in pyproject.toml matches the expected version. 4 | 5 | Usage: 6 | python get_pyproject_version.py <pyproject_path> <expected_version> 7 | 8 | Exit codes: 9 | 0 - Versions match 10 | 1 - Versions don't match or error occurred 11 | """ 12 | 13 | import sys 14 | try: 15 | import tomllib 16 | except ImportError: 17 | # Fallback for Python < 3.11 18 | import toml as tomllib 19 | 20 | 21 | def main(): 22 | if len(sys.argv) != 3: 23 | print("Usage: python get_pyproject_version.py <pyproject_path> <expected_version>", file=sys.stderr) 24 | sys.exit(1) 25 | 26 | pyproject_path = sys.argv[1] 27 | expected_version = sys.argv[2] 28 | 29 | # tomllib requires binary mode 30 | try: 31 | with open(pyproject_path, 'rb') as f: 32 | data = tomllib.load(f) 33 | except FileNotFoundError: 34 | print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr) 35 | sys.exit(1) 36 | except Exception as e: 37 | # Fallback to toml if using the old library or handle other errors 38 | try: 39 | import toml 40 | data = toml.load(pyproject_path) 41 | except FileNotFoundError: 42 | print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr) 43 | sys.exit(1) 44 | except Exception as toml_err: 45 | print(f"❌ ERROR: Failed to parse TOML file: {e}", 
file=sys.stderr) 46 | sys.exit(1) 47 | 48 | actual_version = data.get('project', {}).get('version') 49 | 50 | if not actual_version: 51 | print("❌ ERROR: No version found in pyproject.toml", file=sys.stderr) 52 | sys.exit(1) 53 | 54 | if actual_version != expected_version: 55 | print("❌ Version mismatch detected!", file=sys.stderr) 56 | print(f" pyproject.toml version: {actual_version}", file=sys.stderr) 57 | print(f" Expected version: {expected_version}", file=sys.stderr) 58 | print("", file=sys.stderr) 59 | print("The version in pyproject.toml must match the version being published.", file=sys.stderr) 60 | print(f"Please update pyproject.toml to version {expected_version} or use the correct tag.", file=sys.stderr) 61 | sys.exit(1) 62 | 63 | print(f"✅ Version consistency check passed: {actual_version}") 64 | sys.exit(0) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | ``` -------------------------------------------------------------------------------- /libs/kasm/src/ubuntu/install/firefox/custom_startup.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | set -ex 3 | START_COMMAND="firefox" 4 | PGREP="firefox" 5 | export MAXIMIZE="true" 6 | export MAXIMIZE_NAME="Mozilla Firefox" 7 | MAXIMIZE_SCRIPT=$STARTUPDIR/maximize_window.sh 8 | DEFAULT_ARGS="" 9 | ARGS=${APP_ARGS:-$DEFAULT_ARGS} 10 | 11 | options=$(getopt -o gau: -l go,assign,url: -n "$0" -- "$@") || exit 12 | eval set -- "$options" 13 | 14 | while [[ $1 != -- ]]; do 15 | case $1 in 16 | -g|--go) GO='true'; shift 1;; 17 | -a|--assign) ASSIGN='true'; shift 1;; 18 | -u|--url) OPT_URL=$2; shift 2;; 19 | *) echo "bad option: $1" >&2; exit 1;; 20 | esac 21 | done 22 | shift 23 | 24 | # Process non-option arguments. 25 | for arg; do 26 | echo "arg! $arg" 27 | done 28 | 29 | FORCE=$2 30 | 31 | # run with vgl if GPU is available 32 | if [ -f /opt/VirtualGL/bin/vglrun ] && [ ! -z "${KASM_EGL_CARD}" ] && [ ! 
-z "${KASM_RENDERD}" ] && [ -O "${KASM_RENDERD}" ] && [ -O "${KASM_EGL_CARD}" ] ; then 33 | START_COMMAND="/opt/VirtualGL/bin/vglrun -d ${KASM_EGL_CARD} $START_COMMAND" 34 | fi 35 | 36 | kasm_exec() { 37 | if [ -n "$OPT_URL" ] ; then 38 | URL=$OPT_URL 39 | elif [ -n "$1" ] ; then 40 | URL=$1 41 | fi 42 | 43 | # Since we are execing into a container that already has the browser running from startup, 44 | # when we don't have a URL to open we want to do nothing. Otherwise a second browser instance would open. 45 | if [ -n "$URL" ] ; then 46 | /usr/bin/filter_ready 47 | /usr/bin/desktop_ready 48 | bash ${MAXIMIZE_SCRIPT} & 49 | $START_COMMAND $ARGS $OPT_URL 50 | else 51 | echo "No URL specified for exec command. Doing nothing." 52 | fi 53 | } 54 | 55 | kasm_startup() { 56 | if [ -n "$KASM_URL" ] ; then 57 | URL=$KASM_URL 58 | elif [ -z "$URL" ] ; then 59 | URL=$LAUNCH_URL 60 | fi 61 | 62 | if [ -z "$DISABLE_CUSTOM_STARTUP" ] || [ -n "$FORCE" ] ; then 63 | 64 | echo "Entering process startup loop" 65 | set +x 66 | while true 67 | do 68 | if ! 
pgrep -x $PGREP > /dev/null 69 | then 70 | /usr/bin/filter_ready 71 | /usr/bin/desktop_ready 72 | set +e 73 | bash ${MAXIMIZE_SCRIPT} & 74 | $START_COMMAND $ARGS $URL 75 | set -e 76 | fi 77 | sleep 1 78 | done 79 | set -x 80 | 81 | fi 82 | 83 | } 84 | 85 | if [ -n "$GO" ] || [ -n "$ASSIGN" ] ; then 86 | kasm_exec 87 | else 88 | kasm_startup 89 | fi 90 | ``` -------------------------------------------------------------------------------- /examples/cloud_api_examples.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | import os 3 | from utils import load_dotenv_files 4 | 5 | load_dotenv_files() 6 | 7 | from computer.providers.cloud.provider import CloudProvider 8 | 9 | async def main() -> None: 10 | api_key = os.getenv("CUA_API_KEY") 11 | if not api_key: 12 | raise RuntimeError("CUA_API_KEY environment variable is not set") 13 | api_base = os.getenv("CUA_API_BASE") 14 | if api_base: 15 | print(f"Using API base: {api_base}") 16 | 17 | provider = CloudProvider(api_key=api_key, verbose=True) 18 | async with provider: 19 | 20 | # List all VMs 21 | vms = await provider.list_vms() 22 | print(f"Found {len(vms)} VM(s)") 23 | for vm in vms: 24 | print( 25 | f"name: {vm['name']}\n", 26 | f"status: {vm['status']}\n", # pending, running, stopped, terminated, failed 27 | f"api_url: {vm.get('api_url')}\n", 28 | f"vnc_url: {vm.get('vnc_url')}\n", 29 | ) 30 | 31 | # # --- Additional operations (commented out) --- 32 | # # To stop a VM by name: 33 | # name = "m-linux-96lcxd2c2k" 34 | # resp = await provider.stop_vm(name) 35 | # print( 36 | # "stop_vm response:\n", 37 | # f"name: {resp['name']}\n", 38 | # f"status: {resp['status']}\n", # stopping 39 | # ) 40 | 41 | # # To start a VM by name: 42 | # name = "m-linux-96lcxd2c2k" 43 | # resp = await provider.run_vm(name) 44 | # print( 45 | # "run_vm response:\n", 46 | # f"name: {resp['name']}\n", 47 | # f"status: {resp['status']}\n", # starting 48 | # ) 49 | 50 | # # To 
restart a VM by name: 51 | # name = "m-linux-96lcxd2c2k" 52 | # resp = await provider.restart_vm(name) 53 | # print( 54 | # "restart_vm response:\n", 55 | # f"name: {resp['name']}\n", 56 | # f"status: {resp['status']}\n", # restarting 57 | # ) 58 | 59 | # # To probe a VM's status via its public hostname (if you know the name): 60 | # name = "m-linux-96lcxd2c2k" 61 | # info = await provider.get_vm(name) 62 | # print("get_vm info:\n", 63 | # f"name: {info['name']}\n", 64 | # f"status: {info['status']}\n", # running 65 | # f"api_url: {info.get('api_url')}\n", 66 | # f"os_type: {info.get('os_type')}\n", 67 | # ) 68 | 69 | if __name__ == "__main__": 70 | asyncio.run(main()) 71 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/callbacks/logging.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Logging 3 | description: Agent logging and custom logger implementation 4 | --- 5 | 6 | # Logging Callback 7 | 8 | Built-in logging callback and custom logger creation for agent monitoring. 
9 | 10 | ## Callbacks Example 11 | 12 | ```python 13 | from agent.callbacks import LoggingCallback 14 | import logging 15 | 16 | agent = ComputerAgent( 17 | model="anthropic/claude-3-5-sonnet-20241022", 18 | tools=[computer], 19 | callbacks=[ 20 | LoggingCallback( 21 | logger=logging.getLogger("cua"), 22 | level=logging.INFO 23 | ) 24 | ] 25 | ) 26 | ``` 27 | 28 | ## Shorthand 29 | 30 | ```python 31 | agent = ComputerAgent( 32 | model="anthropic/claude-3-5-sonnet-20241022", 33 | tools=[computer], 34 | verbosity=logging.INFO # Auto-adds LoggingCallback 35 | ) 36 | ``` 37 | 38 | ## Custom Logger 39 | 40 | Create custom loggers by extending AsyncCallbackHandler: 41 | 42 | ```python 43 | from agent.callbacks.base import AsyncCallbackHandler 44 | import logging 45 | 46 | class CustomLogger(AsyncCallbackHandler): 47 | def __init__(self, logger_name="agent"): 48 | self.logger = logging.getLogger(logger_name) 49 | self.logger.setLevel(logging.INFO) 50 | 51 | # Add console handler 52 | handler = logging.StreamHandler() 53 | formatter = logging.Formatter( 54 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 55 | ) 56 | handler.setFormatter(formatter) 57 | self.logger.addHandler(handler) 58 | 59 | async def on_run_start(self, kwargs, old_items): 60 | self.logger.info(f"Agent run started with model: {kwargs.get('model')}") 61 | 62 | async def on_computer_call_start(self, item): 63 | action = item.get('action', {}) 64 | self.logger.info(f"Computer action: {action.get('type')}") 65 | 66 | async def on_usage(self, usage): 67 | cost = usage.get('response_cost', 0) 68 | self.logger.info(f"API call cost: ${cost:.4f}") 69 | 70 | async def on_run_end(self, kwargs, old_items, new_items): 71 | self.logger.info("Agent run completed") 72 | 73 | # Use custom logger 74 | agent = ComputerAgent( 75 | model="anthropic/claude-3-5-sonnet-20241022", 76 | tools=[computer], 77 | callbacks=[CustomLogger("my_agent")] 78 | ) 79 | ``` 80 | 81 | ## Available Hooks 82 | 83 | Log any agent event 
using these callback methods: 84 | - `on_run_start/end` - Run lifecycle 85 | - `on_computer_call_start/end` - Computer actions 86 | - `on_api_start/end` - LLM API calls 87 | - `on_usage` - Cost tracking 88 | - `on_screenshot` - Screenshot events 89 | ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/chat-history.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Chat History 3 | description: Managing conversation history and message arrays 4 | --- 5 | 6 | Managing conversation history is essential for multi-turn agent interactions. The agent maintains a messages array that tracks the entire conversation flow. 7 | 8 | ## Managing History 9 | 10 | ### Continuous Conversation 11 | 12 | ```python 13 | history = [] 14 | 15 | while True: 16 | user_input = input("> ") 17 | history.append({"role": "user", "content": user_input}) 18 | 19 | async for result in agent.run(history, stream=False): 20 | history += result["output"] 21 | ``` 22 | 23 | ## Message Array Structure 24 | 25 | The messages array contains different types of messages that represent the conversation state: 26 | 27 | ```python 28 | messages = [ 29 | # user input 30 | { 31 | "role": "user", 32 | "content": "go to trycua on gh" 33 | }, 34 | # first agent turn adds the model output to the history 35 | { 36 | "summary": [ 37 | { 38 | "text": "Searching Firefox for Trycua GitHub", 39 | "type": "summary_text" 40 | } 41 | ], 42 | "type": "reasoning" 43 | }, 44 | { 45 | "action": { 46 | "text": "Trycua GitHub", 47 | "type": "type" 48 | }, 49 | "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", 50 | "status": "completed", 51 | "type": "computer_call" 52 | }, 53 | # second agent turn adds the computer output to the history 54 | { 55 | "type": "computer_call_output", 56 | "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", 57 | "output": { 58 | "type": "input_image", 59 | "image_url": 
"data:image/png;base64,..." 60 | } 61 | }, 62 | # final agent turn adds the agent output text to the history 63 | { 64 | "type": "message", 65 | "role": "assistant", 66 | "content": [ 67 | { 68 | "text": "Success! The Trycua GitHub page has been opened.", 69 | "type": "output_text" 70 | } 71 | ] 72 | } 73 | ] 74 | ``` 75 | 76 | ## Message Types 77 | 78 | See the complete schema in [Message Format](./message-format). 79 | 80 | ### Memory Management 81 | 82 | For long conversations, consider using the `only_n_most_recent_images` parameter to manage memory: 83 | 84 | ```python 85 | agent = ComputerAgent( 86 | model="anthropic/claude-3-5-sonnet-20241022", 87 | tools=[computer], 88 | only_n_most_recent_images=3 89 | ) 90 | ``` 91 | 92 | This automatically removes old images from the conversation history to prevent context window overflow. 93 | ``` -------------------------------------------------------------------------------- /.github/workflows/pypi-publish-computer-server.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Publish Computer Server Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - "computer-server-v*" 7 | workflow_dispatch: 8 | inputs: 9 | version: 10 | description: "Version to publish (without v prefix)" 11 | required: true 12 | default: "0.1.0" 13 | workflow_call: 14 | inputs: 15 | version: 16 | description: "Version to publish" 17 | required: true 18 | type: string 19 | outputs: 20 | version: 21 | description: "The version that was published" 22 | value: ${{ jobs.prepare.outputs.version }} 23 | 24 | # Adding permissions at workflow level 25 | permissions: 26 | contents: write 27 | 28 | jobs: 29 | prepare: 30 | runs-on: macos-latest 31 | outputs: 32 | version: ${{ steps.get-version.outputs.version }} 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - name: Determine version 37 | id: get-version 38 | run: | 39 | if [ "${{ github.event_name }}" == "push" ]; then 40 | # Extract version from tag (for 
package-specific tags) 41 | if [[ "${{ github.ref }}" =~ ^refs/tags/computer-server-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then 42 | VERSION=${BASH_REMATCH[1]} 43 | else 44 | echo "Invalid tag format for computer-server" 45 | exit 1 46 | fi 47 | elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then 48 | # Use version from workflow dispatch 49 | VERSION=${{ github.event.inputs.version }} 50 | else 51 | # Use version from workflow_call 52 | VERSION=${{ inputs.version }} 53 | fi 54 | echo "VERSION=$VERSION" 55 | echo "version=$VERSION" >> $GITHUB_OUTPUT 56 | 57 | - name: Set up Python 58 | uses: actions/setup-python@v4 59 | with: 60 | python-version: "3.10" 61 | 62 | publish: 63 | needs: prepare 64 | uses: ./.github/workflows/pypi-reusable-publish.yml 65 | with: 66 | package_name: "computer-server" 67 | package_dir: "libs/python/computer-server" 68 | version: ${{ needs.prepare.outputs.version }} 69 | is_lume_package: false 70 | base_package_name: "cua-computer-server" 71 | secrets: 72 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 73 | 74 | set-env-variables: 75 | needs: [prepare, publish] 76 | runs-on: macos-latest 77 | steps: 78 | - name: Set environment variables for use in other jobs 79 | run: | 80 | echo "COMPUTER_VERSION=${{ needs.prepare.outputs.version }}" >> $GITHUB_ENV 81 | ``` -------------------------------------------------------------------------------- /libs/lume/src/VNC/PassphraseGenerator.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | import CryptoKit 3 | 4 | final class PassphraseGenerator { 5 | private let words: [String] 6 | 7 | init(words: [String] = PassphraseGenerator.defaultWords) { 8 | self.words = words 9 | } 10 | 11 | func prefix(_ count: Int) -> [String] { 12 | guard count > 0 else { return [] } 13 | 14 | // Use secure random number generation 15 | var result: [String] = [] 16 | for _ in 0..<count { 17 | let randomBytes = (0..<4).map { _ in UInt8.random(in: 0...255) } 18 
| let randomNumber = Data(randomBytes).withUnsafeBytes { bytes in 19 | bytes.load(as: UInt32.self) 20 | } 21 | let index = Int(randomNumber % UInt32(words.count)) 22 | result.append(words[index]) 23 | } 24 | return result 25 | } 26 | 27 | // A much larger set of common, easy-to-type words 28 | private static let defaultWords = [ 29 | "alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel", 30 | "india", "juliet", "kilo", "lima", "mike", "november", "oscar", "papa", 31 | "quebec", "romeo", "sierra", "tango", "uniform", "victor", "whiskey", "xray", 32 | "yankee", "zulu", "zero", "one", "two", "three", "four", "five", 33 | "six", "seven", "eight", "nine", "apple", "banana", "cherry", "date", 34 | "elder", "fig", "grape", "honey", "iris", "jade", "kiwi", "lemon", 35 | "mango", "nectarine", "orange", "peach", "quince", "raspberry", "strawberry", "tangerine", 36 | "red", "blue", "green", "yellow", "purple", "orange", "pink", "brown", 37 | "black", "white", "gray", "silver", "gold", "copper", "bronze", "steel", 38 | "north", "south", "east", "west", "spring", "summer", "autumn", "winter", 39 | "river", "ocean", "mountain", "valley", "forest", "desert", "island", "beach", 40 | "sun", "moon", "star", "cloud", "rain", "snow", "wind", "storm", 41 | "happy", "brave", "calm", "swift", "wise", "kind", "bold", "free", 42 | "safe", "strong", "bright", "clear", "light", "soft", "warm", "cool", 43 | "eagle", "falcon", "hawk", "owl", "robin", "sparrow", "swan", "dove", 44 | "tiger", "lion", "bear", "wolf", "deer", "horse", "dolphin", "whale", 45 | "maple", "oak", "pine", "birch", "cedar", "fir", "palm", "willow", 46 | "rose", "lily", "daisy", "tulip", "lotus", "orchid", "violet", "jasmine" 47 | ] 48 | } ``` -------------------------------------------------------------------------------- /docs/content/docs/agent-sdk/benchmarks/introduction.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Introduction 
3 | description: Overview of benchmarking in the c/ua agent framework 4 | --- 5 | 6 | The c/ua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks. 7 | 8 | ## Benchmark Types 9 | 10 | Computer-Agent benchmarks evaluate two key capabilities: 11 | - **Plan Generation**: Breaking down complex tasks into a sequence of actions 12 | - **Coordinate Generation**: Predicting precise click locations on GUI elements 13 | 14 | ## Using State-of-the-Art Models 15 | 16 | Let's see how to use the SOTA vision-language models in the c/ua agent framework. 17 | 18 | ### Plan Generation + Coordinate Generation 19 | 20 | **[OS-World](https://os-world.github.io/)** - Benchmark for complete computer-use agents 21 | 22 | This leaderboard tests models that can understand instructions and automatically perform the full sequence of actions needed to complete tasks. 23 | 24 | ```python 25 | # UI-TARS-1.5 is a SOTA unified plan generation + coordinate generation VLM 26 | # This makes it suitable for agentic loops for computer-use 27 | agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer]) 28 | agent.run("Open Firefox and go to github.com") 29 | # Success! 🎉 30 | ``` 31 | 32 | ### Coordinate Generation Only 33 | 34 | **[GUI Agent Grounding Leaderboard](https://gui-agent.github.io/grounding-leaderboard/)** - Benchmark for click prediction accuracy 35 | 36 | This leaderboard tests models that specialize in finding exactly where to click on screen elements, but need to be told what specific action to take. 
37 | 38 | ```python 39 | # GTA1-7B is a SOTA coordinate generation VLM 40 | # It can only generate coordinates, not plan: 41 | agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer]) 42 | agent.predict_click("find the button to open the settings") # (27, 450) 43 | # This will raise an error: 44 | # agent.run("Open Firefox and go to github.com") 45 | ``` 46 | 47 | ### Composed Agent 48 | 49 | The c/ua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model. 50 | 51 | ```python 52 | # It can be paired with any LLM to form a composed agent: 53 | # "gemini/gemini-1.5-pro" will be used as the plan generation LLM 54 | agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro", tools=[computer]) 55 | agent.run("Open Firefox and go to github.com") 56 | # Success! 🎉 57 | ``` 58 | ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/loops/base.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Base protocol for async agent configurations 3 | """ 4 | 5 | from typing import Protocol, List, Dict, Any, Optional, Tuple, Union 6 | from abc import abstractmethod 7 | from ..types import AgentCapability 8 | 9 | class AsyncAgentConfig(Protocol): 10 | """Protocol defining the interface for async agent configurations.""" 11 | 12 | @abstractmethod 13 | async def predict_step( 14 | self, 15 | messages: List[Dict[str, Any]], 16 | model: str, 17 | tools: Optional[List[Dict[str, Any]]] = None, 18 | max_retries: Optional[int] = None, 19 | stream: bool = False, 20 | computer_handler=None, 21 | _on_api_start=None, 22 | _on_api_end=None, 23 | _on_usage=None, 24 | _on_screenshot=None, 25 | **kwargs 26 | ) -> Dict[str, Any]: 27 | """ 28 | Predict the next step based on input items. 
29 | 30 | Args: 31 | messages: Input items following Responses format (message, function_call, computer_call) 32 | model: Model name to use 33 | tools: Optional list of tool schemas 34 | max_retries: Maximum number of retries for failed API calls 35 | stream: Whether to stream responses 36 | computer_handler: Computer handler instance 37 | _on_api_start: Callback for API start 38 | _on_api_end: Callback for API end 39 | _on_usage: Callback for usage tracking 40 | _on_screenshot: Callback for screenshot events 41 | **kwargs: Additional arguments 42 | 43 | Returns: 44 | Dictionary with "output" (output items) and "usage" array 45 | """ 46 | ... 47 | 48 | @abstractmethod 49 | async def predict_click( 50 | self, 51 | model: str, 52 | image_b64: str, 53 | instruction: str 54 | ) -> Optional[Tuple[int, int]]: 55 | """ 56 | Predict click coordinates based on image and instruction. 57 | 58 | Args: 59 | model: Model name to use 60 | image_b64: Base64 encoded image 61 | instruction: Instruction for where to click 62 | 63 | Returns: 64 | None or tuple with (x, y) coordinates 65 | """ 66 | ... 67 | 68 | @abstractmethod 69 | def get_capabilities(self) -> List[AgentCapability]: 70 | """ 71 | Get list of capabilities supported by this agent config. 72 | 73 | Returns: 74 | List of capability strings (e.g., ["step", "click"]) 75 | """ 76 | ... 
77 | ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/computer/providers/cloud.ts: -------------------------------------------------------------------------------- ```typescript 1 | import pino from 'pino'; 2 | import { 3 | type BaseComputerInterface, 4 | InterfaceFactory, 5 | } from '../../interface/index'; 6 | import type { CloudComputerConfig, VMProviderType } from '../types'; 7 | import { BaseComputer } from './base'; 8 | 9 | /** 10 | * Cloud-specific computer implementation 11 | */ 12 | export class CloudComputer extends BaseComputer { 13 | protected static vmProviderType: VMProviderType.CLOUD; 14 | protected apiKey: string; 15 | private iface?: BaseComputerInterface; 16 | private initialized = false; 17 | 18 | protected logger = pino({ name: 'computer.provider_cloud' }); 19 | 20 | constructor(config: CloudComputerConfig) { 21 | super(config); 22 | this.apiKey = config.apiKey; 23 | } 24 | 25 | get ip() { 26 | return `${this.name}.containers.cloud.trycua.com`; 27 | } 28 | 29 | /** 30 | * Initialize the cloud VM and interface 31 | */ 32 | async run(): Promise<void> { 33 | if (this.initialized) { 34 | this.logger.info('Computer already initialized, skipping initialization'); 35 | return; 36 | } 37 | 38 | try { 39 | // For cloud provider, the VM is already running, we just need to connect 40 | const ipAddress = this.ip; 41 | this.logger.info(`Connecting to cloud VM at ${ipAddress}`); 42 | 43 | // Create the interface with API key authentication 44 | this.iface = InterfaceFactory.createInterfaceForOS( 45 | this.osType, 46 | ipAddress, 47 | this.apiKey, 48 | this.name 49 | ); 50 | 51 | // Wait for the interface to be ready 52 | this.logger.info('Waiting for interface to be ready...'); 53 | await this.iface.waitForReady(); 54 | 55 | this.initialized = true; 56 | this.logger.info('Cloud computer ready'); 57 | } catch (error) { 58 | this.logger.error(`Failed to initialize cloud computer: ${error}`); 59 | 
throw new Error(`Failed to initialize cloud computer: ${error}`); 60 | } 61 | } 62 | 63 | /** 64 | * Stop the cloud computer (disconnect interface) 65 | */ 66 | async stop(): Promise<void> { 67 | this.logger.info('Disconnecting from cloud computer...'); 68 | 69 | if (this.iface) { 70 | this.iface.disconnect(); 71 | this.iface = undefined; 72 | } 73 | 74 | this.initialized = false; 75 | this.logger.info('Disconnected from cloud computer'); 76 | } 77 | 78 | /** 79 | * Get the computer interface 80 | */ 81 | get interface(): BaseComputerInterface { 82 | if (!this.iface) { 83 | throw new Error('Computer not initialized. Call run() first.'); 84 | } 85 | return this.iface; 86 | } 87 | 88 | /** 89 | * Disconnect from the cloud computer 90 | */ 91 | async disconnect(): Promise<void> { 92 | await this.stop(); 93 | } 94 | } 95 | ``` -------------------------------------------------------------------------------- /libs/lume/src/Commands/Push.swift: -------------------------------------------------------------------------------- ```swift 1 | import ArgumentParser 2 | import Foundation 3 | 4 | struct Push: AsyncParsableCommand { 5 | static let configuration = CommandConfiguration( 6 | abstract: "Push a macOS VM to GitHub Container Registry" 7 | ) 8 | 9 | @Argument(help: "Name of the VM to push") 10 | var name: String 11 | 12 | @Argument(help: "Image tag to push (format: name:tag)") 13 | var image: String 14 | 15 | @Option(parsing: .upToNextOption, help: "Additional tags to push the same image to") 16 | var additionalTags: [String] = [] 17 | 18 | @Option(help: "Github Container Registry to push to. Defaults to ghcr.io") 19 | var registry: String = "ghcr.io" 20 | 21 | @Option(help: "Organization to push to. Defaults to trycua") 22 | var organization: String = "trycua" 23 | 24 | @Option(name: .customLong("storage"), help: "VM storage location to use") 25 | var storage: String? 26 | 27 | @Option(help: "Chunk size for large files in MB. 
Defaults to 512.") 28 | var chunkSizeMb: Int = 512 29 | 30 | @Flag(name: .long, help: "Enable verbose logging") 31 | var verbose: Bool = false 32 | 33 | @Flag(name: .long, help: "Prepare files without uploading to registry") 34 | var dryRun: Bool = false 35 | 36 | @Flag(name: .long, help: "In dry-run mode, also reassemble chunks to verify integrity") 37 | var reassemble: Bool = true 38 | 39 | init() {} 40 | 41 | @MainActor 42 | func run() async throws { 43 | let controller = LumeController() 44 | 45 | // Parse primary image name and tag 46 | let components = image.split(separator: ":") 47 | guard components.count == 2, let primaryTag = components.last else { 48 | throw ValidationError("Invalid primary image format. Expected format: name:tag") 49 | } 50 | let imageName = String(components.first!) 51 | 52 | // Combine primary and additional tags, ensuring uniqueness 53 | var allTags: Swift.Set<String> = [] 54 | allTags.insert(String(primaryTag)) 55 | allTags.formUnion(additionalTags) 56 | 57 | guard !allTags.isEmpty else { 58 | throw ValidationError("At least one tag must be provided.") 59 | } 60 | 61 | try await controller.pushImage( 62 | name: name, 63 | imageName: imageName, // Pass base image name 64 | tags: Array(allTags), // Pass array of all unique tags 65 | registry: registry, 66 | organization: organization, 67 | storage: storage, 68 | chunkSizeMb: chunkSizeMb, 69 | verbose: verbose, 70 | dryRun: dryRun, 71 | reassemble: reassemble 72 | ) 73 | } 74 | } ``` -------------------------------------------------------------------------------- /.github/workflows/pypi-publish-pylume.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Publish Pylume Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - "pylume-v*" 7 | workflow_dispatch: 8 | inputs: 9 | version: 10 | description: "Version to publish (without v prefix)" 11 | required: true 12 | default: "0.1.0" 13 | workflow_call: 14 | inputs: 15 | version: 16 | description: 
"Version to publish" 17 | required: true 18 | type: string 19 | outputs: 20 | version: 21 | description: "The version that was published" 22 | value: ${{ jobs.determine-version.outputs.version }} 23 | 24 | # Adding permissions at workflow level 25 | permissions: 26 | contents: write 27 | 28 | jobs: 29 | determine-version: 30 | runs-on: macos-latest 31 | outputs: 32 | version: ${{ steps.get-version.outputs.version }} 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - name: Determine version 37 | id: get-version 38 | run: | 39 | if [ "${{ github.event_name }}" == "push" ]; then 40 | # Extract version from tag (for package-specific tags) 41 | if [[ "${{ github.ref }}" =~ ^refs/tags/pylume-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then 42 | VERSION=${BASH_REMATCH[1]} 43 | else 44 | echo "Invalid tag format for pylume" 45 | exit 1 46 | fi 47 | elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then 48 | # Use version from workflow dispatch 49 | VERSION=${{ github.event.inputs.version }} 50 | else 51 | # Use version from workflow_call 52 | VERSION=${{ inputs.version }} 53 | fi 54 | echo "VERSION=$VERSION" 55 | echo "version=$VERSION" >> $GITHUB_OUTPUT 56 | 57 | validate-version: 58 | runs-on: macos-latest 59 | needs: determine-version 60 | steps: 61 | - uses: actions/checkout@v4 62 | - name: Validate version 63 | id: validate-version 64 | run: | 65 | CODE_VERSION=$(grep '__version__' libs/python/pylume/pylume/__init__.py | cut -d'"' -f2) 66 | if [ "${{ needs.determine-version.outputs.version }}" != "$CODE_VERSION" ]; then 67 | echo "Version mismatch: expected $CODE_VERSION, got ${{ needs.determine-version.outputs.version }}" 68 | exit 1 69 | fi 70 | echo "Version validated: $CODE_VERSION" 71 | 72 | publish: 73 | needs: determine-version 74 | uses: ./.github/workflows/pypi-reusable-publish.yml 75 | with: 76 | package_name: "pylume" 77 | package_dir: "libs/python/pylume" 78 | version: ${{ needs.determine-version.outputs.version }} 79 | is_lume_package: true 80 | 
base_package_name: "pylume" 81 | secrets: 82 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 83 | ``` -------------------------------------------------------------------------------- /libs/lume/src/FileSystem/VMLocation.swift: -------------------------------------------------------------------------------- ```swift 1 | import Foundation 2 | 3 | /// Represents a location where VMs can be stored 4 | struct VMLocation: Codable, Equatable, Sendable { 5 | let name: String 6 | let path: String 7 | 8 | var expandedPath: String { 9 | (path as NSString).expandingTildeInPath 10 | } 11 | 12 | /// Validates the location path exists and is writable 13 | func validate() throws { 14 | let fullPath = expandedPath 15 | var isDir: ObjCBool = false 16 | 17 | if FileManager.default.fileExists(atPath: fullPath, isDirectory: &isDir) { 18 | if !isDir.boolValue { 19 | throw VMLocationError.notADirectory(path: fullPath) 20 | } 21 | 22 | if !FileManager.default.isWritableFile(atPath: fullPath) { 23 | throw VMLocationError.directoryNotWritable(path: fullPath) 24 | } 25 | } else { 26 | // Try to create the directory 27 | do { 28 | try FileManager.default.createDirectory( 29 | atPath: fullPath, 30 | withIntermediateDirectories: true 31 | ) 32 | } catch { 33 | throw VMLocationError.directoryCreationFailed(path: fullPath, error: error) 34 | } 35 | } 36 | } 37 | } 38 | 39 | // MARK: - Errors 40 | 41 | enum VMLocationError: Error, LocalizedError { 42 | case notADirectory(path: String) 43 | case directoryNotWritable(path: String) 44 | case directoryCreationFailed(path: String, error: Error) 45 | case locationNotFound(name: String) 46 | case duplicateLocationName(name: String) 47 | case invalidLocationName(name: String) 48 | case defaultLocationCannotBeRemoved(name: String) 49 | 50 | var errorDescription: String? 
{ 51 | switch self { 52 | case .notADirectory(let path): 53 | return "Path is not a directory: \(path)" 54 | case .directoryNotWritable(let path): 55 | return "Directory is not writable: \(path)" 56 | case .directoryCreationFailed(let path, let error): 57 | return "Failed to create directory at \(path): \(error.localizedDescription)" 58 | case .locationNotFound(let name): 59 | return "VM location not found: \(name)" 60 | case .duplicateLocationName(let name): 61 | return "VM location with name '\(name)' already exists" 62 | case .invalidLocationName(let name): 63 | return 64 | "Invalid location name: \(name). Names should be alphanumeric with underscores or dashes." 65 | case .defaultLocationCannotBeRemoved(let name): 66 | return "Cannot remove the default location '\(name)'. Set a new default location first." 67 | } 68 | } 69 | } 70 | ``` -------------------------------------------------------------------------------- /docs/content/docs/computer-sdk/computer-ui.mdx: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | title: Computer UI 3 | --- 4 | 5 | The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature. 6 | 7 | ```bash 8 | # Install with UI support 9 | pip install "cua-computer[ui]" 10 | ``` 11 | 12 | <Callout title="Note"> 13 | For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI. 14 | </Callout> 15 | 16 | ### Building and Sharing Demonstrations with Huggingface 17 | 18 | Follow these steps to contribute your own demonstrations: 19 | 20 | #### 1. Set up Huggingface Access 21 | 22 | Set your HF_TOKEN in a .env file or in your environment variables: 23 | 24 | ```bash 25 | # In .env file 26 | HF_TOKEN=your_huggingface_token 27 | ``` 28 | 29 | #### 2. 
Launch the Computer UI 30 | 31 | ```python 32 | # launch_ui.py 33 | from computer.ui.gradio.app import create_gradio_ui 34 | from dotenv import load_dotenv 35 | load_dotenv('.env') 36 | 37 | app = create_gradio_ui() 38 | app.launch(share=False) 39 | ``` 40 | 41 | For examples, see [Computer UI Examples](https://github.com/trycua/cua/tree/main/examples/computer_ui_examples.py) 42 | 43 | #### 3. Record Your Tasks 44 | 45 | <details open> 46 | <summary>View demonstration video</summary> 47 | <video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video> 48 | </details> 49 | 50 | Record yourself performing various computer tasks using the UI. 51 | 52 | #### 4. Save Your Demonstrations 53 | 54 | <details open> 55 | <summary>View demonstration video</summary> 56 | <video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video> 57 | </details> 58 | 59 | Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding"). 60 | 61 | #### 5. Record Additional Demonstrations 62 | 63 | Repeat steps 3 and 4 until you have a good number of demonstrations covering different tasks and scenarios. 64 | 65 | #### 6. 
Upload to Huggingface 66 | 67 | <details open> 68 | <summary>View demonstration video</summary> 69 | <video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video> 70 | </details> 71 | 72 | Upload your dataset to Huggingface by: 73 | - Naming it as `{your_username}/{dataset_name}` 74 | - Choosing public or private visibility 75 | - Optionally selecting specific tags to upload only tasks with certain tags 76 | 77 | #### Examples and Resources 78 | 79 | - Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset) 80 | - Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua) ``` -------------------------------------------------------------------------------- /libs/xfce/src/xfce-config/xfce4-session.xml: -------------------------------------------------------------------------------- ``` 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <channel name="xfce4-session" version="1.0"> 3 | <property name="general" type="empty"> 4 | <property name="FailsafeSessionName" type="string" value="Failsafe"/> 5 | <property name="SessionName" type="string" value="Default"/> 6 | <property name="SaveOnExit" type="bool" value="false"/> 7 | </property> 8 | <property name="sessions" type="empty"> 9 | <property name="Failsafe" type="empty"> 10 | <property name="IsFailsafe" type="bool" value="true"/> 11 | <property name="Count" type="int" value="5"/> 12 | <property name="Client0_Command" type="array"> 13 | <value type="string" value="xfwm4"/> 14 | </property> 15 | <property name="Client0_Priority" type="int" value="15"/> 16 | <property name="Client0_PerScreen" type="bool" value="false"/> 17 | <property name="Client1_Command" type="array"> 18 | <value type="string" value="xfce4-panel"/> 19 | </property> 20 | <property name="Client1_Priority" type="int" value="25"/> 21 | <property name="Client1_PerScreen" type="bool" value="false"/> 22 | <property 
name="Client2_Command" type="array"> 23 | <value type="string" value="xfdesktop"/> 24 | </property> 25 | <property name="Client2_Priority" type="int" value="35"/> 26 | <property name="Client2_PerScreen" type="bool" value="false"/> 27 | <property name="Client3_Command" type="array"> 28 | <value type="string" value="xfsettingsd"/> 29 | </property> 30 | <property name="Client3_Priority" type="int" value="10"/> 31 | <property name="Client3_PerScreen" type="bool" value="false"/> 32 | <property name="Client4_Command" type="array"> 33 | <value type="string" value="xfce4-notifyd"/> 34 | </property> 35 | <property name="Client4_Priority" type="int" value="20"/> 36 | <property name="Client4_PerScreen" type="bool" value="false"/> 37 | </property> 38 | </property> 39 | <property name="splash" type="empty"> 40 | <property name="Engine" type="string" value=""/> 41 | </property> 42 | <property name="compat" type="empty"> 43 | <property name="LaunchGNOME" type="bool" value="false"/> 44 | </property> 45 | <property name="shutdown" type="empty"> 46 | <property name="ShowSuspend" type="bool" value="false"/> 47 | <property name="ShowHibernate" type="bool" value="false"/> 48 | <property name="ShowHybridSleep" type="bool" value="false"/> 49 | <property name="ShowSwitchUser" type="bool" value="false"/> 50 | </property> 51 | <property name="screensaver" type="empty"> 52 | <property name="enabled" type="bool" value="false"/> 53 | <property name="lock-enabled" type="bool" value="false"/> 54 | </property> 55 | </channel> 56 | ``` -------------------------------------------------------------------------------- /examples/pylume_examples.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | from pylume import PyLume, ImageRef, VMRunOpts, SharedDirectory, VMConfig, VMUpdateOpts 3 | 4 | 5 | async def main(): 6 | """Example usage of PyLume.""" 7 | async with PyLume(port=7777, use_existing_server=False, debug=True) as pylume: 8 | 9 | # 
Get latest IPSW URL 10 | print("\n=== Getting Latest IPSW URL ===") 11 | url = await pylume.get_latest_ipsw_url() 12 | print("Latest IPSW URL:", url) 13 | 14 | # Create a new VM 15 | print("\n=== Creating a new VM ===") 16 | vm_config = VMConfig( 17 | name="lume-vm-new", 18 | os="macOS", 19 | cpu=2, 20 | memory="4GB", 21 | disk_size="64GB", # type: ignore 22 | display="1024x768", 23 | ipsw="latest", 24 | ) 25 | await pylume.create_vm(vm_config) 26 | 27 | # Get latest IPSW URL 28 | print("\n=== Getting Latest IPSW URL ===") 29 | url = await pylume.get_latest_ipsw_url() 30 | print("Latest IPSW URL:", url) 31 | 32 | # List available images 33 | print("\n=== Listing Available Images ===") 34 | images = await pylume.get_images() 35 | print("Available Images:", images) 36 | 37 | # List all VMs to verify creation 38 | print("\n=== Listing All VMs ===") 39 | vms = await pylume.list_vms() 40 | print("VMs:", vms) 41 | 42 | # Get specific VM details 43 | print("\n=== Getting VM Details ===") 44 | vm = await pylume.get_vm("lume-vm") 45 | print("VM Details:", vm) 46 | 47 | # Update VM settings 48 | print("\n=== Updating VM Settings ===") 49 | update_opts = VMUpdateOpts(cpu=8, memory="4GB") 50 | await pylume.update_vm("lume-vm", update_opts) 51 | 52 | # Pull an image 53 | image_ref = ImageRef( 54 | image="macos-sequoia-vanilla", tag="latest", registry="ghcr.io", organization="trycua" 55 | ) 56 | await pylume.pull_image(image_ref, name="lume-vm-pulled") 57 | 58 | # Run with shared directory 59 | run_opts = VMRunOpts( 60 | no_display=False, # type: ignore 61 | shared_directories=[ # type: ignore 62 | SharedDirectory(host_path="~/shared", read_only=False) # type: ignore 63 | ], 64 | ) 65 | await pylume.run_vm("lume-vm", run_opts) 66 | 67 | # Or simpler: 68 | await pylume.run_vm("lume-vm") 69 | 70 | # Clone VM 71 | print("\n=== Cloning VM ===") 72 | await pylume.clone_vm("lume-vm", "lume-vm-cloned") 73 | 74 | # Stop VM 75 | print("\n=== Stopping VM ===") 76 | await 
pylume.stop_vm("lume-vm") 77 | 78 | # Delete VM 79 | print("\n=== Deleting VM ===") 80 | await pylume.delete_vm("lume-vm-cloned") 81 | 82 | 83 | if __name__ == "__main__": 84 | asyncio.run(main()) 85 | ```