This is page 12 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ ├── py.code-workspace │ └── settings.json ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── 
windows-sandbox.md ├── CONTRIBUTING.md ├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── cloud-vm-management.mdx │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx 
│ │ │ │ ├── installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ │ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── cloud_api_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── 
som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift 
│ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── .bumpversion.cfg │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py 
│ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── gemini.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ 
│ │ └── provider.py │ │ │ │ │ ├── types.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── .bumpversion.cfg │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── CONCURRENT_SESSIONS.md │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── server.py │ │ │ │ └── session_manager.py │ │ │ ├── pdm.lock │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── .bumpversion.cfg │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── .bumpversion.cfg │ │ 
├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ │ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── 
pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── xfce4-session.xml ├── LICENSE.md ├── Makefile ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_mcp_server_session_management.py ├── test_mcp_server_streaming.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /libs/python/computer/computer/interface/base.py: -------------------------------------------------------------------------------- ```python 1 | """Base interface for computer control.""" 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import Optional, Dict, Any, Tuple, List 5 | from ..logger import Logger, LogLevel 6 | from .models import MouseButton, CommandResult 7 | 8 | class BaseComputerInterface(ABC): 9 | """Base class for computer control interfaces.""" 10 | 11 | def 
__init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
        """Initialize interface.

        Args:
            ip_address: IP address of the computer to control
            username: Username for authentication
            password: Password for authentication
            api_key: Optional API key for cloud authentication
            vm_name: Optional VM name for cloud authentication
        """
        self.ip_address = ip_address
        self.username = username
        self.password = password
        self.api_key = api_key
        self.vm_name = vm_name
        self.logger = Logger("cua.interface", LogLevel.NORMAL)

        # Optional default delay time between commands (in seconds)
        self.delay: float = 0.0

    @abstractmethod
    async def wait_for_ready(self, timeout: int = 60) -> None:
        """Wait for interface to be ready.

        Args:
            timeout: Maximum time to wait in seconds

        Raises:
            TimeoutError: If interface is not ready within timeout
        """
        pass

    @abstractmethod
    def close(self) -> None:
        """Close the interface connection."""
        pass

    def force_close(self) -> None:
        """Force close the interface connection.

        By default, this just calls close(), but subclasses can override
        to provide more forceful cleanup.
        """
        self.close()

    # Mouse Actions
    @abstractmethod
    async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
        """Press and hold a mouse button.

        Args:
            x: X coordinate to press at. If None, uses current cursor position.
            y: Y coordinate to press at. If None, uses current cursor position.
            button: Mouse button to press ('left', 'middle', 'right').
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
        """Release a mouse button.

        Args:
            x: X coordinate to release at. If None, uses current cursor position.
            y: Y coordinate to release at. If None, uses current cursor position.
            button: Mouse button to release ('left', 'middle', 'right').
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
        """Perform a left mouse button click.

        Args:
            x: X coordinate to click at. If None, uses current cursor position.
            y: Y coordinate to click at. If None, uses current cursor position.
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
        """Perform a right mouse button click.

        Args:
            x: X coordinate to click at. If None, uses current cursor position.
            y: Y coordinate to click at. If None, uses current cursor position.
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
        """Perform a double left mouse button click.

        Args:
            x: X coordinate to double-click at. If None, uses current cursor position.
            y: Y coordinate to double-click at. If None, uses current cursor position.
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
        """Move the cursor to the specified screen coordinates.

        Args:
            x: X coordinate to move cursor to.
            y: Y coordinate to move cursor to.
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
        """Drag from current position to specified coordinates.

        Args:
            x: The x coordinate to drag to
            y: The y coordinate to drag to
            button: The mouse button to use ('left', 'middle', 'right')
            duration: How long the drag should take in seconds
            delay: Optional delay in seconds after the action
        """
        pass

    @abstractmethod
    async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
        """Drag the cursor along a path of coordinates.

        Args:
            path: List of (x, y) coordinate tuples defining the drag path
            button: The mouse button to use ('left', 'middle', 'right')
            duration: Total time in seconds that the drag operation should take
            delay: Optional delay in seconds after the action
        """
        pass

    # Keyboard Actions
    @abstractmethod
    async def key_down(self, key: str, delay: Optional[float] = None) -> None:
        """Press and hold a key.

        Args:
            key: The key to press and hold (e.g., 'a', 'shift', 'ctrl').
            delay: Optional delay in seconds after the action.
        """
        pass

    @abstractmethod
    async def key_up(self, key: str, delay: Optional[float] = None) -> None:
        """Release a previously pressed key.

        Args:
            key: The key to release (e.g., 'a', 'shift', 'ctrl').
            delay: Optional delay in seconds after the action.
        """
        pass

    @abstractmethod
    async def type_text(self, text: str, delay: Optional[float] = None) -> None:
        """Type the specified text string.

        Args:
            text: The text string to type.
            delay: Optional delay in seconds after the action.
        """
        pass

    @abstractmethod
    async def press_key(self, key: str, delay: Optional[float] = None) -> None:
        """Press and release a single key.

        Args:
            key: The key to press (e.g., 'a', 'enter', 'escape').
            delay: Optional delay in seconds after the action.
        """
        pass

    @abstractmethod
    async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
        """Press multiple keys simultaneously (keyboard shortcut).

        Args:
            *keys: Variable number of keys to press together (e.g., 'ctrl', 'c').
            delay: Optional delay in seconds after the action.
        """
        pass

    # Scrolling Actions
    @abstractmethod
    async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
        """Scroll the mouse wheel by specified amounts.

        Args:
            x: Horizontal scroll amount (positive = right, negative = left).
            y: Vertical scroll amount (positive = up, negative = down).
            delay: Optional delay in seconds after the action.
        """
        pass

    @abstractmethod
    async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
        """Scroll down by the specified number of clicks.

        Args:
            clicks: Number of scroll clicks to perform downward.
            delay: Optional delay in seconds after the action.
        """
        pass

    @abstractmethod
    async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
        """Scroll up by the specified number of clicks.

        Args:
            clicks: Number of scroll clicks to perform upward.
            delay: Optional delay in seconds after the action.
        """
        pass

    # Screen Actions
    @abstractmethod
    async def screenshot(self) -> bytes:
        """Take a screenshot.

        Returns:
            Raw bytes of the screenshot image
        """
        pass

    @abstractmethod
    async def get_screen_size(self) -> Dict[str, int]:
        """Get the screen dimensions.

        Returns:
            Dict with 'width' and 'height' keys
        """
        pass

    @abstractmethod
    async def get_cursor_position(self) -> Dict[str, int]:
        """Get the current cursor position on screen.

        Returns:
            Dict with 'x' and 'y' keys containing cursor coordinates.
        """
        pass

    # Clipboard Actions
    @abstractmethod
    async def copy_to_clipboard(self) -> str:
        """Get the current clipboard content.

        Returns:
            The text content currently stored in the clipboard.
        """
        pass

    @abstractmethod
    async def set_clipboard(self, text: str) -> None:
        """Set the clipboard content to the specified text.

        Args:
            text: The text to store in the clipboard.
        """
        pass

    # File System Actions
    @abstractmethod
    async def file_exists(self, path: str) -> bool:
        """Check if a file exists at the specified path.

        Args:
            path: The file path to check.

        Returns:
            True if the file exists, False otherwise.
        """
        pass

    @abstractmethod
    async def directory_exists(self, path: str) -> bool:
        """Check if a directory exists at the specified path.

        Args:
            path: The directory path to check.

        Returns:
            True if the directory exists, False otherwise.
        """
        pass

    @abstractmethod
    async def list_dir(self, path: str) -> List[str]:
        """List the contents of a directory.

        Args:
            path: The directory path to list.

        Returns:
            List of file and directory names in the specified directory.
        """
        pass

    @abstractmethod
    async def read_text(self, path: str) -> str:
        """Read the text contents of a file.

        Args:
            path: The file path to read from.

        Returns:
            The text content of the file.
        """
        pass

    @abstractmethod
    async def write_text(self, path: str, content: str) -> None:
        """Write text content to a file.

        Args:
            path: The file path to write to.
            content: The text content to write.
        """
        pass

    @abstractmethod
    async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
        """Read file binary contents with optional seeking support.

        Args:
            path: Path to the file
            offset: Byte offset to start reading from (default: 0)
            length: Number of bytes to read (default: None for entire file)
        """
        pass

    @abstractmethod
    async def write_bytes(self, path: str, content: bytes) -> None:
        """Write binary content to a file.

        Args:
            path: The file path to write to.
            content: The binary content to write.
        """
        pass

    @abstractmethod
    async def delete_file(self, path: str) -> None:
        """Delete a file at the specified path.

        Args:
            path: The file path to delete.
        """
        pass

    @abstractmethod
    async def create_dir(self, path: str) -> None:
        """Create a directory at the specified path.

        Args:
            path: The directory path to create.
        """
        pass

    @abstractmethod
    async def delete_dir(self, path: str) -> None:
        """Delete a directory at the specified path.

        Args:
            path: The directory path to delete.
        """
        pass

    @abstractmethod
    async def get_file_size(self, path: str) -> int:
        """Get the size of a file in bytes.

        Args:
            path: The file path to get the size of.

        Returns:
            The size of the file in bytes.
        """
        pass

    @abstractmethod
    async def run_command(self, command: str) -> CommandResult:
        """Run shell command and return structured result.

        Executes a shell command using subprocess.run with shell=True and check=False.
        The command is run in the target environment and captures both stdout and stderr.

        Args:
            command (str): The shell command to execute

        Returns:
            CommandResult: A structured result containing:
                - stdout (str): Standard output from the command
                - stderr (str): Standard error from the command
                - returncode (int): Exit code from the command (0 indicates success)

        Raises:
            RuntimeError: If the command execution fails at the system level

        Example:
            result = await interface.run_command("ls -la")
            if result.returncode == 0:
                print(f"Output: {result.stdout}")
            else:
                print(f"Error: {result.stderr}, Exit code: {result.returncode}")
        """
        pass

    # Accessibility Actions
    @abstractmethod
    async def get_accessibility_tree(self) -> Dict:
        """Get the accessibility tree of the current screen.

        Returns:
            Dict containing the hierarchical accessibility information of screen elements.
        """
        pass

    @abstractmethod
    async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
        """Convert screenshot coordinates to screen coordinates.
440 | 441 | Args: 442 | x: X coordinate in screenshot space 443 | y: Y coordinate in screenshot space 444 | 445 | Returns: 446 | tuple[float, float]: (x, y) coordinates in screen space 447 | """ 448 | pass 449 | 450 | @abstractmethod 451 | async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]: 452 | """Convert screen coordinates to screenshot coordinates. 453 | 454 | Args: 455 | x: X coordinate in screen space 456 | y: Y coordinate in screen space 457 | 458 | Returns: 459 | tuple[float, float]: (x, y) coordinates in screenshot space 460 | """ 461 | pass 462 | ``` -------------------------------------------------------------------------------- /libs/lumier/src/lib/vm.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | 3 | # Initialize global flags 4 | export PULL_IN_PROGRESS=0 5 | 6 | start_vm() { 7 | # Determine storage path for VM 8 | STORAGE_PATH="$HOST_STORAGE_PATH" 9 | if [ -z "$STORAGE_PATH" ]; then 10 | STORAGE_PATH="storage_${VM_NAME}" 11 | fi 12 | 13 | # Check if VM exists and its status using JSON format - quietly 14 | VM_INFO=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}") 15 | 16 | # Check if VM not found error 17 | if [[ $VM_INFO == *"Virtual machine not found"* ]]; then 18 | IMAGE_NAME="${VERSION##*/}" 19 | # Parse registry and organization from VERSION 20 | REGISTRY=$(echo $VERSION | cut -d'/' -f1) 21 | ORGANIZATION=$(echo $VERSION | cut -d'/' -f2) 22 | 23 | echo "Pulling VM image $IMAGE_NAME..." 24 | lume_pull "$IMAGE_NAME" "$VM_NAME" "$STORAGE_PATH" "$REGISTRY" "$ORGANIZATION" 25 | else 26 | # Parse the JSON status - check if it contains "status" : "running" 27 | if [[ $VM_INFO == *'"status" : "running"'* ]]; then 28 | lume_stop "$VM_NAME" "$STORAGE_PATH" 29 | fi 30 | fi 31 | 32 | # Format memory size for display purposes 33 | MEMORY_DISPLAY="$RAM_SIZE" 34 | if [[ ! "$RAM_SIZE" == *"GB"* && ! 
"$RAM_SIZE" == *"MB"* ]]; then 35 | MEMORY_DISPLAY="${RAM_SIZE}MB" 36 | fi 37 | 38 | # Set VM parameters using the wrapper function 39 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 40 | echo "Updating VM settings: cpu=$CPU_CORES memory=$MEMORY_DISPLAY display=$DISPLAY" 41 | fi 42 | lume_set "$VM_NAME" "$STORAGE_PATH" "$CPU_CORES" "$RAM_SIZE" "$DISPLAY" 43 | 44 | # Fetch VM configuration - quietly (don't display to console) 45 | CONFIG_JSON=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}") 46 | 47 | # Setup shared directory args if necessary 48 | SHARED_DIR_ARGS="" 49 | if [ -d "/shared" ]; then 50 | if [ -n "$HOST_SHARED_PATH" ]; then 51 | SHARED_DIR_ARGS="--shared-dir=$HOST_SHARED_PATH" 52 | else 53 | echo "Warning: /shared volume exists but HOST_SHARED_PATH is not set. Cannot mount volume." 54 | fi 55 | fi 56 | 57 | # Run VM with VNC and shared directory using curl 58 | lume_run $SHARED_DIR_ARGS --storage "$STORAGE_PATH" "$VM_NAME" & 59 | # lume run "$VM_NAME" --storage "$STORAGE_PATH" --no-display 60 | 61 | # sleep 10000000 62 | 63 | # Wait for VM to be running and VNC URL to be available 64 | vm_ip="" 65 | vnc_url="" 66 | max_attempts=30 67 | attempt=0 68 | 69 | while [ $attempt -lt $max_attempts ]; do 70 | # Get VM info as JSON using the API function - pass debug flag 71 | VM_INFO=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}") 72 | 73 | # Extract status, IP address, and VNC URL using the helper function 74 | vm_status=$(extract_json_field "status" "$VM_INFO") 75 | vm_ip=$(extract_json_field "ipAddress" "$VM_INFO") 76 | vnc_url=$(extract_json_field "vncUrl" "$VM_INFO") 77 | 78 | # Check if VM status is 'running' and we have IP and VNC URL 79 | if [ "$vm_status" = "running" ] && [ -n "$vm_ip" ] && [ -n "$vnc_url" ]; then 80 | break 81 | fi 82 | 83 | sleep 2 84 | attempt=$((attempt + 1)) 85 | done 86 | 87 | if [ -z "$vm_ip" ] || [ -z "$vnc_url" ]; then 88 | echo "Timed out waiting for VM to start or VNC URL to become available." 
89 | lume_stop "$VM_NAME" "$STORAGE_PATH" > /dev/null 2>&1 90 | # lume stop "$VM_NAME" --storage "$STORAGE_PATH" > /dev/null 2>&1 91 | exit 1 92 | fi 93 | 94 | # Parse VNC URL to extract password and port 95 | VNC_PASSWORD=$(echo "$vnc_url" | sed -n 's/.*:\(.*\)@.*/\1/p') 96 | VNC_PORT=$(echo "$vnc_url" | sed -n 's/.*:\([0-9]\+\)$/\1/p') 97 | 98 | # Wait for SSH to become available 99 | wait_for_ssh "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" 5 20 100 | 101 | # Export VNC variables for entry.sh to use 102 | export VNC_PORT 103 | export VNC_PASSWORD 104 | 105 | # Execute on-logon.sh if present 106 | on_logon_script="/run/lifecycle/on-logon.sh" 107 | 108 | # Only show detailed logs in debug mode 109 | if [ "${LUMIER_DEBUG:-0}" == "1" ]; then 110 | echo "Running on-logon.sh hook script on VM..." 111 | fi 112 | 113 | # Check if script exists 114 | if [ ! -f "$on_logon_script" ]; then 115 | echo "Warning: on-logon.sh hook script not found at $on_logon_script" 116 | else 117 | # Execute the remote script 118 | execute_remote_script "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" "$on_logon_script" "$VNC_PASSWORD" "$HOST_SHARED_PATH" 119 | fi 120 | } 121 | 122 | # Get VM information using curl 123 | lume_get() { 124 | local vm_name="$1" 125 | local storage="$2" 126 | local format="${3:-json}" 127 | local debug="${4:-false}" 128 | 129 | local api_host="${LUME_API_HOST:-host.docker.internal}" 130 | local api_port="${LUME_API_PORT:-7777}" 131 | 132 | # URL encode the storage path for the query parameter 133 | # Replace special characters with their URL encoded equivalents 134 | local encoded_storage=$(echo "$storage" | sed 's/\//%2F/g' | sed 's/ /%20/g' | sed 's/:/%3A/g') 135 | 136 | # Construct API URL with encoded storage parameter 137 | local api_url="http://${api_host}:${api_port}/lume/vms/${vm_name}?storage=${encoded_storage}" 138 | 139 | # Construct the full curl command 140 | local curl_cmd="curl --connect-timeout 6000 --max-time 5000 -s '$api_url'" 141 | 142 | # Print debug info 
143 | if [[ "$debug" == "true" || "$LUMIER_DEBUG" == "1" ]]; then 144 | echo "[DEBUG] Calling API: $api_url" 145 | echo "[DEBUG] Full curl command: $curl_cmd" 146 | fi 147 | 148 | # Log curl commands only when in debug mode 149 | if [[ "$debug" == "true" || "$LUMIER_DEBUG" == "1" ]]; then 150 | echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] DEBUG: Executing curl request: $api_url" >&2 151 | fi 152 | 153 | # Make the API call 154 | local response=$(curl --connect-timeout 6000 \ 155 | --max-time 5000 \ 156 | -s \ 157 | "$api_url") 158 | 159 | # Print the response if debugging is enabled 160 | if [[ "$debug" == "true" || "${LUMIER_DEBUG:-0}" == "1" ]]; then 161 | echo "[DEBUG] API Response:" 162 | echo "$response" | jq '.' 2>/dev/null || echo "$response" 163 | fi 164 | 165 | # Output the response so callers can capture it 166 | echo "$response" 167 | } 168 | 169 | # Set VM properties using curl 170 | lume_set() { 171 | local vm_name="$1" 172 | local storage="$2" 173 | local cpu="${3:-4}" 174 | local memory="${4:-8192}" 175 | local display="${5:-1024x768}" 176 | 177 | local api_host="${LUME_API_HOST:-host.docker.internal}" 178 | local api_port="${LUME_API_PORT:-7777}" 179 | 180 | # Handle memory format for the API 181 | if [[ "$memory" == *"GB"* ]]; then 182 | # Already in GB format, keep as is 183 | : # No-op 184 | elif [[ "$memory" =~ ^[0-9]+$ ]]; then 185 | # If memory is a simple number, assume MB and convert to GB 186 | memory="$(awk "BEGIN { printf \"%.1f\", $memory/1024 }")GB" 187 | fi 188 | 189 | # Only show memory formatting debug in debug mode 190 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 191 | echo "[DEBUG] Formatted memory value: $memory" 192 | fi 193 | 194 | # Store response to conditionally show based on debug mode 195 | local response=$(curl --connect-timeout 6000 \ 196 | --max-time 5000 \ 197 | -s \ 198 | -X PATCH \ 199 | -H "Content-Type: application/json" \ 200 | -d "{ 201 | \"cpu\": $cpu, 202 | \"memory\": \"$memory\", 203 | \"display\": \"$display\", 204 | 
\"storage\": \"$storage\" 205 | }" \ 206 | "http://${api_host}:${api_port}/lume/vms/${vm_name}") 207 | 208 | # Only show response in debug mode 209 | if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then 210 | echo "$response" 211 | fi 212 | } 213 | 214 | stop_vm() { 215 | local in_cleanup=${1:-false} # Optional first argument to indicate if called from cleanup trap 216 | echo "Stopping VM '$VM_NAME'..." 217 | STORAGE_PATH="$HOST_STORAGE_PATH" 218 | 219 | # Only show storage path in debug mode 220 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 221 | echo "STORAGE_PATH: $STORAGE_PATH" 222 | fi 223 | 224 | VM_INFO=$(lume_get "$VM_NAME" "$STORAGE_PATH" "json" "${LUMIER_DEBUG:-0}") 225 | vm_status=$(extract_json_field "status" "$VM_INFO") 226 | 227 | if [ "$vm_status" == "running" ]; then 228 | lume_stop "$VM_NAME" "$STORAGE_PATH" 229 | elif [ "$vm_status" == "stopped" ]; then 230 | echo "VM '$VM_NAME' is already stopped." 231 | elif [ "$in_cleanup" = true ]; then 232 | # If we are in the cleanup trap and status is unknown or VM not found, 233 | # still attempt a stop just in case. 234 | echo "VM status is unknown ('$vm_status') or VM not found during cleanup. Attempting stop anyway." 235 | lume_stop "$VM_NAME" "$STORAGE_PATH" 236 | sleep 5 237 | echo "VM '$VM_NAME' stop command issued as a precaution." 238 | else 239 | echo "VM status is unknown ('$vm_status') or VM not found. Not attempting stop." 
240 | fi 241 | } 242 | 243 | is_vm_running() { 244 | # Check VM status using the API function 245 | local vm_info 246 | vm_info=$(lume_get "$VM_NAME" "$HOST_STORAGE_PATH") 247 | if [[ $vm_info == *'"status" : "running"'* ]]; then 248 | return 0 # Running 249 | else 250 | return 1 # Not running or doesn't exist 251 | fi 252 | # lume ls | grep -q "$VM_NAME" # Old CLI check 253 | } 254 | 255 | # Stop VM with storage location specified using curl 256 | lume_stop() { 257 | local vm_name="$1" 258 | local storage="$2" 259 | 260 | local api_host="${LUME_API_HOST:-host.docker.internal}" 261 | local api_port="${LUME_API_PORT:-7777}" 262 | 263 | # Only log in debug mode 264 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 265 | echo "Stopping VM $vm_name..." 266 | fi 267 | 268 | # Execute command and capture response 269 | local response 270 | if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then 271 | # Show output in debug mode 272 | response=$(curl --connect-timeout 6000 \ 273 | --max-time 5000 \ 274 | -X POST \ 275 | -H "Content-Type: application/json" \ 276 | -d '{"storage":"'$storage'"}' \ 277 | "http://${api_host}:${api_port}/lume/vms/${vm_name}/stop") 278 | echo "$response" 279 | else 280 | # Run silently in normal mode 281 | response=$(curl --connect-timeout 6000 \ 282 | --max-time 5000 \ 283 | -s \ 284 | -X POST \ 285 | -H "Content-Type: application/json" \ 286 | -d '{"storage":"'$storage'"}' \ 287 | "http://${api_host}:${api_port}/lume/vms/${vm_name}/stop") 288 | fi 289 | } 290 | 291 | # Pull a VM image using curl 292 | lume_pull() { 293 | local image="$1" # Image name with tag 294 | local vm_name="$2" # Name for the new VM 295 | local storage="$3" # Storage location 296 | local registry="${4:-ghcr.io}" # Registry, default is ghcr.io 297 | local organization="${5:-trycua}" # Organization, default is trycua 298 | 299 | local api_host="${LUME_API_HOST:-host.docker.internal}" 300 | local api_port="${LUME_API_PORT:-7777}" 301 | 302 | # Mark that pull is in progress for interrupt handling 
303 | export PULL_IN_PROGRESS=1 304 | 305 | # Only log full details in debug mode 306 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 307 | echo "Pulling image $image from $registry/$organization..." 308 | else 309 | echo "Pulling image $image..." 310 | fi 311 | 312 | # Inform users how to check pull progress 313 | echo "You can check the pull progress using: lume logs -f" 314 | 315 | # Pull image via API and capture response 316 | local response 317 | if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then 318 | # Show full response in debug mode - no timeout limits 319 | response=$(curl \ 320 | -X POST \ 321 | -H "Content-Type: application/json" \ 322 | -d "{ 323 | \"image\": \"$image\", 324 | \"name\": \"$vm_name\", 325 | \"registry\": \"$registry\", 326 | \"organization\": \"$organization\", 327 | \"storage\": \"$storage\" 328 | }" \ 329 | "http://${api_host}:${api_port}/lume/pull") 330 | echo "$response" 331 | else 332 | # Run silently in normal mode - no timeout limits 333 | response=$(curl \ 334 | -s \ 335 | -X POST \ 336 | -H "Content-Type: application/json" \ 337 | -d "{ 338 | \"image\": \"$image\", 339 | \"name\": \"$vm_name\", 340 | \"registry\": \"$registry\", 341 | \"organization\": \"$organization\", 342 | \"storage\": \"$storage\" 343 | }" \ 344 | "http://${api_host}:${api_port}/lume/pull") 345 | fi 346 | 347 | # Unset pull in progress flag 348 | export PULL_IN_PROGRESS=0 349 | } 350 | 351 | 352 | # Run VM with VNC client started and shared directory using curl 353 | lume_run() { 354 | # Parse args 355 | local shared_dir="" 356 | local storage="" 357 | local vm_name="lume_vm" 358 | local no_display=true 359 | while [[ $# -gt 0 ]]; do 360 | case $1 in 361 | --shared-dir=*) 362 | shared_dir="${1#*=}" 363 | shift 364 | ;; 365 | --storage) 366 | storage="$2" 367 | shift 2 368 | ;; 369 | --no-display) 370 | no_display=true 371 | shift 372 | ;; 373 | *) 374 | # Assume last arg is VM name if not an option 375 | vm_name="$1" 376 | shift 377 | ;; 378 | esac 379 | done 380 | 381 | 
local api_host="${LUME_API_HOST:-host.docker.internal}" 382 | local api_port="${LUME_API_PORT:-7777}" 383 | 384 | # Only log in debug mode 385 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 386 | echo "Running VM $vm_name..." 387 | fi 388 | 389 | # Build the JSON body dynamically based on what's provided 390 | local json_body="{\"noDisplay\": true" 391 | 392 | # Only include shared directories if shared_dir is provided 393 | if [[ -n "$shared_dir" ]]; then 394 | json_body+=", \"sharedDirectories\": [{\"hostPath\": \"$shared_dir\", \"readOnly\": false}]" 395 | fi 396 | 397 | # Only include storage if it's provided 398 | if [[ -n "$storage" ]]; then 399 | json_body+=", \"storage\": \"$storage\"" 400 | fi 401 | 402 | # Add recovery mode (always false) 403 | json_body+=", \"recoveryMode\": false}" 404 | 405 | # Execute the command and store the response 406 | local response 407 | if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then 408 | # Show response in debug mode 409 | response=$(curl --connect-timeout 6000 \ 410 | --max-time 5000 \ 411 | -X POST \ 412 | -H 'Content-Type: application/json' \ 413 | -d "$json_body" \ 414 | http://${api_host}:${api_port}/lume/vms/$vm_name/run) 415 | echo "$response" 416 | else 417 | # Run silently in normal mode 418 | response=$(curl --connect-timeout 6000 \ 419 | --max-time 5000 \ 420 | -s \ 421 | -X POST \ 422 | -H 'Content-Type: application/json' \ 423 | -d "$json_body" \ 424 | http://${api_host}:${api_port}/lume/vms/$vm_name/run) 425 | fi 426 | } 427 | 428 | # Delete a VM using curl 429 | lume_delete() { 430 | local vm_name="$1" 431 | local storage="$2" 432 | 433 | local api_host="${LUME_API_HOST:-host.docker.internal}" 434 | local api_port="${LUME_API_PORT:-7777}" 435 | 436 | # URL encode the storage path for the query parameter 437 | # Replace special characters with their URL encoded equivalents 438 | local encoded_storage=$(echo "$storage" | sed 's/\//%2F/g' | sed 's/ /%20/g' | sed 's/:/%3A/g') 439 | 440 | # Construct API URL with encoded 
storage parameter 441 | local api_url="http://${api_host}:${api_port}/lume/vms/${vm_name}?storage=${encoded_storage}" 442 | 443 | # Only log in debug mode 444 | if [[ "$LUMIER_DEBUG" == "1" ]]; then 445 | echo "Deleting VM $vm_name from storage $storage..." 446 | fi 447 | 448 | # Execute command and capture response 449 | local response 450 | if [[ "${LUMIER_DEBUG:-0}" == "1" ]]; then 451 | # Show output in debug mode 452 | response=$(curl --connect-timeout 6000 \ 453 | --max-time 5000 \ 454 | -X DELETE \ 455 | "$api_url") 456 | echo "$response" 457 | else 458 | # Run silently in normal mode 459 | response=$(curl --connect-timeout 6000 \ 460 | --max-time 5000 \ 461 | -s \ 462 | -X DELETE \ 463 | "$api_url") 464 | fi 465 | } ``` -------------------------------------------------------------------------------- /libs/python/agent/benchmarks/utils.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Shared utilities for ScreenSpot-Pro benchmarking and interactive testing. 4 | """ 5 | 6 | import dotenv 7 | dotenv.load_dotenv() 8 | 9 | import asyncio 10 | import base64 11 | import os 12 | import sys 13 | import subprocess as sp 14 | import statistics 15 | from datetime import datetime 16 | from io import BytesIO 17 | from typing import List, Union, Tuple, Optional 18 | 19 | from PIL import Image, ImageDraw 20 | from tqdm import tqdm 21 | import gc 22 | import torch 23 | 24 | # Add parent directory to path for imports 25 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 26 | from agent.agent import ComputerAgent 27 | from models.base import ModelProtocol 28 | 29 | def get_gpu_memory() -> List[int]: 30 | """ 31 | Get GPU memory usage using nvidia-smi. 
def get_gpu_memory() -> List[int]:
    """
    Get GPU memory usage using nvidia-smi, falling back to torch.

    Returns:
        List of free memory values in MB for each GPU; ``[0]`` when no GPU
        information is available at all.
    """
    try:
        command = "nvidia-smi --query-gpu=memory.free --format=csv"
        # Drop the trailing empty line and the CSV header row; each remaining
        # line looks like "12345 MiB".
        memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
        memory_free_values = [int(x.split()[0]) for x in memory_free_info]
        return memory_free_values
    except (sp.CalledProcessError, FileNotFoundError, IndexError):
        # Fallback to torch if nvidia-smi is not available
        if torch.cuda.is_available():
            device = torch.cuda.current_device()
            total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024
            reserved = torch.cuda.memory_reserved(device) / 1024 / 1024
            return [int(total - reserved)]
        return [0]


def get_vram_usage() -> dict:
    """
    Get current VRAM usage statistics for the active CUDA device.

    Returns:
        Dictionary with keys ``allocated_mb``, ``reserved_mb``, ``total_mb``
        and ``free_mb`` (all zeros when CUDA is unavailable).
    """
    if torch.cuda.is_available():
        device = torch.cuda.current_device()
        allocated = torch.cuda.memory_allocated(device) / 1024 / 1024  # Convert to MB
        reserved = torch.cuda.memory_reserved(device) / 1024 / 1024  # Convert to MB
        total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024
        return {
            'allocated_mb': allocated,
            'reserved_mb': reserved,
            'total_mb': total,
            'free_mb': total - reserved
        }
    else:
        return {
            'allocated_mb': 0.0,
            'reserved_mb': 0.0,
            'total_mb': 0.0,
            'free_mb': 0.0
        }


def get_available_models() -> List[Union[str, "ModelProtocol"]]:
    """
    Get list of available models for testing.

    Returns:
        List of model strings and model classes
    """
    local_provider = "huggingface-local/"  # Options: huggingface-local/ or mlx/

    # from models.gta1 import GTA1Model

    models = [
        # === ComputerAgent model strings ===
        "openai/computer-use-preview",
        "anthropic/claude-opus-4-20250514",
        # f"{local_provider}HelloKKMe/GTA1-7B",
        # f"{local_provider}HelloKKMe/GTA1-32B",
        "openai/computer-use-preview+openai/gpt-4o-mini",
        "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini",

        # === Reference model classes ===
        # GTA1Model("HelloKKMe/GTA1-7B"),
        # GTA1Model("HelloKKMe/GTA1-32B"),
    ]

    return models


def is_click_in_bbox(click_coords: Optional[Tuple[int, int]], bbox: List[int]) -> bool:
    """
    Check if click coordinates are within the bounding box (inclusive edges).

    Args:
        click_coords: (x, y) coordinates or None
        bbox: [x1, y1, x2, y2] bounding box

    Returns:
        True if click is within bbox, False otherwise (including when
        click_coords is None).
    """
    if click_coords is None:
        return False

    x, y = click_coords
    x1, y1, x2, y2 = bbox

    return x1 <= x <= x2 and y1 <= y <= y2


def image_to_base64(image: "Image.Image") -> str:
    """
    Convert a PIL Image to a base64-encoded PNG string.

    Args:
        image: PIL Image

    Returns:
        Base64 encoded image string (PNG bytes)
    """
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()
144 | """ 145 | 146 | def __init__(self, model: Union[str, ModelProtocol]): 147 | self.model = model 148 | self.is_computer_agent = isinstance(model, str) 149 | self.agent: Optional[ComputerAgent] = None 150 | self.vram_usage_history: List[float] = [] # Track VRAM usage over time 151 | 152 | if self.is_computer_agent: 153 | self.model_name = str(model) 154 | else: 155 | self.model_name = f"{model.__class__.__name__}('{getattr(model, 'model_name', 'unknown')}')" 156 | 157 | async def load_model(self) -> None: 158 | """Load the model.""" 159 | if self.is_computer_agent: 160 | self.agent = ComputerAgent(model=str(self.model)) 161 | else: 162 | await self.model.load_model() # type: ignore 163 | 164 | # Record initial VRAM usage after loading 165 | vram_info = get_vram_usage() 166 | self.vram_usage_history.append(vram_info['allocated_mb']) 167 | 168 | async def unload_model(self) -> None: 169 | """Unload the model.""" 170 | if not self.is_computer_agent: 171 | await self.model.unload_model() # type: ignore 172 | else: 173 | del self.agent 174 | self.agent = None 175 | gc.collect() 176 | if torch.cuda.is_available(): 177 | torch.cuda.empty_cache() 178 | 179 | # Record VRAM usage after unloading 180 | vram_info = get_vram_usage() 181 | self.vram_usage_history.append(vram_info['allocated_mb']) 182 | 183 | def get_vram_stats(self) -> dict: 184 | """Get VRAM usage statistics for this model.""" 185 | if not self.vram_usage_history: 186 | return {'max_mb': 0.0, 'avg_mb': 0.0} 187 | 188 | return { 189 | 'max_mb': max(self.vram_usage_history), 190 | 'avg_mb': sum(self.vram_usage_history) / len(self.vram_usage_history) 191 | } 192 | 193 | 194 | async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: 195 | """Predict click coordinates.""" 196 | # Record VRAM usage before prediction 197 | vram_info = get_vram_usage() 198 | self.vram_usage_history.append(vram_info['allocated_mb']) 199 | 200 | if self.is_computer_agent: 201 | if self.agent is 
def save_results_to_markdown(all_results: List[dict], output_file: str = "screenspot_pro_results.md", title: str = "ScreenSpot-Pro Benchmark Results") -> None:
    """
    Write evaluation results to a markdown report: one summary table plus a
    per-model detail table (first 10 samples each).

    Args:
        all_results: List of evaluation results for each model
        output_file: Output markdown file path
        title: Heading used at the top of the report
    """
    with open(output_file, 'w', encoding='utf-8') as fh:
        fh.write(f"# {title}\n\n")
        fh.write(f"**Evaluation Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # Summary table
        fh.write("## Summary\n\n")
        fh.write("| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Median Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n")
        fh.write("|-------|---------------|---------|--------|----------|------------|--------------|-----------------|----------------|---------------|---------------|\n")

        for entry in all_results:
            lo = entry.get('min_prediction_time', 0.0)
            hi = entry.get('max_prediction_time', 0.0)
            fh.write(
                f"| {entry['model_name']} | {entry['total_samples']} | {entry['correct_predictions']} "
                f"| {entry['failed_predictions']} | {entry['accuracy'] * 100:.2f}% | {entry['failure_rate'] * 100:.2f}% "
                f"| {entry.get('avg_prediction_time', 0.0):.2f} | {entry.get('median_prediction_time', 0.0):.2f} "
                f"| {lo:.2f} - {hi:.2f} | {entry.get('vram_max_mb', 0.0) / 1024:.1f} | {entry.get('vram_avg_mb', 0.0) / 1024:.1f} |\n"
            )

        # Detailed results for each model
        for entry in all_results:
            fh.write(f"\n## {entry['model_name']} - Detailed Results\n\n")
            fh.write("| Sample Index | Instruction | BBox | Predicted | Correct | Error | Time (s) |\n")
            fh.write("|-----------|-------------|------|-----------|---------|-------|----------|\n")

            for sample in entry['results'][:10]:  # Show first 10 samples
                text = sample['instruction']
                shown = text[:50] + "..." if len(text) > 50 else text
                coords = sample['predicted_coords']
                fh.write(
                    f"| {sample['sample_idx']} | {shown} | {sample['bbox']} "
                    f"| {coords if coords else 'None'} | {'PASS' if sample['is_correct'] else 'FAIL'} "
                    f"| {'YES' if sample['failed'] else 'NO'} | {sample.get('prediction_time', 0.0):.2f} |\n"
                )

            if len(entry['results']) > 10:
                fh.write(f"\n*Showing first 10 of {len(entry['results'])} samples*\n")

    print(f"\nResults saved to: {output_file}")


def save_visualizations(all_results: List[dict], samples, output_dir: str = "output") -> None:
    """
    Render each model's predicted click against the ground-truth bounding box
    and write one annotated PNG per sample (first 10 per model) under
    output_dir/<sanitized model name>/.

    Args:
        all_results: List of evaluation results for each model
        samples: List of sample dicts with image, bbox, instruction keys
        output_dir: Output directory path
    """
    os.makedirs(output_dir, exist_ok=True)

    for result in all_results:
        safe_name = result['model_name'].replace('/', '_').replace('\\', '_')
        model_dir = os.path.join(output_dir, safe_name)
        os.makedirs(model_dir, exist_ok=True)

        print(f"Saving visualizations for {result['model_name']}...")

        # Only the first 10 samples are visualized
        for i, sample_result in enumerate(tqdm(result['results'][:10], desc=f"Saving {safe_name} visualizations")):
            sample_idx = sample_result['sample_idx']
            if sample_idx >= len(samples):
                print(f"Warning: Could not find sample at index {sample_idx}")
                continue
            annotated = samples[sample_idx]['image'].copy()  # copy to avoid mutating the original

            bbox = sample_result['bbox']
            predicted_coords = sample_result['predicted_coords']
            is_correct = sample_result['is_correct']

            draw = ImageDraw.Draw(annotated)

            # Ground-truth bounding box in green
            x1, y1, x2, y2 = bbox
            draw.rectangle([x1, y1, x2, y2], outline="green", width=3)
            draw.text((x1, y1 - 20), "Ground Truth", fill="green")

            # Predicted click as a crosshair: blue when correct, red otherwise
            if predicted_coords is not None:
                px, py = predicted_coords
                color = "blue" if is_correct else "red"
                crosshair_size = 15
                draw.line([(px - crosshair_size, py), (px + crosshair_size, py)], fill=color, width=3)
                draw.line([(px, py - crosshair_size), (px, py + crosshair_size)], fill=color, width=3)
                draw.text((px + 10, py - 20), f"Predicted ({px},{py})", fill=color)

            # Status banner and truncated instruction
            status = "CORRECT" if is_correct else "INCORRECT"
            status_color = "blue" if is_correct else "red"
            draw.text((10, 10), f"Status: {status}", fill=status_color)
            draw.text((10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black")

            filename = f"sample_{i+1:02d}_idx{sample_idx}_{status.lower()}.png"
            annotated.save(os.path.join(model_dir, filename))

        print(f"Visualizations saved to: {model_dir}")


def save_prediction_visualization(image: "Image.Image", instruction: str, predictions: List[dict],
                                  output_file: str = "interactive_prediction.png") -> None:
    """
    Overlay several model predictions on a single image and save it.

    Args:
        image: PIL Image to visualize
        instruction: Instruction text drawn at the top of the image
        predictions: List of prediction dicts with keys: model_name, coords, error
        output_file: Output file path
    """
    vis_image = image.copy()
    draw = ImageDraw.Draw(vis_image)

    # Fixed palette cycled so each model gets a distinct color
    colors = ["red", "blue", "orange", "purple", "brown", "pink", "gray", "olive"]

    for i, pred in enumerate(predictions):
        color = colors[i % len(colors)]
        coords = pred.get('coords')
        if coords is not None:
            px, py = coords
            # Crosshair plus a label with the model name and coordinates
            crosshair_size = 20
            draw.line([(px - crosshair_size, py), (px + crosshair_size, py)], fill=color, width=4)
            draw.line([(px, py - crosshair_size), (px, py + crosshair_size)], fill=color, width=4)
            draw.text((px + 15, py + 15), f"{pred['model_name']}: ({px},{py})", fill=color)
        else:
            # No coordinates: report the model's error in a stacked list
            draw.text((10, 50 + i * 20), f"{pred['model_name']}: ERROR - {pred.get('error')}", fill=color)

    # Add instruction at the top
    draw.text((10, 10), f"Instruction: {instruction}", fill="black")

    vis_image.save(output_file)
    print(f"Prediction visualization saved to: {output_file}")


def take_screenshot() -> "Image.Image":
    """
    Take a screenshot of the current screen using pyautogui.

    Returns:
        PIL Image of the screenshot

    Raises:
        ImportError: If pyautogui is not installed (a hint is printed first).
        Exception: Re-raised from pyautogui on any capture failure.
    """
    try:
        import pyautogui
        return pyautogui.screenshot()
    except ImportError:
        print("pyautogui not installed. Please install it with: pip install pyautogui")
        raise
    except Exception as e:
        print(f"Error taking screenshot: {e}")
        raise
def sanitize_image_urls(data: Any) -> Any:
    """
    Recursively replace every value stored under an 'image_url' key with
    the placeholder '[omitted]'.

    Args:
        data: Any data structure (dict, list, or primitive type)

    Returns:
        A deep copy of the data with all 'image_url' values replaced with
        '[omitted]'; the input is left untouched.
    """
    if isinstance(data, dict):
        return {
            key: "[omitted]" if key == "image_url" else sanitize_image_urls(value)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [sanitize_image_urls(entry) for entry in data]
    # Primitive values (str, int, bool, None, ...) pass through unchanged.
    return data


def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: Optional[Path]) -> List[Dict[str, Any]]:
    """
    Persist base64-encoded screenshots from computer_call_output entries to
    files and point their image_url at the saved file path (keyed by call_id).

    Returns the items unchanged unless screenshot_dir is provided and exists.

    Args:
        items: List of message/result dicts potentially containing
            computer_call_output entries
        screenshot_dir: Directory to write screenshots into

    Returns:
        A new list with updated image_url fields when applicable.
    """
    if not items or not screenshot_dir or not screenshot_dir.exists():
        return items

    results: List[Dict[str, Any]] = []
    for entry in items:
        # Shallow copy; the nested 'output' dict is copied only when modified.
        msg = dict(entry)
        try:
            if msg.get("type") == "computer_call_output":
                call_id = msg.get("call_id")
                output = msg.get("output", {})
                url = output.get("image_url")
                if call_id and isinstance(url, str) and url.startswith("data:"):
                    # Derive the extension from the MIME prefix,
                    # e.g. "data:image/png;base64," -> "png".
                    try:
                        ext = url.split(";", 1)[0].split("/")[-1] or "png"
                    except Exception:
                        ext = "png"
                    dest = screenshot_dir / f"{call_id}.{ext}"
                    if not dest.exists():
                        # Best-effort write; decoding/IO failures are swallowed
                        # and the image_url is still rewritten below.
                        try:
                            payload = url.split(",", 1)[1]
                            dest.parent.mkdir(parents=True, exist_ok=True)
                            with open(dest, "wb") as fh:
                                fh.write(base64.b64decode(payload))
                        except Exception:
                            pass
                    rewritten = dict(output)
                    rewritten["image_url"] = str(dest)
                    msg["output"] = rewritten
        except Exception:
            # Malformed entries are kept as-is rather than blocking the batch.
            pass
        results.append(msg)
    return results
122 | """ 123 | self.trajectory_dir = Path(trajectory_dir) 124 | self.trajectory_id: Optional[str] = None 125 | self.current_turn: int = 0 126 | self.current_artifact: int = 0 127 | self.model: Optional[str] = None 128 | self.total_usage: Dict[str, Any] = {} 129 | self.reset_on_run = reset_on_run 130 | # Optional directory to store extracted screenshots from metadata/new_items 131 | self.screenshot_dir: Optional[Path] = Path(screenshot_dir) if screenshot_dir else None 132 | 133 | # Ensure trajectory directory exists 134 | self.trajectory_dir.mkdir(parents=True, exist_ok=True) 135 | 136 | def _get_turn_dir(self) -> Path: 137 | """Get the directory for the current turn.""" 138 | if not self.trajectory_id: 139 | raise ValueError("Trajectory not initialized - call _on_run_start first") 140 | 141 | # format: trajectory_id/turn_000 142 | turn_dir = self.trajectory_dir / self.trajectory_id / f"turn_{self.current_turn:03d}" 143 | turn_dir.mkdir(parents=True, exist_ok=True) 144 | return turn_dir 145 | 146 | def _save_artifact(self, name: str, artifact: Union[str, bytes, Dict[str, Any]]) -> None: 147 | """Save an artifact to the current turn directory.""" 148 | turn_dir = self._get_turn_dir() 149 | if isinstance(artifact, bytes): 150 | # format: turn_000/0000_name.png 151 | artifact_filename = f"{self.current_artifact:04d}_{name}" 152 | artifact_path = turn_dir / f"{artifact_filename}.png" 153 | with open(artifact_path, "wb") as f: 154 | f.write(artifact) 155 | else: 156 | # format: turn_000/0000_name.json 157 | artifact_filename = f"{self.current_artifact:04d}_{name}" 158 | artifact_path = turn_dir / f"{artifact_filename}.json" 159 | # add created_at 160 | if isinstance(artifact, dict): 161 | artifact = artifact.copy() 162 | artifact["created_at"] = str(uuid.uuid1().time) 163 | with open(artifact_path, "w") as f: 164 | json.dump(sanitize_image_urls(artifact), f, indent=2) 165 | self.current_artifact += 1 166 | 167 | def _update_usage(self, usage: Dict[str, Any]) -> None: 
168 | """Update total usage statistics.""" 169 | def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None: 170 | for key, value in source.items(): 171 | if isinstance(value, dict): 172 | if key not in target: 173 | target[key] = {} 174 | add_dicts(target[key], value) 175 | else: 176 | if key not in target: 177 | target[key] = 0 178 | target[key] += value 179 | add_dicts(self.total_usage, usage) 180 | 181 | @override 182 | async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: 183 | """Initialize trajectory tracking for a new run.""" 184 | model = kwargs.get("model", "unknown") 185 | 186 | # Only reset trajectory state if reset_on_run is True or no trajectory exists 187 | if self.reset_on_run or not self.trajectory_id: 188 | model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16] 189 | if "+" in model: 190 | model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short 191 | # strip non-alphanumeric characters from model_name_short 192 | model_name_short = ''.join(c for c in model_name_short if c.isalnum() or c == '_') 193 | 194 | # id format: yyyy-mm-dd_model_hhmmss_uuid[:4] 195 | now = datetime.now() 196 | self.trajectory_id = f"{now.strftime('%Y-%m-%d')}_{model_name_short}_{now.strftime('%H%M%S')}_{str(uuid.uuid4())[:4]}" 197 | self.current_turn = 0 198 | self.current_artifact = 0 199 | self.model = model 200 | self.total_usage = {} 201 | 202 | # Create trajectory directory 203 | trajectory_path = self.trajectory_dir / self.trajectory_id 204 | trajectory_path.mkdir(parents=True, exist_ok=True) 205 | 206 | # Save trajectory metadata (optionally extract screenshots to screenshot_dir) 207 | kwargs_to_save = kwargs.copy() 208 | try: 209 | if "messages" in kwargs_to_save: 210 | kwargs_to_save["messages"] = extract_computer_call_outputs( 211 | kwargs_to_save["messages"], self.screenshot_dir 212 | ) 213 | except Exception: 214 | # If extraction fails, fall back to original messages 215 | pass 
216 | metadata = { 217 | "trajectory_id": self.trajectory_id, 218 | "created_at": str(uuid.uuid1().time), 219 | "status": "running", 220 | "kwargs": kwargs_to_save, 221 | } 222 | 223 | with open(trajectory_path / "metadata.json", "w") as f: 224 | json.dump(metadata, f, indent=2) 225 | else: 226 | # Continue with existing trajectory - just update model if needed 227 | self.model = model 228 | 229 | @override 230 | async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: 231 | """Finalize run tracking by updating metadata with completion status, usage, and new items.""" 232 | if not self.trajectory_id: 233 | return 234 | 235 | # Update metadata with completion status, total usage, and new items 236 | trajectory_path = self.trajectory_dir / self.trajectory_id 237 | metadata_path = trajectory_path / "metadata.json" 238 | 239 | # Read existing metadata 240 | if metadata_path.exists(): 241 | with open(metadata_path, "r") as f: 242 | metadata = json.load(f) 243 | else: 244 | metadata = {} 245 | 246 | # Update metadata with completion info 247 | # Optionally extract screenshots from new_items before persisting 248 | new_items_to_save = new_items 249 | try: 250 | new_items_to_save = extract_computer_call_outputs(new_items, self.screenshot_dir) 251 | except Exception: 252 | pass 253 | 254 | metadata.update({ 255 | "status": "completed", 256 | "completed_at": str(uuid.uuid1().time), 257 | "total_usage": self.total_usage, 258 | "new_items": new_items_to_save, 259 | "total_turns": self.current_turn 260 | }) 261 | 262 | # Save updated metadata 263 | with open(metadata_path, "w") as f: 264 | json.dump(metadata, f, indent=2) 265 | 266 | @override 267 | async def on_api_start(self, kwargs: Dict[str, Any]) -> None: 268 | if not self.trajectory_id: 269 | return 270 | 271 | self._save_artifact("api_start", { "kwargs": kwargs }) 272 | 273 | @override 274 | async def on_api_end(self, kwargs: Dict[str, Any], result: 
Any) -> None: 275 | """Save API call result.""" 276 | if not self.trajectory_id: 277 | return 278 | 279 | self._save_artifact("api_result", { "kwargs": kwargs, "result": result }) 280 | 281 | @override 282 | async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None: 283 | """Save a screenshot.""" 284 | if isinstance(screenshot, str): 285 | screenshot = base64.b64decode(screenshot) 286 | self._save_artifact(name, screenshot) 287 | 288 | @override 289 | async def on_usage(self, usage: Dict[str, Any]) -> None: 290 | """Called when usage information is received.""" 291 | self._update_usage(usage) 292 | 293 | @override 294 | async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None: 295 | """Save responses to the current turn directory and update usage statistics.""" 296 | if not self.trajectory_id: 297 | return 298 | 299 | # Save responses 300 | turn_dir = self._get_turn_dir() 301 | response_data = { 302 | "timestamp": str(uuid.uuid1().time), 303 | "model": self.model, 304 | "kwargs": kwargs, 305 | "response": responses 306 | } 307 | 308 | self._save_artifact("agent_response", response_data) 309 | 310 | # Increment turn counter 311 | self.current_turn += 1 312 | 313 | def _draw_crosshair_on_image(self, image_bytes: bytes, x: int, y: int) -> bytes: 314 | """ 315 | Draw a red dot and crosshair at the specified coordinates on the image. 
316 | 317 | Args: 318 | image_bytes: The original image as bytes 319 | x: X coordinate for the crosshair 320 | y: Y coordinate for the crosshair 321 | 322 | Returns: 323 | Modified image as bytes with red dot and crosshair 324 | """ 325 | # Open the image 326 | image = Image.open(io.BytesIO(image_bytes)) 327 | draw = ImageDraw.Draw(image) 328 | 329 | # Draw crosshair lines (red, 2px thick) 330 | crosshair_size = 20 331 | line_width = 2 332 | color = "red" 333 | 334 | # Horizontal line 335 | draw.line([(x - crosshair_size, y), (x + crosshair_size, y)], fill=color, width=line_width) 336 | # Vertical line 337 | draw.line([(x, y - crosshair_size), (x, y + crosshair_size)], fill=color, width=line_width) 338 | 339 | # Draw center dot (filled circle) 340 | dot_radius = 3 341 | draw.ellipse([(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color) 342 | 343 | # Convert back to bytes 344 | output = io.BytesIO() 345 | image.save(output, format='PNG') 346 | return output.getvalue() 347 | 348 | @override 349 | async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: 350 | """ 351 | Called when a computer call has completed. 352 | Saves screenshots and computer call output. 
353 | """ 354 | if not self.trajectory_id: 355 | return 356 | 357 | self._save_artifact("computer_call_result", { "item": item, "result": result }) 358 | 359 | # Check if action has x/y coordinates and there's a screenshot in the result 360 | action = item.get("action", {}) 361 | if "x" in action and "y" in action: 362 | # Look for screenshot in the result 363 | for result_item in result: 364 | if (result_item.get("type") == "computer_call_output" and 365 | result_item.get("output", {}).get("type") == "input_image"): 366 | 367 | image_url = result_item["output"]["image_url"] 368 | 369 | # Extract base64 image data 370 | if image_url.startswith("data:image/"): 371 | # Format: data:image/png;base64,<base64_data> 372 | base64_data = image_url.split(",", 1)[1] 373 | else: 374 | # Assume it's just base64 data 375 | base64_data = image_url 376 | 377 | try: 378 | # Decode the image 379 | image_bytes = base64.b64decode(base64_data) 380 | 381 | # Draw crosshair at the action coordinates 382 | annotated_image = self._draw_crosshair_on_image( 383 | image_bytes, 384 | int(action["x"]), 385 | int(action["y"]) 386 | ) 387 | 388 | # Save as screenshot_action 389 | self._save_artifact("screenshot_action", annotated_image) 390 | 391 | except Exception as e: 392 | # If annotation fails, just log and continue 393 | print(f"Failed to annotate screenshot: {e}") 394 | 395 | break # Only process the first screenshot found 396 | 397 | # Increment turn counter 398 | self.current_turn += 1 ``` -------------------------------------------------------------------------------- /tests/test_files.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | File System Interface Tests 3 | Tests for the file system methods of the Computer interface (macOS). 
4 | Required environment variables: 5 | - CUA_API_KEY: API key for Cua cloud provider 6 | - CUA_CONTAINER_NAME: Name of the container to use 7 | """ 8 | 9 | import os 10 | import asyncio 11 | import pytest 12 | from pathlib import Path 13 | import sys 14 | import traceback 15 | 16 | # Load environment variables from .env file 17 | project_root = Path(__file__).parent.parent 18 | env_file = project_root / ".env" 19 | print(f"Loading environment from: {env_file}") 20 | from dotenv import load_dotenv 21 | 22 | load_dotenv(env_file) 23 | 24 | # Add paths to sys.path if needed 25 | pythonpath = os.environ.get("PYTHONPATH", "") 26 | for path in pythonpath.split(":"): 27 | if path and path not in sys.path: 28 | sys.path.insert(0, path) # Insert at beginning to prioritize 29 | print(f"Added to sys.path: {path}") 30 | 31 | from computer import Computer, VMProviderType 32 | 33 | @pytest.fixture(scope="session") 34 | async def computer(): 35 | """Shared Computer instance for all test cases.""" 36 | # Create a remote Linux computer with Cua 37 | computer = Computer( 38 | os_type="linux", 39 | api_key=os.getenv("CUA_API_KEY"), 40 | name=str(os.getenv("CUA_CONTAINER_NAME")), 41 | provider_type=VMProviderType.CLOUD, 42 | ) 43 | 44 | # Create a local macOS computer with Cua 45 | # computer = Computer() 46 | 47 | # Connect to host computer 48 | # computer = Computer(use_host_computer_server=True) 49 | 50 | try: 51 | await computer.run() 52 | yield computer 53 | finally: 54 | await computer.disconnect() 55 | 56 | @pytest.mark.asyncio(loop_scope="session") 57 | async def test_file_exists(computer): 58 | tmp_path = "test_file_exists.txt" 59 | # Ensure file does not exist 60 | if await computer.interface.file_exists(tmp_path): 61 | await computer.interface.delete_file(tmp_path) 62 | exists = await computer.interface.file_exists(tmp_path) 63 | assert exists is False, f"File {tmp_path} should not exist" 64 | # Create file and check again 65 | await computer.interface.write_text(tmp_path, 
"hello") 66 | exists = await computer.interface.file_exists(tmp_path) 67 | assert exists is True, f"File {tmp_path} should exist" 68 | await computer.interface.delete_file(tmp_path) 69 | 70 | 71 | @pytest.mark.asyncio(loop_scope="session") 72 | async def test_directory_exists(computer): 73 | tmp_dir = "test_directory_exists" 74 | if await computer.interface.directory_exists(tmp_dir): 75 | # Remove all files in directory before removing directory 76 | files = await computer.interface.list_dir(tmp_dir) 77 | for fname in files: 78 | await computer.interface.delete_file(f"{tmp_dir}/{fname}") 79 | # Remove the directory itself 80 | await computer.interface.delete_dir(tmp_dir) 81 | exists = await computer.interface.directory_exists(tmp_dir) 82 | assert exists is False, f"Directory {tmp_dir} should not exist" 83 | await computer.interface.create_dir(tmp_dir) 84 | exists = await computer.interface.directory_exists(tmp_dir) 85 | assert exists is True, f"Directory {tmp_dir} should exist" 86 | # Cleanup: remove files and directory 87 | files = await computer.interface.list_dir(tmp_dir) 88 | for fname in files: 89 | await computer.interface.delete_file(f"{tmp_dir}/{fname}") 90 | await computer.interface.delete_dir(tmp_dir) 91 | 92 | 93 | @pytest.mark.asyncio(loop_scope="session") 94 | async def test_list_dir(computer): 95 | tmp_dir = "test_list_dir" 96 | if not await computer.interface.directory_exists(tmp_dir): 97 | await computer.interface.create_dir(tmp_dir) 98 | files = ["foo.txt", "bar.txt"] 99 | for fname in files: 100 | await computer.interface.write_text(f"{tmp_dir}/{fname}", "hi") 101 | result = await computer.interface.list_dir(tmp_dir) 102 | assert set(result) >= set(files), f"Directory {tmp_dir} should contain files {files}" 103 | for fname in files: 104 | await computer.interface.delete_file(f"{tmp_dir}/{fname}") 105 | await computer.interface.delete_dir(tmp_dir) 106 | 107 | 108 | @pytest.mark.asyncio(loop_scope="session") 109 | async def 
test_read_write_text(computer): 110 | tmp_path = "test_rw_text.txt" 111 | content = "sample text" 112 | await computer.interface.write_text(tmp_path, content) 113 | read = await computer.interface.read_text(tmp_path) 114 | assert read == content, "File content should match" 115 | await computer.interface.delete_file(tmp_path) 116 | 117 | 118 | @pytest.mark.asyncio(loop_scope="session") 119 | async def test_delete_file(computer): 120 | tmp_path = "test_delete_file.txt" 121 | await computer.interface.write_text(tmp_path, "bye") 122 | exists = await computer.interface.file_exists(tmp_path) 123 | assert exists is True, "File should exist" 124 | await computer.interface.delete_file(tmp_path) 125 | exists = await computer.interface.file_exists(tmp_path) 126 | assert exists is False, "File should not exist" 127 | 128 | 129 | @pytest.mark.asyncio(loop_scope="session") 130 | async def test_create_dir(computer): 131 | tmp_dir = "test_create_dir" 132 | if await computer.interface.directory_exists(tmp_dir): 133 | await computer.interface.delete_dir(tmp_dir) 134 | await computer.interface.create_dir(tmp_dir) 135 | exists = await computer.interface.directory_exists(tmp_dir) 136 | assert exists is True, "Directory should exist" 137 | await computer.interface.delete_dir(tmp_dir) 138 | 139 | 140 | @pytest.mark.asyncio(loop_scope="session") 141 | async def test_read_bytes_basic(computer): 142 | """Test basic read_bytes functionality.""" 143 | tmp_path = "test_read_bytes.bin" 144 | test_data = b"Hello, World! 
This is binary data \x00\x01\x02\x03" 145 | 146 | # Write binary data using write_text (assuming it handles bytes) 147 | await computer.interface.write_text(tmp_path, test_data.decode('latin-1')) 148 | 149 | # Read all bytes 150 | read_data = await computer.interface.read_bytes(tmp_path) 151 | assert read_data == test_data, "Binary data should match" 152 | 153 | await computer.interface.delete_file(tmp_path) 154 | 155 | 156 | @pytest.mark.asyncio(loop_scope="session") 157 | async def test_read_bytes_with_offset_and_length(computer): 158 | """Test read_bytes with offset and length parameters.""" 159 | tmp_path = "test_read_bytes_offset.bin" 160 | test_data = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 161 | 162 | # Write test data 163 | await computer.interface.write_text(tmp_path, test_data.decode('latin-1')) 164 | 165 | # Test reading with offset only 166 | read_data = await computer.interface.read_bytes(tmp_path, offset=5) 167 | expected = test_data[5:] 168 | assert read_data == expected, f"Data from offset 5 should match. Got: {read_data}, Expected: {expected}" 169 | 170 | # Test reading with offset and length 171 | read_data = await computer.interface.read_bytes(tmp_path, offset=10, length=5) 172 | expected = test_data[10:15] 173 | assert read_data == expected, f"Data from offset 10, length 5 should match. Got: {read_data}, Expected: {expected}" 174 | 175 | # Test reading from beginning with length 176 | read_data = await computer.interface.read_bytes(tmp_path, offset=0, length=10) 177 | expected = test_data[:10] 178 | assert read_data == expected, f"Data from beginning, length 10 should match. 
Got: {read_data}, Expected: {expected}" 179 | 180 | await computer.interface.delete_file(tmp_path) 181 | 182 | 183 | @pytest.mark.asyncio(loop_scope="session") 184 | async def test_get_file_size(computer): 185 | """Test get_file_size functionality.""" 186 | tmp_path = "test_file_size.txt" 187 | test_content = "A" * 1000 # 1000 bytes 188 | 189 | await computer.interface.write_text(tmp_path, test_content) 190 | 191 | file_size = await computer.interface.get_file_size(tmp_path) 192 | assert file_size == 1000, f"File size should be 1000 bytes, got {file_size}" 193 | 194 | await computer.interface.delete_file(tmp_path) 195 | 196 | 197 | @pytest.mark.asyncio(loop_scope="session") 198 | async def test_read_large_file(computer): 199 | """Test reading a file larger than 10MB to verify chunked reading.""" 200 | tmp_path = "test_large_file.bin" 201 | 202 | # Create a file larger than 10MB (10 * 1024 * 1024 = 10,485,760 bytes) 203 | total_size = 12 * 1024 * 1024 # 12MB 204 | 205 | print(f"Creating large file of {total_size} bytes ({total_size / (1024*1024):.1f}MB)...") 206 | 207 | # Create large file content (this will test the chunked writing functionality) 208 | large_content = b"X" * total_size 209 | 210 | # Write the large file using write_bytes (will automatically use chunked writing) 211 | await computer.interface.write_bytes(tmp_path, large_content) 212 | 213 | # Verify file size 214 | file_size = await computer.interface.get_file_size(tmp_path) 215 | assert file_size == total_size, f"Large file size should be {total_size} bytes, got {file_size}" 216 | 217 | print(f"Large file created successfully: {file_size} bytes") 218 | 219 | # Test reading the entire large file (should use chunked reading) 220 | print("Reading large file...") 221 | read_data = await computer.interface.read_bytes(tmp_path) 222 | assert len(read_data) == total_size, f"Read data size should match file size. 
Got {len(read_data)}, expected {total_size}" 223 | 224 | # Verify content (should be all 'X' characters) 225 | expected_data = b"X" * total_size 226 | assert read_data == expected_data, "Large file content should be all 'X' characters" 227 | 228 | print("Large file read successfully!") 229 | 230 | # Test reading with offset and length on large file 231 | offset = 5 * 1024 * 1024 # 5MB offset 232 | length = 2 * 1024 * 1024 # 2MB length 233 | read_data = await computer.interface.read_bytes(tmp_path, offset=offset, length=length) 234 | assert len(read_data) == length, f"Partial read size should be {length}, got {len(read_data)}" 235 | assert read_data == b"X" * length, "Partial read content should be all 'X' characters" 236 | 237 | print("Large file partial read successful!") 238 | 239 | # Clean up 240 | await computer.interface.delete_file(tmp_path) 241 | print("Large file test completed successfully!") 242 | 243 | @pytest.mark.asyncio(loop_scope="session") 244 | async def test_read_write_text_with_encoding(computer): 245 | """Test reading and writing text files with different encodings.""" 246 | print("Testing text file operations with different encodings...") 247 | 248 | tmp_path = "test_encoding.txt" 249 | 250 | # Test UTF-8 encoding (default) 251 | utf8_content = "Hello, 世界! 🌍 Ñoño café" 252 | await computer.interface.write_text(tmp_path, utf8_content, encoding='utf-8') 253 | read_utf8 = await computer.interface.read_text(tmp_path, encoding='utf-8') 254 | assert read_utf8 == utf8_content, "UTF-8 content should match" 255 | 256 | # Test ASCII encoding 257 | ascii_content = "Hello, World! Simple ASCII text." 
258 | await computer.interface.write_text(tmp_path, ascii_content, encoding='ascii') 259 | read_ascii = await computer.interface.read_text(tmp_path, encoding='ascii') 260 | assert read_ascii == ascii_content, "ASCII content should match" 261 | 262 | # Test Latin-1 encoding 263 | latin1_content = "Café, naïve, résumé" 264 | await computer.interface.write_text(tmp_path, latin1_content, encoding='latin-1') 265 | read_latin1 = await computer.interface.read_text(tmp_path, encoding='latin-1') 266 | assert read_latin1 == latin1_content, "Latin-1 content should match" 267 | 268 | # Clean up 269 | await computer.interface.delete_file(tmp_path) 270 | print("Text encoding test completed successfully!") 271 | 272 | @pytest.mark.asyncio(loop_scope="session") 273 | async def test_write_text_append_mode(computer): 274 | """Test appending text to files.""" 275 | print("Testing text file append mode...") 276 | 277 | tmp_path = "test_append.txt" 278 | 279 | # Write initial content 280 | initial_content = "First line\n" 281 | await computer.interface.write_text(tmp_path, initial_content) 282 | 283 | # Append more content 284 | append_content = "Second line\n" 285 | await computer.interface.write_text(tmp_path, append_content, append=True) 286 | 287 | # Read and verify 288 | final_content = await computer.interface.read_text(tmp_path) 289 | expected_content = initial_content + append_content 290 | assert final_content == expected_content, f"Expected '{expected_content}', got '{final_content}'" 291 | 292 | # Append one more line 293 | third_content = "Third line\n" 294 | await computer.interface.write_text(tmp_path, third_content, append=True) 295 | 296 | # Read and verify final result 297 | final_content = await computer.interface.read_text(tmp_path) 298 | expected_content = initial_content + append_content + third_content 299 | assert final_content == expected_content, f"Expected '{expected_content}', got '{final_content}'" 300 | 301 | # Clean up 302 | await 
computer.interface.delete_file(tmp_path) 303 | print("Text append test completed successfully!") 304 | 305 | @pytest.mark.asyncio(loop_scope="session") 306 | async def test_large_text_file(computer): 307 | """Test reading and writing large text files (>5MB) to verify chunked operations.""" 308 | print("Testing large text file operations...") 309 | 310 | tmp_path = "test_large_text.txt" 311 | 312 | # Create a large text content (approximately 6MB) 313 | # Each line is about 100 characters, so 60,000 lines ≈ 6MB 314 | line_template = "This is line {:06d} with some additional text to make it longer and reach about 100 chars.\n" 315 | large_content = "" 316 | num_lines = 60000 317 | 318 | print(f"Generating large text content with {num_lines} lines...") 319 | for i in range(num_lines): 320 | large_content += line_template.format(i) 321 | 322 | content_size_mb = len(large_content.encode('utf-8')) / (1024 * 1024) 323 | print(f"Generated text content size: {content_size_mb:.2f} MB") 324 | 325 | # Write the large text file 326 | print("Writing large text file...") 327 | await computer.interface.write_text(tmp_path, large_content) 328 | 329 | # Read the entire file back 330 | print("Reading large text file...") 331 | read_content = await computer.interface.read_text(tmp_path) 332 | 333 | # Verify content matches 334 | assert read_content == large_content, "Large text file content should match exactly" 335 | 336 | # Test partial reading by reading as bytes and decoding specific portions 337 | print("Testing partial text reading...") 338 | 339 | # Read first 1000 characters worth of bytes 340 | first_1000_chars = large_content[:1000] 341 | first_1000_bytes = first_1000_chars.encode('utf-8') 342 | read_bytes = await computer.interface.read_bytes(tmp_path, offset=0, length=len(first_1000_bytes)) 343 | decoded_partial = read_bytes.decode('utf-8') 344 | assert decoded_partial == first_1000_chars, "Partial text reading should match" 345 | 346 | # Test appending to large file 347 | 
print("Testing append to large text file...") 348 | append_text = "\n--- APPENDED CONTENT ---\nThis content was appended to the large file.\n" 349 | await computer.interface.write_text(tmp_path, append_text, append=True) 350 | 351 | # Read and verify appended content 352 | final_content = await computer.interface.read_text(tmp_path) 353 | expected_final = large_content + append_text 354 | assert final_content == expected_final, "Appended large text file should match" 355 | 356 | # Clean up 357 | await computer.interface.delete_file(tmp_path) 358 | print("Large text file test completed successfully!") 359 | 360 | @pytest.mark.asyncio(loop_scope="session") 361 | async def test_text_file_edge_cases(computer): 362 | """Test edge cases for text file operations.""" 363 | print("Testing text file edge cases...") 364 | 365 | tmp_path = "test_edge_cases.txt" 366 | 367 | # Test empty file 368 | empty_content = "" 369 | await computer.interface.write_text(tmp_path, empty_content) 370 | read_empty = await computer.interface.read_text(tmp_path) 371 | assert read_empty == empty_content, "Empty file should return empty string" 372 | 373 | # Test file with only whitespace 374 | whitespace_content = " \n\t\r\n \n" 375 | await computer.interface.write_text(tmp_path, whitespace_content) 376 | read_whitespace = await computer.interface.read_text(tmp_path) 377 | assert read_whitespace == whitespace_content, "Whitespace content should be preserved" 378 | 379 | # Test file with special characters and newlines 380 | special_content = "Line 1\nLine 2\r\nLine 3\tTabbed\nSpecial: !@#$%^&*()\n" 381 | await computer.interface.write_text(tmp_path, special_content) 382 | read_special = await computer.interface.read_text(tmp_path) 383 | assert read_special == special_content, "Special characters should be preserved" 384 | 385 | # Test very long single line (no newlines) 386 | long_line = "A" * 10000 # 10KB single line 387 | await computer.interface.write_text(tmp_path, long_line) 388 | 
read_long_line = await computer.interface.read_text(tmp_path) 389 | assert read_long_line == long_line, "Long single line should be preserved" 390 | 391 | # Clean up 392 | await computer.interface.delete_file(tmp_path) 393 | print("Text file edge cases test completed successfully!") 394 | 395 | if __name__ == "__main__": 396 | # Run tests directly 397 | pytest.main([__file__, "-v"]) 398 | ``` -------------------------------------------------------------------------------- /docs/src/app/(home)/[[...slug]]/page.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import { getApiVersions, source } from '@/lib/source'; 2 | import { getMDXComponents } from '@/mdx-components'; 3 | import { buttonVariants } from 'fumadocs-ui/components/ui/button'; 4 | import { 5 | Popover, 6 | PopoverContent, 7 | PopoverTrigger, 8 | } from 'fumadocs-ui/components/ui/popover'; 9 | import { createRelativeLink } from 'fumadocs-ui/mdx'; 10 | import { 11 | DocsBody, 12 | DocsDescription, 13 | DocsPage, 14 | DocsTitle, 15 | } from 'fumadocs-ui/page'; 16 | import { cn } from 'fumadocs-ui/utils/cn'; 17 | import { ChevronDown, CodeXml, ExternalLink } from 'lucide-react'; 18 | import type { Metadata } from 'next'; 19 | import Link from 'next/link'; 20 | import { notFound, redirect } from 'next/navigation'; 21 | 22 | export default async function Page(props: { 23 | params: Promise<{ slug?: string[] }>; 24 | }) { 25 | const params = await props.params; 26 | const slug = params.slug || []; 27 | const page = source.getPage(slug); 28 | if (!page) notFound(); //redirect('/docs'); 29 | 30 | // Detect if this is an API reference page: /api/[section] or /api/[section]/[version] 31 | let apiSection: string | null = null; 32 | let apiVersionSlug: string[] = []; 33 | if (slug[0] === 'api' && slug.length >= 2) { 34 | apiSection = slug[1]; 35 | if (slug.length > 2) { 36 | apiVersionSlug = slug.slice(2); 37 | } 38 | } 39 | 40 | let versionItems: { label: string; 
slug: string[] }[] = []; 41 | if (apiSection) { 42 | versionItems = await getApiVersions(apiSection); 43 | } 44 | 45 | const macos = page.data.macos; 46 | const windows = page.data.windows; 47 | const linux = page.data.linux; 48 | const pypi = page.data.pypi; 49 | const npm = page.data.npm; 50 | const github = page.data.github; 51 | 52 | const MDXContent = page.data.body; 53 | 54 | // Platform icons component 55 | const PlatformIcons = () => { 56 | const hasAnyPlatform = macos || windows || linux; 57 | if (!hasAnyPlatform && !pypi) return null; 58 | 59 | return ( 60 | <div className="flex flex-col gap-2"> 61 | {hasAnyPlatform && ( 62 | <div className="flex flex-row gap-2 items-left dark:text-neutral-400"> 63 | {windows && ( 64 | <svg 65 | xmlns="http://www.w3.org/2000/svg" 66 | fill="currentColor" 67 | className="h-5" 68 | viewBox="0 0 448 512"> 69 | <title>Windows</title> 70 | <path d="M0 93.7l183.6-25.3v177.4H0V93.7zm0 324.6l183.6 25.3V268.4H0v149.9zm203.8 28L448 480V268.4H203.8v177.9zm0-380.6v180.1H448V32L203.8 65.7z" /> 71 | </svg> 72 | )} 73 | {macos && ( 74 | <svg 75 | xmlns="http://www.w3.org/2000/svg" 76 | fill="currentColor" 77 | className="h-5" 78 | viewBox="0 0 384 512"> 79 | <title>macOS</title> 80 | <path d="M318.7 268.7c-.2-36.7 16.4-64.4 50-84.8-18.8-26.9-47.2-41.7-84.7-44.6-35.5-2.8-74.3 20.7-88.5 20.7-15 0-49.4-19.7-76.4-19.7C63.3 141.2 4 184.8 4 273.5q0 39.3 14.4 81.2c12.8 36.7 59 126.7 107.2 125.2 25.2-.6 43-17.9 75.8-17.9 31.8 0 48.3 17.9 76.4 17.9 48.6-.7 90.4-82.5 102.6-119.3-65.2-30.7-61.7-90-61.7-91.9zm-56.6-164.2c27.3-32.4 24.8-61.9 24-72.5-24.1 1.4-52 16.4-67.9 34.9-17.5 19.8-27.8 44.3-25.6 71.9 26.1 2 49.9-11.4 69.5-34.3z" /> 81 | </svg> 82 | )} 83 | {linux && ( 84 | <svg 85 | xmlns="http://www.w3.org/2000/svg" 86 | fill="currentColor" 87 | className="h-5" 88 | viewBox="0 0 448 512"> 89 | <title>Linux</title> 90 | <path d="M220.8 123.3c1 .5 1.8 1.7 3 1.7 1.1 0 2.8-.4 2.9-1.5 .2-1.4-1.9-2.3-3.2-2.9-1.7-.7-3.9-1-5.5-.1-.4 .2-.8 .7-.6 1.1 .3 
1.3 2.3 1.1 3.4 1.7zm-21.9 1.7c1.2 0 2-1.2 3-1.7 1.1-.6 3.1-.4 3.5-1.6 .2-.4-.2-.9-.6-1.1-1.6-.9-3.8-.6-5.5 .1-1.3 .6-3.4 1.5-3.2 2.9 .1 1 1.8 1.5 2.8 1.4zM420 403.8c-3.6-4-5.3-11.6-7.2-19.7-1.8-8.1-3.9-16.8-10.5-22.4-1.3-1.1-2.6-2.1-4-2.9-1.3-.8-2.7-1.5-4.1-2 9.2-27.3 5.6-54.5-3.7-79.1-11.4-30.1-31.3-56.4-46.5-74.4-17.1-21.5-33.7-41.9-33.4-72C311.1 85.4 315.7 .1 234.8 0 132.4-.2 158 103.4 156.9 135.2c-1.7 23.4-6.4 41.8-22.5 64.7-18.9 22.5-45.5 58.8-58.1 96.7-6 17.9-8.8 36.1-6.2 53.3-6.5 5.8-11.4 14.7-16.6 20.2-4.2 4.3-10.3 5.9-17 8.3s-14 6-18.5 14.5c-2.1 3.9-2.8 8.1-2.8 12.4 0 3.9 .6 7.9 1.2 11.8 1.2 8.1 2.5 15.7 .8 20.8-5.2 14.4-5.9 24.4-2.2 31.7 3.8 7.3 11.4 10.5 20.1 12.3 17.3 3.6 40.8 2.7 59.3 12.5 19.8 10.4 39.9 14.1 55.9 10.4 11.6-2.6 21.1-9.6 25.9-20.2 12.5-.1 26.3-5.4 48.3-6.6 14.9-1.2 33.6 5.3 55.1 4.1 .6 2.3 1.4 4.6 2.5 6.7v.1c8.3 16.7 23.8 24.3 40.3 23 16.6-1.3 34.1-11 48.3-27.9 13.6-16.4 36-23.2 50.9-32.2 7.4-4.5 13.4-10.1 13.9-18.3 .4-8.2-4.4-17.3-15.5-29.7zM223.7 87.3c9.8-22.2 34.2-21.8 44-.4 6.5 14.2 3.6 30.9-4.3 40.4-1.6-.8-5.9-2.6-12.6-4.9 1.1-1.2 3.1-2.7 3.9-4.6 4.8-11.8-.2-27-9.1-27.3-7.3-.5-13.9 10.8-11.8 23-4.1-2-9.4-3.5-13-4.4-1-6.9-.3-14.6 2.9-21.8zM183 75.8c10.1 0 20.8 14.2 19.1 33.5-3.5 1-7.1 2.5-10.2 4.6 1.2-8.9-3.3-20.1-9.6-19.6-8.4 .7-9.8 21.2-1.8 28.1 1 .8 1.9-.2-5.9 5.5-15.6-14.6-10.5-52.1 8.4-52.1zm-13.6 60.7c6.2-4.6 13.6-10 14.1-10.5 4.7-4.4 13.5-14.2 27.9-14.2 7.1 0 15.6 2.3 25.9 8.9 6.3 4.1 11.3 4.4 22.6 9.3 8.4 3.5 13.7 9.7 10.5 18.2-2.6 7.1-11 14.4-22.7 18.1-11.1 3.6-19.8 16-38.2 14.9-3.9-.2-7-1-9.6-2.1-8-3.5-12.2-10.4-20-15-8.6-4.8-13.2-10.4-14.7-15.3-1.4-4.9 0-9 4.2-12.3zm3.3 334c-2.7 35.1-43.9 34.4-75.3 18-29.9-15.8-68.6-6.5-76.5-21.9-2.4-4.7-2.4-12.7 2.6-26.4v-.2c2.4-7.6 .6-16-.6-23.9-1.2-7.8-1.8-15 .9-20 3.5-6.7 8.5-9.1 14.8-11.3 10.3-3.7 11.8-3.4 19.6-9.9 5.5-5.7 9.5-12.9 14.3-18 5.1-5.5 10-8.1 17.7-6.9 8.1 1.2 15.1 6.8 21.9 16l19.6 35.6c9.5 19.9 43.1 48.4 41 68.9zm-1.4-25.9c-4.1-6.6-9.6-13.6-14.4-19.6 7.1 0 14.2-2.2 
16.7-8.9 2.3-6.2 0-14.9-7.4-24.9-13.5-18.2-38.3-32.5-38.3-32.5-13.5-8.4-21.1-18.7-24.6-29.9s-3-23.3-.3-35.2c5.2-22.9 18.6-45.2 27.2-59.2 2.3-1.7 .8 3.2-8.7 20.8-8.5 16.1-24.4 53.3-2.6 82.4 .6-20.7 5.5-41.8 13.8-61.5 12-27.4 37.3-74.9 39.3-112.7 1.1 .8 4.6 3.2 6.2 4.1 4.6 2.7 8.1 6.7 12.6 10.3 12.4 10 28.5 9.2 42.4 1.2 6.2-3.5 11.2-7.5 15.9-9 9.9-3.1 17.8-8.6 22.3-15 7.7 30.4 25.7 74.3 37.2 95.7 6.1 11.4 18.3 35.5 23.6 64.6 3.3-.1 7 .4 10.9 1.4 13.8-35.7-11.7-74.2-23.3-84.9-4.7-4.6-4.9-6.6-2.6-6.5 12.6 11.2 29.2 33.7 35.2 59 2.8 11.6 3.3 23.7 .4 35.7 16.4 6.8 35.9 17.9 30.7 34.8-2.2-.1-3.2 0-4.2 0 3.2-10.1-3.9-17.6-22.8-26.1-19.6-8.6-36-8.6-38.3 12.5-12.1 4.2-18.3 14.7-21.4 27.3-2.8 11.2-3.6 24.7-4.4 39.9-.5 7.7-3.6 18-6.8 29-32.1 22.9-76.7 32.9-114.3 7.2zm257.4-11.5c-.9 16.8-41.2 19.9-63.2 46.5-13.2 15.7-29.4 24.4-43.6 25.5s-26.5-4.8-33.7-19.3c-4.7-11.1-2.4-23.1 1.1-36.3 3.7-14.2 9.2-28.8 9.9-40.6 .8-15.2 1.7-28.5 4.2-38.7 2.6-10.3 6.6-17.2 13.7-21.1 .3-.2 .7-.3 1-.5 .8 13.2 7.3 26.6 18.8 29.5 12.6 3.3 30.7-7.5 38.4-16.3 9-.3 15.7-.9 22.6 5.1 9.9 8.5 7.1 30.3 17.1 41.6 10.6 11.6 14 19.5 13.7 24.6zM173.3 148.7c2 1.9 4.7 4.5 8 7.1 6.6 5.2 15.8 10.6 27.3 10.6 11.6 0 22.5-5.9 31.8-10.8 4.9-2.6 10.9-7 14.8-10.4s5.9-6.3 3.1-6.6-2.6 2.6-6 5.1c-4.4 3.2-9.7 7.4-13.9 9.8-7.4 4.2-19.5 10.2-29.9 10.2s-18.7-4.8-24.9-9.7c-3.1-2.5-5.7-5-7.7-6.9-1.5-1.4-1.9-4.6-4.3-4.9-1.4-.1-1.8 3.7 1.7 6.5z" /> 91 | </svg> 92 | )} 93 | </div> 94 | )} 95 | 96 | <div className="flex flex-row gap-2 items-left"> 97 | {pypi && ( 98 | <a 99 | target="_blank" 100 | href={`https://pypi.org/project/${pypi}/`} 101 | rel="noreferrer"> 102 | <img 103 | src={`https://img.shields.io/pypi/v/${pypi}?color=blue`} 104 | className="h-5" 105 | alt="PyPI" 106 | /> 107 | </a> 108 | )} 109 | {npm && ( 110 | <a 111 | target="_blank" 112 | href={`https://www.npmjs.com/package/${npm}`} 113 | rel="noreferrer"> 114 | <img 115 | src={`https://img.shields.io/npm/v/${npm}?color=bf4c4b`} 116 | className="h-5" 117 | alt="NPM" 
118 | /> 119 | </a> 120 | )} 121 | </div> 122 | </div> 123 | ); 124 | }; 125 | 126 | const tocHeader = () => { 127 | return ( 128 | <div className="w-fit"> 129 | <PlatformIcons /> 130 | <div className="flex gap-2 mt-2"> 131 | {github && 132 | github.length > 0 && 133 | (github.length === 1 ? ( 134 | <a 135 | href={github[0]} 136 | rel="noreferrer noopener" 137 | target="_blank" 138 | className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5" 139 | aria-label="Source" 140 | data-active="false"> 141 | <svg role="img" viewBox="0 0 24 24" fill="currentColor"> 142 | <path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path> 143 | </svg> 144 | Source 145 | <ExternalLink className="w-4 h-4 ml-auto" /> 146 | </a> 147 | ) : ( 148 | <Popover> 149 | <PopoverTrigger className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground 
md:[&_svg]:size-4.5"> 150 | <svg role="img" viewBox="0 0 24 24" fill="currentColor"> 151 | <path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path> 152 | </svg> 153 | Source 154 | <ChevronDown className="h-4 w-4" /> 155 | </PopoverTrigger> 156 | <PopoverContent className="w-48 p-1"> 157 | <div className="flex flex-col gap-1"> 158 | {github.map((link, index) => ( 159 | <a 160 | key={index} 161 | href={link} 162 | rel="noreferrer noopener" 163 | target="_blank" 164 | className="inline-flex gap-2 w-full items-center rounded-md p-2 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground"> 165 | {link.includes('python') 166 | ? 'Python' 167 | : link.includes('typescript') 168 | ? 
'TypeScript' 169 | : `Source ${index + 1}`} 170 | <ExternalLink className="w-4 h-4 ml-auto" /> 171 | </a> 172 | ))} 173 | </div> 174 | </PopoverContent> 175 | </Popover> 176 | ))} 177 | {slug.includes('libraries') && ( 178 | <a 179 | className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5" 180 | href={`/api/${page.data.title.toLowerCase()}`}> 181 | <CodeXml size={12} /> 182 | Reference 183 | </a> 184 | )} 185 | </div> 186 | <hr className="my-2 border-t border-fd-border" /> 187 | </div> 188 | ); 189 | }; 190 | 191 | return ( 192 | <DocsPage 193 | toc={page.data.toc} 194 | tableOfContent={{ header: tocHeader() }} 195 | full={page.data.full}> 196 | <div className="flex flex-row w-full items-start"> 197 | <div className="flex-1"> 198 | <div className="flex flex-row w-full"> 199 | <DocsTitle>{page.data.title}</DocsTitle> 200 | 201 | <div className="ml-auto"> 202 | {apiSection && versionItems.length > 1 && ( 203 | <Popover> 204 | <PopoverTrigger 205 | className={cn( 206 | buttonVariants({ 207 | color: 'secondary', 208 | size: 'sm', 209 | className: 'gap-2', 210 | }) 211 | )}> 212 | {(() => { 213 | // Find the current version label 214 | let currentLabel = 'Current'; 215 | if (apiVersionSlug.length > 0) { 216 | const found = versionItems.find( 217 | (item) => 218 | item.label !== 'Current' && 219 | apiVersionSlug[0] === item.label 220 | ); 221 | if (found) currentLabel = found.label; 222 | } 223 | return ( 224 | <> 225 | API Version: {currentLabel} 226 | <ChevronDown className="size-3.5 text-fd-muted-foreground" /> 227 | </> 228 | ); 229 | })()} 230 | </PopoverTrigger> 231 | <PopoverContent className="flex flex-col overflow-auto"> 232 | {versionItems.map((item) => { 233 | // Build the href for each version 234 | 
const href = 235 | item.label === 'Current' 236 | ? `/api/${apiSection}` 237 | : `/api/${apiSection}/${item.label}`; 238 | // Highlight current version 239 | const isCurrent = 240 | (item.label === 'Current' && 241 | apiVersionSlug.length === 0) || 242 | (item.label !== 'Current' && 243 | apiVersionSlug[0] === item.label); 244 | return ( 245 | <Link 246 | key={item.label} 247 | href={href} 248 | className={cn( 249 | 'px-3 py-1 rounded hover:bg-fd-muted', 250 | isCurrent && 'font-bold bg-fd-muted' 251 | )}> 252 | API version: {item.label} 253 | </Link> 254 | ); 255 | })} 256 | </PopoverContent> 257 | </Popover> 258 | )} 259 | </div> 260 | </div> 261 | <DocsDescription className="text-md mt-1"> 262 | {page.data.description} 263 | </DocsDescription> 264 | </div> 265 | </div> 266 | <DocsBody> 267 | <MDXContent 268 | components={getMDXComponents({ 269 | // this allows you to link to other pages with relative file paths 270 | a: createRelativeLink(source, page), 271 | })} 272 | /> 273 | </DocsBody> 274 | </DocsPage> 275 | ); 276 | } 277 | 278 | export async function generateStaticParams() { 279 | return source.generateParams(); 280 | } 281 | 282 | export async function generateMetadata(props: { 283 | params: Promise<{ slug?: string[] }>; 284 | }): Promise<Metadata> { 285 | const params = await props.params; 286 | const page = source.getPage(params.slug); 287 | if (!page) notFound(); 288 | 289 | let title = `${page.data.title} | Cua Docs`; 290 | if (page.url.includes('api')) title = `${page.data.title} | Cua API Docs`; 291 | if (page.url.includes('guide')) 292 | title = ` Guide: ${page.data.title} | Cua Docs`; 293 | 294 | return { 295 | title, 296 | description: page.data.description, 297 | openGraph: { 298 | title, 299 | description: page.data.description, 300 | type: 'article', 301 | siteName: 'Cua Docs', 302 | url: 'https://trycua.com/docs', 303 | }, 304 | }; 305 | } 306 | ``` -------------------------------------------------------------------------------- 
/libs/python/mcp-server/mcp_server/server.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | import base64 3 | import inspect 4 | import logging 5 | import os 6 | import signal 7 | import sys 8 | import traceback 9 | import uuid 10 | from typing import Any, Dict, List, Optional, Union, Tuple 11 | 12 | import anyio 13 | 14 | # Configure logging to output to stderr for debug visibility 15 | logging.basicConfig( 16 | level=logging.DEBUG, # Changed to DEBUG 17 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 18 | stream=sys.stderr, 19 | ) 20 | logger = logging.getLogger("mcp-server") 21 | 22 | # More visible startup message 23 | logger.debug("MCP Server module loading...") 24 | 25 | try: 26 | from mcp.server.fastmcp import Context, FastMCP 27 | # Use the canonical Image type 28 | from mcp.server.fastmcp.utilities.types import Image 29 | 30 | logger.debug("Successfully imported FastMCP") 31 | except ImportError as e: 32 | logger.error(f"Failed to import FastMCP: {e}") 33 | traceback.print_exc(file=sys.stderr) 34 | sys.exit(1) 35 | 36 | try: 37 | from computer import Computer 38 | from agent import ComputerAgent 39 | 40 | logger.debug("Successfully imported Computer and Agent modules") 41 | except ImportError as e: 42 | logger.error(f"Failed to import Computer/Agent modules: {e}") 43 | traceback.print_exc(file=sys.stderr) 44 | sys.exit(1) 45 | 46 | try: 47 | from .session_manager import get_session_manager, initialize_session_manager, shutdown_session_manager 48 | logger.debug("Successfully imported session manager") 49 | except ImportError as e: 50 | logger.error(f"Failed to import session manager: {e}") 51 | traceback.print_exc(file=sys.stderr) 52 | sys.exit(1) 53 | 54 | def get_env_bool(key: str, default: bool = False) -> bool: 55 | """Get boolean value from environment variable.""" 56 | return os.getenv(key, str(default)).lower() in ("true", "1", "yes") 57 | 58 | async def 
_maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None: 59 | """Call a context helper if it exists, awaiting the result when necessary.""" 60 | method = getattr(ctx, method_name, None) 61 | if not callable(method): 62 | return 63 | result = method(*args, **kwargs) 64 | if inspect.isawaitable(result): 65 | await result 66 | 67 | def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: 68 | """Normalise message content to a list of structured parts.""" 69 | if isinstance(content, list): 70 | return content 71 | if content is None: 72 | return [] 73 | return [{"type": "output_text", "text": str(content)}] 74 | 75 | def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str: 76 | """Extract textual content for inclusion in the aggregated result string.""" 77 | if isinstance(content, str): 78 | return content 79 | texts: List[str] = [] 80 | for part in content or []: 81 | if not isinstance(part, dict): 82 | continue 83 | if part.get("type") in {"output_text", "text"} and part.get("text"): 84 | texts.append(str(part["text"])) 85 | return "\n".join(texts) 86 | 87 | def _serialise_tool_content(content: Any) -> str: 88 | """Convert tool outputs into a string for aggregation.""" 89 | if isinstance(content, str): 90 | return content 91 | if isinstance(content, list): 92 | texts: List[str] = [] 93 | for part in content: 94 | if isinstance(part, dict) and part.get("type") in {"output_text", "text"} and part.get("text"): 95 | texts.append(str(part["text"])) 96 | if texts: 97 | return "\n".join(texts) 98 | if content is None: 99 | return "" 100 | return str(content) 101 | 102 | def serve() -> FastMCP: 103 | """Create and configure the MCP server.""" 104 | # NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it. 
105 | server = FastMCP(name="cua-agent") 106 | 107 | @server.tool(structured_output=False) 108 | async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any: 109 | """ 110 | Take a screenshot of the current MacOS VM screen and return the image. 111 | 112 | Args: 113 | session_id: Optional session ID for multi-client support. If not provided, a new session will be created. 114 | """ 115 | session_manager = get_session_manager() 116 | 117 | async with session_manager.get_session(session_id) as session: 118 | screenshot = await session.computer.interface.screenshot() 119 | # Returning Image object is fine when structured_output=False 120 | return Image(format="png", data=screenshot) 121 | 122 | @server.tool(structured_output=False) 123 | async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any: 124 | """ 125 | Run a Computer-Use Agent (CUA) task in a MacOS VM and return (combined text, final screenshot). 126 | 127 | Args: 128 | task: The task description for the agent to execute 129 | session_id: Optional session ID for multi-client support. If not provided, a new session will be created. 
130 | """ 131 | session_manager = get_session_manager() 132 | task_id = str(uuid.uuid4()) 133 | 134 | try: 135 | logger.info(f"Starting CUA task: {task} (task_id: {task_id})") 136 | 137 | async with session_manager.get_session(session_id) as session: 138 | # Register this task with the session 139 | await session_manager.register_task(session.session_id, task_id) 140 | 141 | try: 142 | # Get model name 143 | model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022") 144 | logger.info(f"Using model: {model_name}") 145 | 146 | # Create agent with the new v0.4.x API 147 | agent = ComputerAgent( 148 | model=model_name, 149 | only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")), 150 | verbosity=logging.INFO, 151 | tools=[session.computer], 152 | ) 153 | 154 | messages = [{"role": "user", "content": task}] 155 | 156 | # Collect all results 157 | aggregated_messages: List[str] = [] 158 | async for result in agent.run(messages): 159 | logger.info("Agent processing step") 160 | ctx.info("Agent processing step") 161 | 162 | outputs = result.get("output", []) 163 | for output in outputs: 164 | output_type = output.get("type") 165 | 166 | if output_type == "message": 167 | logger.debug("Streaming assistant message: %s", output) 168 | content = _normalise_message_content(output.get("content")) 169 | aggregated_text = _extract_text_from_content(content) 170 | if aggregated_text: 171 | aggregated_messages.append(aggregated_text) 172 | await _maybe_call_ctx_method( 173 | ctx, 174 | "yield_message", 175 | role=output.get("role", "assistant"), 176 | content=content, 177 | ) 178 | 179 | elif output_type in {"tool_use", "computer_call", "function_call"}: 180 | logger.debug("Streaming tool call: %s", output) 181 | call_id = output.get("id") or output.get("call_id") 182 | tool_name = output.get("name") or output.get("action", {}).get("type") 183 | tool_input = output.get("input") or output.get("arguments") or output.get("action") 184 | if call_id: 185 | 
await _maybe_call_ctx_method( 186 | ctx, 187 | "yield_tool_call", 188 | name=tool_name, 189 | call_id=call_id, 190 | input=tool_input, 191 | ) 192 | 193 | elif output_type in {"tool_result", "computer_call_output", "function_call_output"}: 194 | logger.debug("Streaming tool output: %s", output) 195 | call_id = output.get("call_id") or output.get("id") 196 | content = output.get("content") or output.get("output") 197 | aggregated_text = _serialise_tool_content(content) 198 | if aggregated_text: 199 | aggregated_messages.append(aggregated_text) 200 | if call_id: 201 | await _maybe_call_ctx_method( 202 | ctx, 203 | "yield_tool_output", 204 | call_id=call_id, 205 | output=content, 206 | is_error=output.get("status") == "failed" or output.get("is_error", False), 207 | ) 208 | 209 | logger.info("CUA task completed successfully") 210 | ctx.info("CUA task completed successfully") 211 | 212 | screenshot_image = Image( 213 | format="png", 214 | data=await session.computer.interface.screenshot(), 215 | ) 216 | 217 | return ( 218 | "\n".join(aggregated_messages).strip() or "Task completed with no text output.", 219 | screenshot_image, 220 | ) 221 | 222 | finally: 223 | # Unregister the task from the session 224 | await session_manager.unregister_task(session.session_id, task_id) 225 | 226 | except Exception as e: 227 | error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}" 228 | logger.error(error_msg) 229 | ctx.error(error_msg) 230 | 231 | # Try to get a screenshot from the session if available 232 | try: 233 | if session_id: 234 | async with session_manager.get_session(session_id) as session: 235 | screenshot = await session.computer.interface.screenshot() 236 | return ( 237 | f"Error during task execution: {str(e)}", 238 | Image(format="png", data=screenshot), 239 | ) 240 | except Exception: 241 | pass 242 | 243 | # If we can't get a screenshot, return a placeholder 244 | return ( 245 | f"Error during task execution: {str(e)}", 246 | Image(format="png", 
data=b""), 247 | ) 248 | 249 | @server.tool(structured_output=False) 250 | async def run_multi_cua_tasks(ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False) -> Any: 251 | """ 252 | Run multiple CUA tasks and return a list of (combined text, screenshot). 253 | 254 | Args: 255 | tasks: List of task descriptions to execute 256 | session_id: Optional session ID for multi-client support. If not provided, a new session will be created. 257 | concurrent: If True, run tasks concurrently. If False, run sequentially (default). 258 | """ 259 | total_tasks = len(tasks) 260 | if total_tasks == 0: 261 | ctx.report_progress(1.0) 262 | return [] 263 | 264 | session_manager = get_session_manager() 265 | 266 | if concurrent and total_tasks > 1: 267 | # Run tasks concurrently 268 | logger.info(f"Running {total_tasks} tasks concurrently") 269 | ctx.info(f"Running {total_tasks} tasks concurrently") 270 | 271 | # Create tasks with progress tracking 272 | async def run_task_with_progress(task_index: int, task: str) -> Tuple[int, Tuple[str, Image]]: 273 | ctx.report_progress(task_index / total_tasks) 274 | result = await run_cua_task(ctx, task, session_id) 275 | ctx.report_progress((task_index + 1) / total_tasks) 276 | return task_index, result 277 | 278 | # Create all task coroutines 279 | task_coroutines = [run_task_with_progress(i, task) for i, task in enumerate(tasks)] 280 | 281 | # Wait for all tasks to complete 282 | results_with_indices = await asyncio.gather(*task_coroutines, return_exceptions=True) 283 | 284 | # Sort results by original task order and handle exceptions 285 | results: List[Tuple[str, Image]] = [] 286 | for result in results_with_indices: 287 | if isinstance(result, Exception): 288 | logger.error(f"Task failed with exception: {result}") 289 | ctx.error(f"Task failed: {str(result)}") 290 | results.append((f"Task failed: {str(result)}", Image(format="png", data=b""))) 291 | else: 292 | _, task_result = result 293 | 
results.append(task_result) 294 | 295 | return results 296 | else: 297 | # Run tasks sequentially (original behavior) 298 | logger.info(f"Running {total_tasks} tasks sequentially") 299 | ctx.info(f"Running {total_tasks} tasks sequentially") 300 | 301 | results: List[Tuple[str, Image]] = [] 302 | for i, task in enumerate(tasks): 303 | logger.info(f"Running task {i+1}/{total_tasks}: {task}") 304 | ctx.info(f"Running task {i+1}/{total_tasks}: {task}") 305 | 306 | ctx.report_progress(i / total_tasks) 307 | task_result = await run_cua_task(ctx, task, session_id) 308 | results.append(task_result) 309 | ctx.report_progress((i + 1) / total_tasks) 310 | 311 | return results 312 | 313 | @server.tool(structured_output=False) 314 | async def get_session_stats(ctx: Context) -> Dict[str, Any]: 315 | """ 316 | Get statistics about active sessions and resource usage. 317 | """ 318 | session_manager = get_session_manager() 319 | return session_manager.get_session_stats() 320 | 321 | @server.tool(structured_output=False) 322 | async def cleanup_session(ctx: Context, session_id: str) -> str: 323 | """ 324 | Cleanup a specific session and release its resources. 
325 | 326 | Args: 327 | session_id: The session ID to cleanup 328 | """ 329 | session_manager = get_session_manager() 330 | await session_manager.cleanup_session(session_id) 331 | return f"Session {session_id} cleanup initiated" 332 | 333 | return server 334 | 335 | 336 | server = serve() 337 | 338 | async def run_server(): 339 | """Run the MCP server with proper lifecycle management.""" 340 | session_manager = None 341 | try: 342 | logger.debug("Starting MCP server...") 343 | 344 | # Initialize session manager 345 | session_manager = await initialize_session_manager() 346 | logger.info("Session manager initialized") 347 | 348 | # Set up signal handlers for graceful shutdown 349 | def signal_handler(signum, frame): 350 | logger.info(f"Received signal {signum}, initiating graceful shutdown...") 351 | # Create a task to shutdown gracefully 352 | asyncio.create_task(graceful_shutdown()) 353 | 354 | signal.signal(signal.SIGINT, signal_handler) 355 | signal.signal(signal.SIGTERM, signal_handler) 356 | 357 | # Start the server 358 | logger.info("Starting FastMCP server...") 359 | # Use run_stdio_async directly instead of server.run() to avoid nested event loops 360 | await server.run_stdio_async() 361 | 362 | except Exception as e: 363 | logger.error(f"Error starting server: {e}") 364 | traceback.print_exc(file=sys.stderr) 365 | raise 366 | finally: 367 | # Ensure cleanup happens 368 | if session_manager: 369 | logger.info("Shutting down session manager...") 370 | await shutdown_session_manager() 371 | 372 | async def graceful_shutdown(): 373 | """Gracefully shutdown the server and all sessions.""" 374 | logger.info("Initiating graceful shutdown...") 375 | try: 376 | await shutdown_session_manager() 377 | logger.info("Graceful shutdown completed") 378 | except Exception as e: 379 | logger.error(f"Error during graceful shutdown: {e}") 380 | finally: 381 | # Exit the process 382 | import os 383 | os._exit(0) 384 | 385 | def main(): 386 | """Run the MCP server with proper 
async lifecycle management."""
    try:
        # Use anyio.run instead of asyncio.run to avoid nested event loop issues
        anyio.run(run_server)
    except KeyboardInterrupt:
        # Ctrl+C at the top level is a normal way to stop the server.
        logger.info("Server interrupted by user")
    except Exception as e:
        logger.error(f"Error starting server: {e}")
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)

if __name__ == "__main__":
    main()
```
--------------------------------------------------------------------------------
/libs/lume/src/Commands/Logs.swift:
--------------------------------------------------------------------------------
```swift
import ArgumentParser
import Foundation

struct Logs: ParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "View lume serve logs",
        subcommands: [Info.self, Error.self, All.self],
        defaultSubcommand: All.self
    )

    // Common functionality for reading log files.
    // Returns either the file's content (optionally only the last `lines`
    // lines) or a human-readable error string — it never throws.
    // NOTE(review): the `follow` parameter is accepted but never used in this
    // function; following is implemented separately in tailLogFile — confirm
    // whether the parameter can be dropped.
    static func readLogFile(path: String, lines: Int? = nil, follow: Bool = false) -> String {
        let fileManager = FileManager.default

        // Check if file exists
        guard fileManager.fileExists(atPath: path) else {
            return "Log file not found at \(path)"
        }

        do {
            // Read file content
            let content = try String(contentsOfFile: path, encoding: .utf8)

            // If lines parameter is provided, return only the specified number of lines from the end
            if let lineCount = lines {
                let allLines = content.components(separatedBy: .newlines)
                let startIndex = max(0, allLines.count - lineCount)
                let lastLines = Array(allLines[startIndex...])
                return lastLines.joined(separator: "\n")
            }

            return content
        } catch {
            return "Error reading log file: \(error.localizedDescription)"
        }
    }

    // Method for tailing a log file (following new changes)
    static func tailLogFile(path: String, initialLines: Int?
= 10) {
        let fileManager = FileManager.default

        // Check if file exists
        guard fileManager.fileExists(atPath: path) else {
            print("Log file not found at \(path)")
            return
        }

        do {
            // Get initial content with only the specified number of lines from the end
            var lastPosition: UInt64 = 0
            let fileHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: path))

            // First, print the last few lines of the file
            if let lines = initialLines {
                let content = try String(contentsOfFile: path, encoding: .utf8)
                let allLines = content.components(separatedBy: .newlines)
                let startIndex = max(0, allLines.count - lines)
                let lastLines = Array(allLines[startIndex...])
                print(lastLines.joined(separator: "\n"))
            }

            // Get current file size; subsequent reads start from here.
            lastPosition = UInt64(try fileManager.attributesOfItem(atPath: path)[.size] as? UInt64 ?? 0)

            // Set up for continuous monitoring
            print("\nTailing log file... Press Ctrl+C to stop")

            // Monitor file for changes. This loop never returns on its own;
            // the user stops it with Ctrl+C (the enclosing process exits, so
            // fileHandle is never explicitly closed).
            while true {
                // Brief pause to reduce CPU usage (0.5s polling interval)
                Thread.sleep(forTimeInterval: 0.5)

                // Get current size
                let currentSize = try fileManager.attributesOfItem(atPath: path)[.size] as? UInt64 ?? 0

                // If file has grown
                if currentSize > lastPosition {
                    // Seek to where we last read
                    fileHandle.seek(toFileOffset: lastPosition)

                    // Read new content
                    if let newData = try? fileHandle.readToEnd() {
                        if let newContent = String(data: newData, encoding: .utf8) {
                            // Print new content without adding an extra trailing newline
                            if newContent.hasSuffix("\n") {
                                print(newContent, terminator: "")
                            } else {
                                print(newContent)
                            }
                        }
                    }

                    // Update position
                    lastPosition = currentSize
                }

                // Handle file rotation (if file became smaller)
                else if currentSize < lastPosition {
                    // File was probably rotated, start from beginning
                    lastPosition = 0
                    fileHandle.seek(toFileOffset: 0)

                    if let newData = try? fileHandle.readToEnd() {
                        if let newContent = String(data: newData, encoding: .utf8) {
                            print(newContent, terminator: "")
                        }
                    }

                    // Resume tracking from the rotated file's current size.
                    lastPosition = currentSize
                }
            }
        } catch {
            print("Error tailing log file: \(error.localizedDescription)")
        }
    }

    // MARK: - Info Logs Subcommand

    // `lume logs info`: prints (or follows) the daemon's info log.
    struct Info: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "info",
            abstract: "View info logs from the daemon"
        )

        @Option(name: .shortAndLong, help: "Number of lines to display from the end of the file")
        var lines: Int?

        @Flag(name: .shortAndLong, help: "Follow log file continuously (like tail -f)")
        var follow: Bool = false

        func run() throws {
            let logPath = "/tmp/lume_daemon.log"

            print("=== Info Logs ===")

            if follow {
                // Use tailing functionality to continuously monitor the log
                Logs.tailLogFile(path: logPath, initialLines: lines ??
10)
            } else {
                // Regular one-time viewing of logs
                let content = Logs.readLogFile(path: logPath, lines: lines)
                print(content)
            }
        }
    }

    // MARK: - Error Logs Subcommand

    // `lume logs error`: prints (or follows) the daemon's error log.
    // NOTE(review): naming this nested type `Error` shadows Swift.Error inside
    // the Logs scope — confirm no `throws`/`catch` code in this scope relies
    // on the unqualified name.
    struct Error: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "error",
            abstract: "View error logs from the daemon"
        )

        @Option(name: .shortAndLong, help: "Number of lines to display from the end of the file")
        var lines: Int?

        @Flag(name: .shortAndLong, help: "Follow log file continuously (like tail -f)")
        var follow: Bool = false

        func run() throws {
            let logPath = "/tmp/lume_daemon.error.log"

            print("=== Error Logs ===")

            if follow {
                // Use tailing functionality to continuously monitor the log
                // (defaults to showing the last 10 lines first).
                Logs.tailLogFile(path: logPath, initialLines: lines ?? 10)
            } else {
                // Regular one-time viewing of logs
                let content = Logs.readLogFile(path: logPath, lines: lines)
                print(content)
            }
        }
    }

    // MARK: - All Logs Subcommand

    // `lume logs all` (the default subcommand): shows both logs at once.
    struct All: ParsableCommand {
        static let configuration = CommandConfiguration(
            commandName: "all",
            abstract: "View both info and error logs from the daemon"
        )

        @Option(name: .shortAndLong, help: "Number of lines to display from the end of each file")
        var lines: Int?

        @Flag(name: .shortAndLong, help: "Follow log files continuously (like tail -f)")
        var follow: Bool = false

        // Custom implementation to tail both logs simultaneously
        private func tailBothLogs(infoPath: String, errorPath: String, initialLines: Int?
= 10) { 193 | let fileManager = FileManager.default 194 | var infoExists = fileManager.fileExists(atPath: infoPath) 195 | var errorExists = fileManager.fileExists(atPath: errorPath) 196 | 197 | if !infoExists && !errorExists { 198 | print("Neither info nor error log files found") 199 | return 200 | } 201 | 202 | // Print initial content 203 | print("=== Info Logs ===") 204 | if infoExists { 205 | if let lines = initialLines { 206 | let content = (try? String(contentsOfFile: infoPath, encoding: .utf8)) ?? "" 207 | let allLines = content.components(separatedBy: .newlines) 208 | let startIndex = max(0, allLines.count - lines) 209 | let lastLines = Array(allLines[startIndex...]) 210 | print(lastLines.joined(separator: "\n")) 211 | } 212 | } else { 213 | print("Info log file not found") 214 | } 215 | 216 | print("\n=== Error Logs ===") 217 | if errorExists { 218 | if let lines = initialLines { 219 | let content = (try? String(contentsOfFile: errorPath, encoding: .utf8)) ?? "" 220 | let allLines = content.components(separatedBy: .newlines) 221 | let startIndex = max(0, allLines.count - lines) 222 | let lastLines = Array(allLines[startIndex...]) 223 | print(lastLines.joined(separator: "\n")) 224 | } 225 | } else { 226 | print("Error log file not found") 227 | } 228 | 229 | print("\nTailing both log files... Press Ctrl+C to stop") 230 | 231 | // Initialize file handles and positions 232 | var infoHandle: FileHandle? = nil 233 | var errorHandle: FileHandle? = nil 234 | var infoPosition: UInt64 = 0 235 | var errorPosition: UInt64 = 0 236 | 237 | // Set up file handles 238 | if infoExists { 239 | do { 240 | infoHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: infoPath)) 241 | infoPosition = UInt64(try fileManager.attributesOfItem(atPath: infoPath)[.size] as? UInt64 ?? 
0) 242 | } catch { 243 | print("Error opening info log file: \(error.localizedDescription)") 244 | } 245 | } 246 | 247 | if errorExists { 248 | do { 249 | errorHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: errorPath)) 250 | errorPosition = UInt64(try fileManager.attributesOfItem(atPath: errorPath)[.size] as? UInt64 ?? 0) 251 | } catch { 252 | print("Error opening error log file: \(error.localizedDescription)") 253 | } 254 | } 255 | 256 | // Monitor both files for changes 257 | while true { 258 | Thread.sleep(forTimeInterval: 0.5) 259 | 260 | // Check for new content in info log 261 | if let handle = infoHandle { 262 | do { 263 | // Re-check existence in case file was deleted 264 | infoExists = fileManager.fileExists(atPath: infoPath) 265 | if !infoExists { 266 | print("\n[Info log file was removed]") 267 | infoHandle = nil 268 | continue 269 | } 270 | 271 | let currentSize = try fileManager.attributesOfItem(atPath: infoPath)[.size] as? UInt64 ?? 0 272 | 273 | if currentSize > infoPosition { 274 | handle.seek(toFileOffset: infoPosition) 275 | if let newData = try? handle.readToEnd() { 276 | if let newContent = String(data: newData, encoding: .utf8) { 277 | print("\n--- New Info Log Content ---") 278 | if newContent.hasSuffix("\n") { 279 | print(newContent, terminator: "") 280 | } else { 281 | print(newContent) 282 | } 283 | } 284 | } 285 | infoPosition = currentSize 286 | } else if currentSize < infoPosition { 287 | // File was rotated 288 | print("\n[Info log was rotated]") 289 | infoPosition = 0 290 | handle.seek(toFileOffset: 0) 291 | if let newData = try? 
handle.readToEnd() { 292 | if let newContent = String(data: newData, encoding: .utf8) { 293 | print("\n--- New Info Log Content ---") 294 | print(newContent, terminator: "") 295 | } 296 | } 297 | infoPosition = currentSize 298 | } 299 | } catch { 300 | print("\nError reading info log: \(error.localizedDescription)") 301 | } 302 | } else if fileManager.fileExists(atPath: infoPath) && !infoExists { 303 | // File exists again after being deleted 304 | do { 305 | infoHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: infoPath)) 306 | infoPosition = 0 307 | infoExists = true 308 | print("\n[Info log file reappeared]") 309 | } catch { 310 | print("\nError reopening info log: \(error.localizedDescription)") 311 | } 312 | } 313 | 314 | // Check for new content in error log 315 | if let handle = errorHandle { 316 | do { 317 | // Re-check existence in case file was deleted 318 | errorExists = fileManager.fileExists(atPath: errorPath) 319 | if !errorExists { 320 | print("\n[Error log file was removed]") 321 | errorHandle = nil 322 | continue 323 | } 324 | 325 | let currentSize = try fileManager.attributesOfItem(atPath: errorPath)[.size] as? UInt64 ?? 0 326 | 327 | if currentSize > errorPosition { 328 | handle.seek(toFileOffset: errorPosition) 329 | if let newData = try? handle.readToEnd() { 330 | if let newContent = String(data: newData, encoding: .utf8) { 331 | print("\n--- New Error Log Content ---") 332 | if newContent.hasSuffix("\n") { 333 | print(newContent, terminator: "") 334 | } else { 335 | print(newContent) 336 | } 337 | } 338 | } 339 | errorPosition = currentSize 340 | } else if currentSize < errorPosition { 341 | // File was rotated 342 | print("\n[Error log was rotated]") 343 | errorPosition = 0 344 | handle.seek(toFileOffset: 0) 345 | if let newData = try? 
handle.readToEnd() { 346 | if let newContent = String(data: newData, encoding: .utf8) { 347 | print("\n--- New Error Log Content ---") 348 | print(newContent, terminator: "") 349 | } 350 | } 351 | errorPosition = currentSize 352 | } 353 | } catch { 354 | print("\nError reading error log: \(error.localizedDescription)") 355 | } 356 | } else if fileManager.fileExists(atPath: errorPath) && !errorExists { 357 | // File exists again after being deleted 358 | do { 359 | errorHandle = try FileHandle(forReadingFrom: URL(fileURLWithPath: errorPath)) 360 | errorPosition = 0 361 | errorExists = true 362 | print("\n[Error log file reappeared]") 363 | } catch { 364 | print("\nError reopening error log: \(error.localizedDescription)") 365 | } 366 | } 367 | } 368 | } 369 | 370 | func run() throws { 371 | let infoLogPath = "/tmp/lume_daemon.log" 372 | let errorLogPath = "/tmp/lume_daemon.error.log" 373 | 374 | if follow { 375 | // Use custom tailing implementation for both logs 376 | tailBothLogs(infoPath: infoLogPath, errorPath: errorLogPath, initialLines: lines ?? 10) 377 | } else { 378 | // Regular one-time viewing of logs 379 | let infoContent = Logs.readLogFile(path: infoLogPath, lines: lines) 380 | let errorContent = Logs.readLogFile(path: errorLogPath, lines: lines) 381 | 382 | print("=== Info Logs ===") 383 | print(infoContent) 384 | print("\n=== Error Logs ===") 385 | print(errorContent) 386 | } 387 | } 388 | } 389 | } 390 | ```