From ab02ae46c7ec563c106cf3f7128a0c9dc623ad4c Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Thu, 12 Mar 2026 22:03:25 -0300 Subject: [PATCH] Add model-native workspace file operations Remove shell-escaped file mutation from the stable workspace flow by adding explicit file and patch tools across the CLI, SDK, and MCP surfaces. This adds workspace file list/read/write plus unified text patch application, backed by new guest and manager file primitives that stay scoped to started workspaces and /workspace only. Patch application is preflighted on the host, file writes stay text-only and bounded, and the existing diff/export/reset semantics remain intact. The milestone also updates the 3.2.0 roadmap, public contract, docs, examples, and versioning, and includes focused coverage for the new helper module and dispatch paths. Validation: - uv lock - UV_CACHE_DIR=.uv-cache make check - UV_CACHE_DIR=.uv-cache make dist-check - real guest-backed smoke for workspace file read, patch apply, exec, export, and delete --- CHANGELOG.md | 10 + README.md | 16 +- docs/first-run.md | 24 +- docs/install.md | 12 +- docs/integrations.md | 5 + docs/public-contract.md | 24 + docs/roadmap/llm-chat-ergonomics.md | 9 +- .../3.2.0-model-native-workspace-file-ops.md | 8 +- examples/python_workspace.py | 21 + pyproject.toml | 2 +- .../linux-x86_64/guest/pyro_guest_agent.py | 166 ++++++ src/pyro_mcp/api.py | 100 ++++ src/pyro_mcp/cli.py | 280 +++++++++ src/pyro_mcp/contract.py | 16 + .../linux-x86_64/guest/pyro_guest_agent.py | 166 ++++++ .../runtime_bundle/linux-x86_64/manifest.json | 2 +- src/pyro_mcp/vm_environments.py | 2 +- src/pyro_mcp/vm_guest.py | 104 ++++ src/pyro_mcp/vm_manager.py | 530 ++++++++++++++++++ src/pyro_mcp/workspace_files.py | 456 +++++++++++++++ tests/test_api.py | 327 ++++++++++- tests/test_cli.py | 194 +++++++ tests/test_public_contract.py | 36 ++ tests/test_server.py | 61 ++ tests/test_vm_manager.py | 85 +++ tests/test_workspace_files.py | 427 ++++++++++++++ 
uv.lock | 2 +- 27 files changed, 3068 insertions(+), 17 deletions(-) create mode 100644 src/pyro_mcp/workspace_files.py create mode 100644 tests/test_workspace_files.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 27b4d47..2f230fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable user-visible changes to `pyro-mcp` are documented here. +## 3.2.0 + +- Added model-native live workspace file operations across the CLI, Python SDK, and MCP server + with `workspace file list|read|write` so agents can inspect and edit text files without shell + quoting tricks or host-side temp-file glue. +- Added `workspace patch apply` for explicit unified text diff application under `/workspace`, + with supported add/modify/delete patch forms and clear recovery guidance via `workspace reset`. +- Kept file operations scoped to started workspaces and `/workspace`, while preserving the existing + diff/export/snapshot/service/shell model around the stable workspace product. + ## 3.1.0 - Added explicit workspace lifecycle stop/start operations across the CLI, Python SDK, and MCP diff --git a/README.md b/README.md index 9171bd6..e129c00 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ It exposes the same runtime in three public forms: - Stable workspace walkthrough GIF: [docs/assets/workspace-first-run.gif](docs/assets/workspace-first-run.gif) - Terminal walkthrough GIF: [docs/assets/first-run.gif](docs/assets/first-run.gif) - PyPI package: [pypi.org/project/pyro-mcp](https://pypi.org/project/pyro-mcp/) -- What's new in 3.1.0: [CHANGELOG.md#310](CHANGELOG.md#310) +- What's new in 3.2.0: [CHANGELOG.md#320](CHANGELOG.md#320) - Host requirements: [docs/host-requirements.md](docs/host-requirements.md) - Integration targets: [docs/integrations.md](docs/integrations.md) - Public contract: [docs/public-contract.md](docs/public-contract.md) @@ -59,7 +59,7 @@ What success looks like: ```bash Platform: linux-x86_64 Runtime: PASS -Catalog version: 3.1.0 +Catalog version: 
3.2.0 ... [pull] phase=install environment=debian:12 [pull] phase=ready environment=debian:12 @@ -88,6 +88,8 @@ for the published package, or `uv run pyro ...` from a source checkout. uv tool install pyro-mcp WORKSPACE_ID="$(pyro workspace create debian:12 --seed-path ./repo --json | python -c 'import json,sys; print(json.load(sys.stdin)["workspace_id"])')" pyro workspace sync push "$WORKSPACE_ID" ./changes +pyro workspace file read "$WORKSPACE_ID" note.txt +pyro workspace patch apply "$WORKSPACE_ID" --patch "$(cat fix.patch)" pyro workspace exec "$WORKSPACE_ID" -- cat note.txt pyro workspace snapshot create "$WORKSPACE_ID" checkpoint pyro workspace service start "$WORKSPACE_ID" web --ready-file .web-ready -- sh -lc 'touch .web-ready && while true; do sleep 60; done' @@ -102,6 +104,7 @@ That stable workspace path gives you: - initial host-in seeding with `--seed-path` - later host-in updates with `workspace sync push` +- model-native file inspection and text edits with `workspace file *` and `workspace patch apply` - one-shot commands with `workspace exec` and persistent PTYs with `workspace shell *` - long-running processes with `workspace service *` - explicit checkpoints with `workspace snapshot *` @@ -118,6 +121,7 @@ After the quickstart works: - enable outbound guest networking for one workspace with `uvx --from pyro-mcp pyro workspace create debian:12 --network-policy egress` - add literal or file-backed secrets with `uvx --from pyro-mcp pyro workspace create debian:12 --secret API_TOKEN=expected --secret-file PIP_TOKEN=./token.txt` - map one persisted secret into one exec, shell, or service call with `--secret-env API_TOKEN` +- inspect and edit files without shell quoting with `uvx --from pyro-mcp pyro workspace file read WORKSPACE_ID src/app.py`, `uvx --from pyro-mcp pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")'`, and `uvx --from pyro-mcp pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)"` - diff the live workspace 
against its create-time baseline with `uvx --from pyro-mcp pyro workspace diff WORKSPACE_ID` - capture a checkpoint with `uvx --from pyro-mcp pyro workspace snapshot create WORKSPACE_ID checkpoint` - reset a broken workspace with `uvx --from pyro-mcp pyro workspace reset WORKSPACE_ID --snapshot checkpoint` @@ -180,7 +184,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 3.1.0 +Catalog version: 3.2.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -260,6 +264,10 @@ pyro workspace create debian:12 --network-policy egress pyro workspace create debian:12 --seed-path ./repo --secret API_TOKEN=expected pyro workspace create debian:12 --network-policy egress+published-ports pyro workspace sync push WORKSPACE_ID ./changes --dest src +pyro workspace file list WORKSPACE_ID src --recursive +pyro workspace file read WORKSPACE_ID src/note.txt +pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")' +pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" pyro workspace exec WORKSPACE_ID -- cat src/note.txt pyro workspace exec WORKSPACE_ID --secret-env API_TOKEN -- sh -lc 'test "$API_TOKEN" = "expected"' pyro workspace diff WORKSPACE_ID @@ -292,7 +300,7 @@ Persistent workspaces start in `/workspace` and keep command history until you d machine consumption, add `--json` and read the returned `workspace_id`. Use `--seed-path` when you want the workspace to start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty workspace. Use `pyro workspace sync push` when you want to import -later host-side changes into a started workspace. 
Sync is non-atomic in `3.1.0`; if it fails +later host-side changes into a started workspace. Sync is non-atomic in `3.2.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the live `/workspace` tree to its immutable create-time baseline, and `pyro workspace export` to copy one changed file or directory back to the host. Use diff --git a/docs/first-run.md b/docs/first-run.md index 82216f3..51c6bb3 100644 --- a/docs/first-run.md +++ b/docs/first-run.md @@ -22,7 +22,7 @@ Networking: tun=yes ip_forward=yes ```bash $ uvx --from pyro-mcp pyro env list -Catalog version: 3.1.0 +Catalog version: 3.2.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -76,6 +76,8 @@ a source checkout. 
$ uvx --from pyro-mcp pyro workspace create debian:12 --seed-path ./repo --json | tee /tmp/pyro-workspace.json $ export WORKSPACE_ID="$(python -c 'import json,sys; print(json.load(sys.stdin)["workspace_id"])' < /tmp/pyro-workspace.json)" $ uvx --from pyro-mcp pyro workspace sync push "$WORKSPACE_ID" ./changes +$ uvx --from pyro-mcp pyro workspace file read "$WORKSPACE_ID" note.txt +$ uvx --from pyro-mcp pyro workspace patch apply "$WORKSPACE_ID" --patch "$(cat fix.patch)" $ uvx --from pyro-mcp pyro workspace exec "$WORKSPACE_ID" -- cat note.txt $ uvx --from pyro-mcp pyro workspace snapshot create "$WORKSPACE_ID" checkpoint $ uvx --from pyro-mcp pyro workspace service start "$WORKSPACE_ID" web --ready-file .web-ready -- sh -lc 'touch .web-ready && while true; do sleep 60; done' @@ -95,6 +97,10 @@ $ uvx --from pyro-mcp pyro workspace delete "$WORKSPACE_ID" $ uvx --from pyro-mcp pyro demo $ uvx --from pyro-mcp pyro workspace create debian:12 --seed-path ./repo $ uvx --from pyro-mcp pyro workspace sync push WORKSPACE_ID ./changes +$ uvx --from pyro-mcp pyro workspace file list WORKSPACE_ID src --recursive +$ uvx --from pyro-mcp pyro workspace file read WORKSPACE_ID src/app.py +$ uvx --from pyro-mcp pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")' +$ uvx --from pyro-mcp pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" $ uvx --from pyro-mcp pyro workspace create debian:12 --network-policy egress $ uvx --from pyro-mcp pyro workspace create debian:12 --secret API_TOKEN=expected --secret-file PIP_TOKEN=./token.txt $ uvx --from pyro-mcp pyro workspace exec WORKSPACE_ID --secret-env API_TOKEN -- sh -lc 'test "$API_TOKEN" = "expected"' @@ -128,6 +134,17 @@ Command count: 0 $ uvx --from pyro-mcp pyro workspace sync push WORKSPACE_ID ./changes --dest src [workspace-sync] workspace_id=... mode=directory source=... destination=/workspace/src entry_count=... bytes_written=... 
execution_mode=guest_vsock +$ uvx --from pyro-mcp pyro workspace file list WORKSPACE_ID src --recursive +Workspace file path: /workspace/src +- /workspace/src/note.txt [file] bytes=... + +$ uvx --from pyro-mcp pyro workspace file read WORKSPACE_ID src/note.txt +hello from synced workspace +[workspace-file-read] workspace_id=... path=/workspace/src/note.txt size_bytes=... truncated=False execution_mode=guest_vsock + +$ uvx --from pyro-mcp pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" +[workspace-patch] workspace_id=... total=... added=... modified=... deleted=... execution_mode=guest_vsock + $ uvx --from pyro-mcp pyro workspace exec WORKSPACE_ID -- cat src/note.txt hello from synced workspace [workspace-exec] workspace_id=... sequence=1 cwd=/workspace execution_mode=guest_vsock exit_code=0 duration_ms=... @@ -231,11 +248,12 @@ State: started Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty `/workspace`. Use `pyro workspace sync push` when you need to import later host-side changes into a started -workspace. Sync is non-atomic in `3.1.0`; if it fails partway through, prefer `pyro workspace reset` +workspace. Sync is non-atomic in `3.2.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the current `/workspace` tree to its immutable create-time baseline, `pyro workspace snapshot *` to create named checkpoints, and `pyro workspace export` to copy one changed file or directory back to the -host. Use `pyro workspace exec` for one-shot commands and `pyro workspace shell *` when you +host. Use `pyro workspace file *` and `pyro workspace patch apply` for model-native text edits, +`pyro workspace exec` for one-shot commands, and `pyro workspace shell *` when you need a persistent interactive PTY session in that same workspace. 
Use `pyro workspace service *` when the workspace needs long-running background processes with typed readiness checks. Internal service state and logs stay outside `/workspace`, so service runtime data does not appear in diff --git a/docs/install.md b/docs/install.md index 092fcc4..67a35cb 100644 --- a/docs/install.md +++ b/docs/install.md @@ -85,7 +85,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 3.1.0 +Catalog version: 3.2.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -142,6 +142,8 @@ for the published package, or `uv run pyro ...` from a source checkout. uv tool install pyro-mcp WORKSPACE_ID="$(pyro workspace create debian:12 --seed-path ./repo --json | python -c 'import json,sys; print(json.load(sys.stdin)["workspace_id"])')" pyro workspace sync push "$WORKSPACE_ID" ./changes +pyro workspace file read "$WORKSPACE_ID" note.txt +pyro workspace patch apply "$WORKSPACE_ID" --patch "$(cat fix.patch)" pyro workspace exec "$WORKSPACE_ID" -- cat note.txt pyro workspace snapshot create "$WORKSPACE_ID" checkpoint pyro workspace service start "$WORKSPACE_ID" web --ready-file .web-ready -- sh -lc 'touch .web-ready && while true; do sleep 60; done' @@ -154,6 +156,7 @@ This is the stable persistent-workspace contract: - `workspace create` seeds `/workspace` - `workspace sync push` imports later host-side changes +- `workspace file *` and `workspace patch apply` cover model-native text inspection and edits - `workspace exec` and `workspace shell *` keep work inside one sandbox - `workspace service *` manages long-running processes with typed readiness - `workspace snapshot *` and `workspace reset` make reset-over-repair explicit @@ -208,6 +211,7 @@ After the CLI 
path works, you can move on to: - live workspace updates: `pyro workspace sync push WORKSPACE_ID ./changes` - guest networking policy: `pyro workspace create debian:12 --network-policy egress` - workspace secrets: `pyro workspace create debian:12 --secret API_TOKEN=expected --secret-file PIP_TOKEN=./token.txt` +- model-native file editing: `pyro workspace file read WORKSPACE_ID src/app.py`, `pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")'`, and `pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)"` - baseline diff: `pyro workspace diff WORKSPACE_ID` - snapshots and reset: `pyro workspace snapshot create WORKSPACE_ID checkpoint` and `pyro workspace reset WORKSPACE_ID --snapshot checkpoint` - host export: `pyro workspace export WORKSPACE_ID note.txt --output ./note.txt` @@ -229,6 +233,10 @@ pyro workspace create debian:12 --network-policy egress pyro workspace create debian:12 --seed-path ./repo --secret API_TOKEN=expected pyro workspace create debian:12 --network-policy egress+published-ports pyro workspace sync push WORKSPACE_ID ./changes --dest src +pyro workspace file list WORKSPACE_ID src --recursive +pyro workspace file read WORKSPACE_ID src/note.txt +pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")' +pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" pyro workspace exec WORKSPACE_ID -- cat src/note.txt pyro workspace exec WORKSPACE_ID --secret-env API_TOKEN -- sh -lc 'test "$API_TOKEN" = "expected"' pyro workspace diff WORKSPACE_ID @@ -261,7 +269,7 @@ Workspace commands default to the persistent `/workspace` directory inside the g the identifier programmatically, use `--json` and read the `workspace_id` field. Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive. Use `pyro workspace sync push` for later host-side changes to a started workspace. 
Sync -is non-atomic in `3.1.0`; if it fails partway through, prefer `pyro workspace reset` to recover +is non-atomic in `3.2.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the current workspace tree to its immutable create-time baseline, `pyro workspace snapshot *` to capture named checkpoints, and `pyro workspace export` to copy one changed file or directory back to the host. Use diff --git a/docs/integrations.md b/docs/integrations.md index ffaa2a3..f047697 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -32,6 +32,7 @@ Recommended surface: - `vm_run` - `workspace_create(seed_path=...)` + `workspace_sync_push` + `workspace_exec` when the agent needs persistent workspace state +- `workspace_file_list` / `workspace_file_read` / `workspace_file_write` / `workspace_patch_apply` when the agent needs model-native file inspection and text edits inside one live workspace - `workspace_create(..., secrets=...)` + `workspace_exec(..., secret_env=...)` when the workspace needs private tokens or authenticated setup - `workspace_create(..., network_policy="egress+published-ports")` + `start_service(..., published_ports=[...])` when the host must probe one workspace service - `workspace_diff` + `workspace_export` when the agent needs explicit baseline comparison or host-out file transfer @@ -73,6 +74,7 @@ Recommended default: - `Pyro.run_in_vm(...)` - `Pyro.create_workspace(seed_path=...)` + `Pyro.push_workspace_sync(...)` + `Pyro.exec_workspace(...)` when repeated workspace commands are required +- `Pyro.list_workspace_files(...)` / `Pyro.read_workspace_file(...)` / `Pyro.write_workspace_file(...)` / `Pyro.apply_workspace_patch(...)` when the agent needs model-native file inspection and text edits inside one live workspace - `Pyro.create_workspace(..., secrets=...)` + `Pyro.exec_workspace(..., secret_env=...)` when the workspace needs private tokens or 
authenticated setup - `Pyro.create_workspace(..., network_policy="egress+published-ports")` + `Pyro.start_service(..., published_ports=[...])` when the host must probe one workspace service - `Pyro.diff_workspace(...)` + `Pyro.export_workspace(...)` when the agent needs baseline comparison or host-out file transfer @@ -88,6 +90,9 @@ Lifecycle note: `/workspace` that starts from host content - use `push_workspace_sync(...)` when later host-side changes need to be imported into that running workspace without recreating it +- use `list_workspace_files(...)`, `read_workspace_file(...)`, `write_workspace_file(...)`, and + `apply_workspace_patch(...)` when the agent should inspect or edit workspace files without shell + quoting tricks - use `create_workspace(..., secrets=...)` plus `secret_env` on exec, shell, or service start when the agent needs private tokens or authenticated startup inside that workspace - use `create_workspace(..., network_policy="egress+published-ports")` plus diff --git a/docs/public-contract.md b/docs/public-contract.md index e5f5fca..bb4770e 100644 --- a/docs/public-contract.md +++ b/docs/public-contract.md @@ -29,7 +29,11 @@ Top-level commands: - `pyro workspace stop` - `pyro workspace start` - `pyro workspace exec` +- `pyro workspace file list` +- `pyro workspace file read` +- `pyro workspace file write` - `pyro workspace export` +- `pyro workspace patch apply` - `pyro workspace disk export` - `pyro workspace disk list` - `pyro workspace disk read` @@ -79,6 +83,9 @@ Behavioral guarantees: - `pyro workspace sync push WORKSPACE_ID SOURCE_PATH [--dest WORKSPACE_PATH]` imports later host-side directory or archive content into a started workspace. - `pyro workspace stop WORKSPACE_ID` stops one persistent workspace without deleting its `/workspace`, snapshots, or command history. - `pyro workspace start WORKSPACE_ID` restarts one stopped workspace without resetting `/workspace`. 
+- `pyro workspace file list WORKSPACE_ID [PATH] [--recursive]` returns metadata for one live path under `/workspace`. +- `pyro workspace file read WORKSPACE_ID PATH [--max-bytes N]` reads one regular text file under `/workspace`. +- `pyro workspace file write WORKSPACE_ID PATH --text TEXT` creates or replaces one regular text file under `/workspace`, creating missing parent directories automatically. - `pyro workspace export WORKSPACE_ID PATH --output HOST_PATH` exports one file or directory from `/workspace` back to the host. - `pyro workspace disk export WORKSPACE_ID --output HOST_PATH` copies the stopped guest-backed workspace rootfs as raw ext4 to the host. - `pyro workspace disk list WORKSPACE_ID [PATH] [--recursive]` inspects a stopped guest-backed workspace rootfs offline without booting the guest. @@ -92,6 +99,7 @@ Behavioral guarantees: - `pyro workspace exec --secret-env SECRET_NAME[=ENV_VAR]` maps one persisted secret into one exec call. - `pyro workspace service start --secret-env SECRET_NAME[=ENV_VAR]` maps one persisted secret into one service start call. - `pyro workspace exec` runs in the persistent `/workspace` for that workspace and does not auto-clean. +- `pyro workspace patch apply WORKSPACE_ID --patch TEXT` applies one unified text patch with add/modify/delete operations under `/workspace`. - `pyro workspace shell open --secret-env SECRET_NAME[=ENV_VAR]` maps one persisted secret into the opened shell environment. - `pyro workspace shell *` manages persistent PTY sessions inside a started workspace. - `pyro workspace logs` returns persisted command history for that workspace until `pyro workspace delete`. 
@@ -121,7 +129,11 @@ Supported public entrypoints: - `Pyro.push_workspace_sync(workspace_id, source_path, *, dest="/workspace")` - `Pyro.stop_workspace(workspace_id)` - `Pyro.start_workspace(workspace_id)` +- `Pyro.list_workspace_files(workspace_id, path="/workspace", recursive=False)` +- `Pyro.read_workspace_file(workspace_id, path, *, max_bytes=65536)` +- `Pyro.write_workspace_file(workspace_id, path, *, text)` - `Pyro.export_workspace(workspace_id, path, *, output_path)` +- `Pyro.apply_workspace_patch(workspace_id, *, patch)` - `Pyro.export_workspace_disk(workspace_id, *, output_path)` - `Pyro.list_workspace_disk(workspace_id, path="/workspace", recursive=False)` - `Pyro.read_workspace_disk(workspace_id, path, *, max_bytes=65536)` @@ -165,7 +177,11 @@ Stable public method names: - `push_workspace_sync(workspace_id, source_path, *, dest="/workspace")` - `stop_workspace(workspace_id)` - `start_workspace(workspace_id)` +- `list_workspace_files(workspace_id, path="/workspace", recursive=False)` +- `read_workspace_file(workspace_id, path, *, max_bytes=65536)` +- `write_workspace_file(workspace_id, path, *, text)` - `export_workspace(workspace_id, path, *, output_path)` +- `apply_workspace_patch(workspace_id, *, patch)` - `export_workspace_disk(workspace_id, *, output_path)` - `list_workspace_disk(workspace_id, path="/workspace", recursive=False)` - `read_workspace_disk(workspace_id, path, *, max_bytes=65536)` @@ -209,7 +225,9 @@ Behavioral defaults: - `Pyro.push_workspace_sync(...)` imports later host-side directory or archive content into a started workspace. - `Pyro.stop_workspace(...)` stops one persistent workspace without deleting its `/workspace`, snapshots, or command history. - `Pyro.start_workspace(...)` restarts one stopped workspace without resetting `/workspace`. +- `Pyro.list_workspace_files(...)`, `Pyro.read_workspace_file(...)`, and `Pyro.write_workspace_file(...)` provide structured live `/workspace` inspection and text edits without shell quoting. 
- `Pyro.export_workspace(...)` exports one file or directory from `/workspace` to an explicit host path. +- `Pyro.apply_workspace_patch(...)` applies unified text patches for add/modify/delete operations under `/workspace`. - `Pyro.export_workspace_disk(...)` copies the stopped guest-backed workspace rootfs as raw ext4 to an explicit host path. - `Pyro.list_workspace_disk(...)` inspects a stopped guest-backed workspace rootfs offline without booting the guest. - `Pyro.read_workspace_disk(...)` reads one regular file from a stopped guest-backed workspace rootfs offline. @@ -256,7 +274,11 @@ Persistent workspace tools: - `workspace_stop` - `workspace_start` - `workspace_exec` +- `workspace_file_list` +- `workspace_file_read` +- `workspace_file_write` - `workspace_export` +- `workspace_patch_apply` - `workspace_disk_export` - `workspace_disk_list` - `workspace_disk_read` @@ -291,7 +313,9 @@ Behavioral defaults: - `workspace_sync_push` imports later host-side directory or archive content into a started workspace, with an optional `dest` under `/workspace`. - `workspace_stop` stops one persistent workspace without deleting its `/workspace`, snapshots, or command history. - `workspace_start` restarts one stopped workspace without resetting `/workspace`. +- `workspace_file_list`, `workspace_file_read`, and `workspace_file_write` provide structured live `/workspace` inspection and text edits without shell quoting. - `workspace_export` exports one file or directory from `/workspace` to an explicit host path. +- `workspace_patch_apply` applies unified text patches for add/modify/delete operations under `/workspace`. - `workspace_disk_export` copies the stopped guest-backed workspace rootfs as raw ext4 to an explicit host path. - `workspace_disk_list` inspects a stopped guest-backed workspace rootfs offline without booting the guest. - `workspace_disk_read` reads one regular file from a stopped guest-backed workspace rootfs offline. 
diff --git a/docs/roadmap/llm-chat-ergonomics.md b/docs/roadmap/llm-chat-ergonomics.md index 07549f7..5dcd8cb 100644 --- a/docs/roadmap/llm-chat-ergonomics.md +++ b/docs/roadmap/llm-chat-ergonomics.md @@ -6,7 +6,7 @@ goal: make the core agent-workspace use cases feel trivial from a chat-driven LLM interface. -Current baseline is `3.1.0`: +Current baseline is `3.2.0`: - the stable workspace contract exists across CLI, SDK, and MCP - one-shot `pyro run` still exists as the narrow entrypoint @@ -45,12 +45,17 @@ More concretely, the model should not need to: ## Milestones -1. [`3.2.0` Model-Native Workspace File Ops](llm-chat-ergonomics/3.2.0-model-native-workspace-file-ops.md) +1. [`3.2.0` Model-Native Workspace File Ops](llm-chat-ergonomics/3.2.0-model-native-workspace-file-ops.md) - Done 2. [`3.3.0` Workspace Naming And Discovery](llm-chat-ergonomics/3.3.0-workspace-naming-and-discovery.md) 3. [`3.4.0` Tool Profiles And Canonical Chat Flows](llm-chat-ergonomics/3.4.0-tool-profiles-and-canonical-chat-flows.md) 4. [`3.5.0` Chat-Friendly Shell Output](llm-chat-ergonomics/3.5.0-chat-friendly-shell-output.md) 5. [`3.6.0` Use-Case Recipes And Smoke Packs](llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md) +Completed so far: + +- `3.2.0` added model-native `workspace file *` and `workspace patch apply` so chat-driven agents + can inspect and edit `/workspace` without shell-escaped file mutation flows. 
+ ## Expected Outcome After this roadmap, the product should still look like an agent workspace, not diff --git a/docs/roadmap/llm-chat-ergonomics/3.2.0-model-native-workspace-file-ops.md b/docs/roadmap/llm-chat-ergonomics/3.2.0-model-native-workspace-file-ops.md index 056528b..ea4e59c 100644 --- a/docs/roadmap/llm-chat-ergonomics/3.2.0-model-native-workspace-file-ops.md +++ b/docs/roadmap/llm-chat-ergonomics/3.2.0-model-native-workspace-file-ops.md @@ -1,6 +1,6 @@ # `3.2.0` Model-Native Workspace File Ops -Status: Planned +Status: Done ## Goal @@ -57,3 +57,9 @@ Planned additions: - docs and examples that show model-native file editing instead of shell-heavy file writes - at least one real smoke scenario centered on a repro-plus-fix loop + +## Outcome + +- shipped `workspace file list|read|write` and `workspace patch apply` across CLI, SDK, and MCP +- kept the surface scoped to started workspaces and `/workspace` +- updated docs, help text, examples, and smoke coverage around model-native editing flows diff --git a/examples/python_workspace.py b/examples/python_workspace.py index 335c125..019a1d6 100644 --- a/examples/python_workspace.py +++ b/examples/python_workspace.py @@ -31,6 +31,27 @@ def main() -> None: workspace_id = str(created["workspace_id"]) try: pyro.push_workspace_sync(workspace_id, sync_dir) + files = pyro.list_workspace_files(workspace_id, path="/workspace", recursive=True) + print(f"workspace_entries={len(files['entries'])}") + note = pyro.read_workspace_file(workspace_id, "note.txt") + print(note["content"], end="") + written = pyro.write_workspace_file( + workspace_id, + "src/app.py", + text="print('hello from file ops')\n", + ) + print(f"written_bytes={written['bytes_written']}") + patched = pyro.apply_workspace_patch( + workspace_id, + patch=( + "--- a/note.txt\n" + "+++ b/note.txt\n" + "@@ -1 +1 @@\n" + "-hello from sync\n" + "+hello from patch\n" + ), + ) + print(f"patch_changed={patched['changed']}") result = 
pyro.exec_workspace(workspace_id, command="cat note.txt") print(result["stdout"], end="") secret_result = pyro.exec_workspace( diff --git a/pyproject.toml b/pyproject.toml index b4a1ffe..6314939 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyro-mcp" -version = "3.1.0" +version = "3.2.0" description = "Stable Firecracker workspaces, one-shot sandboxes, and MCP tools for coding agents." readme = "README.md" license = { file = "LICENSE" } diff --git a/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py b/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py index 607c76d..a469d7b 100644 --- a/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py +++ b/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py @@ -3,6 +3,7 @@ from __future__ import annotations +import base64 import codecs import fcntl import io @@ -31,6 +32,7 @@ WORKSPACE_ROOT = PurePosixPath("/workspace") SHELL_ROOT = Path("/run/pyro-shells") SERVICE_ROOT = Path("/run/pyro-services") SECRET_ROOT = Path("/run/pyro-secrets") +WORKSPACE_FILE_MAX_BYTES = 1024 * 1024 SERVICE_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$") SHELL_SIGNAL_MAP = { "HUP": signal.SIGHUP, @@ -328,6 +330,153 @@ def _prepare_export_archive(path: str) -> dict[str, Any]: raise +def _workspace_entry(path_text: str, host_path: Path) -> dict[str, Any]: + try: + stat_result = os.lstat(host_path) + except FileNotFoundError as exc: + raise RuntimeError(f"workspace path does not exist: {path_text}") from exc + if host_path.is_symlink(): + return { + "path": path_text, + "artifact_type": "symlink", + "size_bytes": stat_result.st_size, + "link_target": os.readlink(host_path), + } + if host_path.is_dir(): + return { + "path": path_text, + "artifact_type": "directory", + "size_bytes": 0, + "link_target": None, + } + if host_path.is_file(): + return { + "path": path_text, + "artifact_type": "file", + "size_bytes": stat_result.st_size, + "link_target": None, + } + raise RuntimeError(f"unsupported 
workspace path type: {path_text}") + + +def _join_workspace_path(base: str, child_name: str) -> str: + base_path = PurePosixPath(base) + return str(base_path / child_name) if str(base_path) != "/" else f"/{child_name}" + + +def _list_workspace(path: str, *, recursive: bool) -> dict[str, Any]: + normalized_path, host_path = _normalize_destination(path) + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] != "directory": + return { + "path": str(normalized_path), + "artifact_type": entry["artifact_type"], + "entries": [entry], + } + entries: list[dict[str, Any]] = [] + + def walk(current_path: str, current_host_path: Path) -> None: + children: list[tuple[dict[str, Any], Path]] = [] + with os.scandir(current_host_path) as iterator: + for child in iterator: + child_host_path = Path(child.path) + children.append( + ( + _workspace_entry( + _join_workspace_path(current_path, child.name), + child_host_path, + ), + child_host_path, + ) + ) + children.sort(key=lambda item: str(item[0]["path"])) + for child_entry, child_host_path in children: + entries.append(child_entry) + if recursive and child_entry["artifact_type"] == "directory": + walk(str(child_entry["path"]), child_host_path) + + walk(str(normalized_path), host_path) + return { + "path": str(normalized_path), + "artifact_type": "directory", + "entries": entries, + } + + +def _read_workspace_file(path: str, *, max_bytes: int) -> dict[str, Any]: + if max_bytes <= 0: + raise RuntimeError("max_bytes must be positive") + if max_bytes > WORKSPACE_FILE_MAX_BYTES: + raise RuntimeError( + f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes" + ) + normalized_path, host_path = _normalize_destination(path) + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] != "file": + raise RuntimeError("workspace file read only supports regular files") + raw_bytes = host_path.read_bytes() + if len(raw_bytes) > max_bytes: + raise RuntimeError( + f"workspace file 
exceeds the maximum supported size of {max_bytes} bytes" + ) + return { + "path": str(normalized_path), + "size_bytes": len(raw_bytes), + "content_b64": base64.b64encode(raw_bytes).decode("ascii"), + } + + +def _ensure_no_symlink_parents_for_write(root: Path, target_path: Path, path_text: str) -> None: + relative_path = target_path.relative_to(root) + current = root + for part in relative_path.parts[:-1]: + current = current / part + if current.is_symlink(): + raise RuntimeError( + f"workspace path would traverse through a symlinked parent: {path_text}" + ) + + +def _write_workspace_file(path: str, *, text: str) -> dict[str, Any]: + raw_bytes = text.encode("utf-8") + if len(raw_bytes) > WORKSPACE_FILE_MAX_BYTES: + raise RuntimeError( + f"text must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8" + ) + normalized_path, host_path = _normalize_destination(path) + _ensure_no_symlink_parents_for_write(Path("/workspace"), host_path, str(normalized_path)) + if host_path.exists() or host_path.is_symlink(): + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] != "file": + raise RuntimeError("workspace file write only supports regular file targets") + host_path.parent.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + prefix=".pyro-workspace-write-", + dir=host_path.parent, + delete=False, + ) as handle: + temp_path = Path(handle.name) + handle.write(raw_bytes) + os.replace(temp_path, host_path) + return { + "path": str(normalized_path), + "size_bytes": len(raw_bytes), + "bytes_written": len(raw_bytes), + } + + +def _delete_workspace_path(path: str) -> dict[str, Any]: + normalized_path, host_path = _normalize_destination(path) + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] == "directory": + raise RuntimeError("workspace file delete does not support directories") + host_path.unlink(missing_ok=False) + return { + "path": str(normalized_path), + "deleted": True, + 
} + + def _run_command( command: str, timeout_seconds: int, @@ -931,6 +1080,23 @@ def _dispatch(request: dict[str, Any], conn: socket.socket) -> dict[str, Any]: raise RuntimeError("archive_size must not be negative") payload = _read_exact(conn, archive_size) return _install_secrets_archive(payload) + if action == "list_workspace": + return _list_workspace( + str(request.get("path", "/workspace")), + recursive=bool(request.get("recursive", False)), + ) + if action == "read_workspace_file": + return _read_workspace_file( + str(request.get("path", "/workspace")), + max_bytes=int(request.get("max_bytes", WORKSPACE_FILE_MAX_BYTES)), + ) + if action == "write_workspace_file": + return _write_workspace_file( + str(request.get("path", "/workspace")), + text=str(request.get("text", "")), + ) + if action == "delete_workspace_path": + return _delete_workspace_path(str(request.get("path", "/workspace"))) if action == "open_shell": shell_id = str(request.get("shell_id", "")).strip() if shell_id == "": diff --git a/src/pyro_mcp/api.py b/src/pyro_mcp/api.py index 78042b7..3a63bc7 100644 --- a/src/pyro_mcp/api.py +++ b/src/pyro_mcp/api.py @@ -157,6 +157,56 @@ class Pyro: def diff_workspace(self, workspace_id: str) -> dict[str, Any]: return self._manager.diff_workspace(workspace_id) + def list_workspace_files( + self, + workspace_id: str, + *, + path: str = "/workspace", + recursive: bool = False, + ) -> dict[str, Any]: + return self._manager.list_workspace_files( + workspace_id, + path=path, + recursive=recursive, + ) + + def read_workspace_file( + self, + workspace_id: str, + path: str, + *, + max_bytes: int = 65536, + ) -> dict[str, Any]: + return self._manager.read_workspace_file( + workspace_id, + path, + max_bytes=max_bytes, + ) + + def write_workspace_file( + self, + workspace_id: str, + path: str, + *, + text: str, + ) -> dict[str, Any]: + return self._manager.write_workspace_file( + workspace_id, + path, + text=text, + ) + + def apply_workspace_patch( + self, + 
workspace_id: str, + *, + patch: str, + ) -> dict[str, Any]: + return self._manager.apply_workspace_patch( + workspace_id, + patch=patch, + ) + def export_workspace_disk( self, workspace_id: str, @@ -529,6 +579,56 @@ class Pyro: """Compare `/workspace` to the immutable create-time baseline.""" return self.diff_workspace(workspace_id) + @server.tool() + async def workspace_file_list( + workspace_id: str, + path: str = "/workspace", + recursive: bool = False, + ) -> dict[str, Any]: + """List metadata for files and directories under one live workspace path.""" + return self.list_workspace_files( + workspace_id, + path=path, + recursive=recursive, + ) + + @server.tool() + async def workspace_file_read( + workspace_id: str, + path: str, + max_bytes: int = 65536, + ) -> dict[str, Any]: + """Read one regular text file from a live workspace path.""" + return self.read_workspace_file( + workspace_id, + path, + max_bytes=max_bytes, + ) + + @server.tool() + async def workspace_file_write( + workspace_id: str, + path: str, + text: str, + ) -> dict[str, Any]: + """Create or replace one regular text file under `/workspace`.""" + return self.write_workspace_file( + workspace_id, + path, + text=text, + ) + + @server.tool() + async def workspace_patch_apply( + workspace_id: str, + patch: str, + ) -> dict[str, Any]: + """Apply a unified text patch inside one live workspace.""" + return self.apply_workspace_patch( + workspace_id, + patch=patch, + ) + @server.tool() async def workspace_disk_export( workspace_id: str, diff --git a/src/pyro_mcp/cli.py b/src/pyro_mcp/cli.py index e866053..ac4416f 100644 --- a/src/pyro_mcp/cli.py +++ b/src/pyro_mcp/cli.py @@ -22,6 +22,7 @@ from pyro_mcp.vm_manager import ( DEFAULT_SERVICE_READY_TIMEOUT_SECONDS, DEFAULT_VCPU_COUNT, DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES, + DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES, WORKSPACE_GUEST_PATH, WORKSPACE_SHELL_SIGNAL_NAMES, ) @@ -322,6 +323,72 @@ def _print_workspace_diff_human(payload: dict[str, Any]) -> None: 
print(patch, end="" if patch.endswith("\n") else "\n") +def _print_workspace_file_list_human(payload: dict[str, Any]) -> None: + print( + f"Workspace path: {str(payload.get('path', WORKSPACE_GUEST_PATH))} " + f"(recursive={'yes' if bool(payload.get('recursive')) else 'no'})" + ) + entries = payload.get("entries") + if not isinstance(entries, list) or not entries: + print("No workspace entries found.") + return + for entry in entries: + if not isinstance(entry, dict): + continue + line = ( + f"{str(entry.get('path', 'unknown'))} " + f"[{str(entry.get('artifact_type', 'unknown'))}] " + f"size={int(entry.get('size_bytes', 0))}" + ) + link_target = entry.get("link_target") + if isinstance(link_target, str) and link_target != "": + line += f" -> {link_target}" + print(line) + + +def _print_workspace_file_read_human(payload: dict[str, Any]) -> None: + _write_stream(str(payload.get("content", "")), stream=sys.stdout) + print( + "[workspace-file-read] " + f"workspace_id={str(payload.get('workspace_id', 'unknown'))} " + f"path={str(payload.get('path', 'unknown'))} " + f"size_bytes={int(payload.get('size_bytes', 0))} " + f"truncated={'yes' if bool(payload.get('truncated', False)) else 'no'}", + file=sys.stderr, + flush=True, + ) + + +def _print_workspace_file_write_human(payload: dict[str, Any]) -> None: + print( + "[workspace-file-write] " + f"workspace_id={str(payload.get('workspace_id', 'unknown'))} " + f"path={str(payload.get('path', 'unknown'))} " + f"bytes_written={int(payload.get('bytes_written', 0))} " + f"execution_mode={str(payload.get('execution_mode', 'unknown'))}" + ) + + +def _print_workspace_patch_human(payload: dict[str, Any]) -> None: + summary = payload.get("summary") + if isinstance(summary, dict): + print( + "[workspace-patch] " + f"workspace_id={str(payload.get('workspace_id', 'unknown'))} " + f"total={int(summary.get('total', 0))} " + f"added={int(summary.get('added', 0))} " + f"modified={int(summary.get('modified', 0))} " + 
f"deleted={int(summary.get('deleted', 0))} " + f"execution_mode={str(payload.get('execution_mode', 'unknown'))}" + ) + return + print( + "[workspace-patch] " + f"workspace_id={str(payload.get('workspace_id', 'unknown'))} " + f"execution_mode={str(payload.get('execution_mode', 'unknown'))}" + ) + + def _print_workspace_logs_human(payload: dict[str, Any]) -> None: entries = payload.get("entries") if not isinstance(entries, list) or not entries: @@ -733,6 +800,8 @@ def _build_parser() -> argparse.ArgumentParser: Examples: pyro workspace create debian:12 --seed-path ./repo pyro workspace sync push WORKSPACE_ID ./repo --dest src + pyro workspace file read WORKSPACE_ID src/app.py + pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" pyro workspace exec WORKSPACE_ID -- sh -lc 'printf "hello\\n" > note.txt' pyro workspace stop WORKSPACE_ID pyro workspace disk list WORKSPACE_ID @@ -996,6 +1065,145 @@ def _build_parser() -> argparse.ArgumentParser: action="store_true", help="Print structured JSON instead of human-readable output.", ) + workspace_file_parser = workspace_subparsers.add_parser( + "file", + help="List, read, and write workspace files without shell quoting.", + description=( + "Use workspace file operations for model-native tree inspection and text edits " + "inside one started workspace." 
+ ), + epilog=dedent( + """ + Examples: + pyro workspace file list WORKSPACE_ID + pyro workspace file read WORKSPACE_ID src/app.py + pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")' + """ + ), + formatter_class=_HelpFormatter, + ) + workspace_file_subparsers = workspace_file_parser.add_subparsers( + dest="workspace_file_command", + required=True, + metavar="FILE", + ) + workspace_file_list_parser = workspace_file_subparsers.add_parser( + "list", + help="List metadata for one live workspace path.", + description="List files, directories, and symlinks under one started workspace path.", + epilog="Example:\n pyro workspace file list WORKSPACE_ID src --recursive", + formatter_class=_HelpFormatter, + ) + workspace_file_list_parser.add_argument("workspace_id", metavar="WORKSPACE_ID") + workspace_file_list_parser.add_argument( + "path", + nargs="?", + default=WORKSPACE_GUEST_PATH, + metavar="PATH", + help="Workspace path to inspect. Relative values resolve inside `/workspace`.", + ) + workspace_file_list_parser.add_argument( + "--recursive", + action="store_true", + help="Walk directories recursively.", + ) + workspace_file_list_parser.add_argument( + "--json", + action="store_true", + help="Print structured JSON instead of human-readable output.", + ) + workspace_file_read_parser = workspace_file_subparsers.add_parser( + "read", + help="Read one regular text file from a started workspace.", + description=( + "Read one regular text file under `/workspace`. This is bounded and does not " + "follow symlinks." 
+ ), + epilog="Example:\n pyro workspace file read WORKSPACE_ID src/app.py", + formatter_class=_HelpFormatter, + ) + workspace_file_read_parser.add_argument("workspace_id", metavar="WORKSPACE_ID") + workspace_file_read_parser.add_argument("path", metavar="PATH") + workspace_file_read_parser.add_argument( + "--max-bytes", + type=int, + default=DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES, + help="Maximum number of bytes to return in the decoded text response.", + ) + workspace_file_read_parser.add_argument( + "--json", + action="store_true", + help="Print structured JSON instead of human-readable output.", + ) + workspace_file_write_parser = workspace_file_subparsers.add_parser( + "write", + help="Create or replace one regular text file in a started workspace.", + description=( + "Write one UTF-8 text file under `/workspace`. Missing parent directories are " + "created automatically." + ), + epilog=( + "Example:\n" + " pyro workspace file write WORKSPACE_ID src/app.py --text 'print(\"hi\")'" + ), + formatter_class=_HelpFormatter, + ) + workspace_file_write_parser.add_argument("workspace_id", metavar="WORKSPACE_ID") + workspace_file_write_parser.add_argument("path", metavar="PATH") + workspace_file_write_parser.add_argument( + "--text", + required=True, + help="UTF-8 text content to write into the target file.", + ) + workspace_file_write_parser.add_argument( + "--json", + action="store_true", + help="Print structured JSON instead of human-readable output.", + ) + workspace_patch_parser = workspace_subparsers.add_parser( + "patch", + help="Apply unified text patches inside a started workspace.", + description=( + "Apply add/modify/delete unified text patches under `/workspace` without shell " + "editing tricks." + ), + epilog=dedent( + """ + Example: + pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" + + Patch application is preflighted but not fully transactional. If an apply fails + partway through, prefer `pyro workspace reset WORKSPACE_ID`. 
+ """ + ), + formatter_class=_HelpFormatter, + ) + workspace_patch_subparsers = workspace_patch_parser.add_subparsers( + dest="workspace_patch_command", + required=True, + metavar="PATCH", + ) + workspace_patch_apply_parser = workspace_patch_subparsers.add_parser( + "apply", + help="Apply one unified text patch to a started workspace.", + description=( + "Apply one unified text patch for add, modify, and delete operations under " + "`/workspace`." + ), + epilog="Example:\n pyro workspace patch apply WORKSPACE_ID --patch \"$(cat fix.patch)\"", + formatter_class=_HelpFormatter, + ) + workspace_patch_apply_parser.add_argument("workspace_id", metavar="WORKSPACE_ID") + workspace_patch_apply_parser.add_argument( + "--patch", + required=True, + help="Unified text patch to apply under `/workspace`.", + ) + workspace_patch_apply_parser.add_argument( + "--json", + action="store_true", + help="Print structured JSON instead of human-readable output.", + ) workspace_snapshot_parser = workspace_subparsers.add_parser( "snapshot", help="Create, list, and delete workspace snapshots.", @@ -2005,6 +2213,78 @@ def main() -> None: raise SystemExit(1) from exc _print_workspace_diff_human(payload) return + if args.workspace_command == "file": + if args.workspace_file_command == "list": + try: + payload = pyro.list_workspace_files( + args.workspace_id, + path=args.path, + recursive=bool(args.recursive), + ) + except Exception as exc: # noqa: BLE001 + if bool(args.json): + _print_json({"ok": False, "error": str(exc)}) + else: + print(f"[error] {exc}", file=sys.stderr, flush=True) + raise SystemExit(1) from exc + if bool(args.json): + _print_json(payload) + else: + _print_workspace_file_list_human(payload) + return + if args.workspace_file_command == "read": + try: + payload = pyro.read_workspace_file( + args.workspace_id, + args.path, + max_bytes=args.max_bytes, + ) + except Exception as exc: # noqa: BLE001 + if bool(args.json): + _print_json({"ok": False, "error": str(exc)}) + else: + 
print(f"[error] {exc}", file=sys.stderr, flush=True) + raise SystemExit(1) from exc + if bool(args.json): + _print_json(payload) + else: + _print_workspace_file_read_human(payload) + return + if args.workspace_file_command == "write": + try: + payload = pyro.write_workspace_file( + args.workspace_id, + args.path, + text=args.text, + ) + except Exception as exc: # noqa: BLE001 + if bool(args.json): + _print_json({"ok": False, "error": str(exc)}) + else: + print(f"[error] {exc}", file=sys.stderr, flush=True) + raise SystemExit(1) from exc + if bool(args.json): + _print_json(payload) + else: + _print_workspace_file_write_human(payload) + return + if args.workspace_command == "patch" and args.workspace_patch_command == "apply": + try: + payload = pyro.apply_workspace_patch( + args.workspace_id, + patch=args.patch, + ) + except Exception as exc: # noqa: BLE001 + if bool(args.json): + _print_json({"ok": False, "error": str(exc)}) + else: + print(f"[error] {exc}", file=sys.stderr, flush=True) + raise SystemExit(1) from exc + if bool(args.json): + _print_json(payload) + else: + _print_workspace_patch_human(payload) + return if args.workspace_command == "snapshot": if args.workspace_snapshot_command == "create": try: diff --git a/src/pyro_mcp/contract.py b/src/pyro_mcp/contract.py index aeef937..bcf117b 100644 --- a/src/pyro_mcp/contract.py +++ b/src/pyro_mcp/contract.py @@ -12,7 +12,9 @@ PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = ( "diff", "exec", "export", + "file", "logs", + "patch", "reset", "service", "shell", @@ -23,6 +25,8 @@ PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = ( "sync", ) PUBLIC_CLI_WORKSPACE_DISK_SUBCOMMANDS = ("export", "list", "read") +PUBLIC_CLI_WORKSPACE_FILE_SUBCOMMANDS = ("list", "read", "write") +PUBLIC_CLI_WORKSPACE_PATCH_SUBCOMMANDS = ("apply",) PUBLIC_CLI_WORKSPACE_SERVICE_SUBCOMMANDS = ("list", "logs", "start", "status", "stop") PUBLIC_CLI_WORKSPACE_SHELL_SUBCOMMANDS = ("close", "open", "read", "signal", "write") PUBLIC_CLI_WORKSPACE_SNAPSHOT_SUBCOMMANDS = 
("create", "delete", "list") @@ -44,6 +48,10 @@ PUBLIC_CLI_WORKSPACE_DISK_READ_FLAGS = ("--max-bytes", "--json") PUBLIC_CLI_WORKSPACE_EXEC_FLAGS = ("--timeout-seconds", "--secret-env", "--json") PUBLIC_CLI_WORKSPACE_DIFF_FLAGS = ("--json",) PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS = ("--output", "--json") +PUBLIC_CLI_WORKSPACE_FILE_LIST_FLAGS = ("--recursive", "--json") +PUBLIC_CLI_WORKSPACE_FILE_READ_FLAGS = ("--max-bytes", "--json") +PUBLIC_CLI_WORKSPACE_FILE_WRITE_FLAGS = ("--text", "--json") +PUBLIC_CLI_WORKSPACE_PATCH_APPLY_FLAGS = ("--patch", "--json") PUBLIC_CLI_WORKSPACE_RESET_FLAGS = ("--snapshot", "--json") PUBLIC_CLI_WORKSPACE_SERVICE_LIST_FLAGS = ("--json",) PUBLIC_CLI_WORKSPACE_SERVICE_LOGS_FLAGS = ("--tail-lines", "--all", "--json") @@ -90,6 +98,7 @@ PUBLIC_CLI_RUN_FLAGS = ( ) PUBLIC_SDK_METHODS = ( + "apply_workspace_patch", "close_shell", "create_server", "create_snapshot", @@ -108,6 +117,7 @@ PUBLIC_SDK_METHODS = ( "list_services", "list_snapshots", "list_workspace_disk", + "list_workspace_files", "logs_service", "logs_workspace", "network_info_vm", @@ -117,6 +127,7 @@ PUBLIC_SDK_METHODS = ( "push_workspace_sync", "read_shell", "read_workspace_disk", + "read_workspace_file", "reap_expired", "reset_workspace", "run_in_vm", @@ -131,6 +142,7 @@ PUBLIC_SDK_METHODS = ( "stop_vm", "stop_workspace", "write_shell", + "write_workspace_file", ) PUBLIC_MCP_TOOLS = ( @@ -165,7 +177,11 @@ PUBLIC_MCP_TOOLS = ( "workspace_diff", "workspace_exec", "workspace_export", + "workspace_file_list", + "workspace_file_read", + "workspace_file_write", "workspace_logs", + "workspace_patch_apply", "workspace_reset", "workspace_start", "workspace_status", diff --git a/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py b/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py index 607c76d..a469d7b 100755 --- a/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py +++ b/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py @@ -3,6 
+3,7 @@ from __future__ import annotations +import base64 import codecs import fcntl import io @@ -31,6 +32,7 @@ WORKSPACE_ROOT = PurePosixPath("/workspace") SHELL_ROOT = Path("/run/pyro-shells") SERVICE_ROOT = Path("/run/pyro-services") SECRET_ROOT = Path("/run/pyro-secrets") +WORKSPACE_FILE_MAX_BYTES = 1024 * 1024 SERVICE_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$") SHELL_SIGNAL_MAP = { "HUP": signal.SIGHUP, @@ -328,6 +330,153 @@ def _prepare_export_archive(path: str) -> dict[str, Any]: raise +def _workspace_entry(path_text: str, host_path: Path) -> dict[str, Any]: + try: + stat_result = os.lstat(host_path) + except FileNotFoundError as exc: + raise RuntimeError(f"workspace path does not exist: {path_text}") from exc + if host_path.is_symlink(): + return { + "path": path_text, + "artifact_type": "symlink", + "size_bytes": stat_result.st_size, + "link_target": os.readlink(host_path), + } + if host_path.is_dir(): + return { + "path": path_text, + "artifact_type": "directory", + "size_bytes": 0, + "link_target": None, + } + if host_path.is_file(): + return { + "path": path_text, + "artifact_type": "file", + "size_bytes": stat_result.st_size, + "link_target": None, + } + raise RuntimeError(f"unsupported workspace path type: {path_text}") + + +def _join_workspace_path(base: str, child_name: str) -> str: + base_path = PurePosixPath(base) + return str(base_path / child_name) if str(base_path) != "/" else f"/{child_name}" + + +def _list_workspace(path: str, *, recursive: bool) -> dict[str, Any]: + normalized_path, host_path = _normalize_destination(path) + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] != "directory": + return { + "path": str(normalized_path), + "artifact_type": entry["artifact_type"], + "entries": [entry], + } + entries: list[dict[str, Any]] = [] + + def walk(current_path: str, current_host_path: Path) -> None: + children: list[tuple[dict[str, Any], Path]] = [] + with os.scandir(current_host_path) as 
iterator: + for child in iterator: + child_host_path = Path(child.path) + children.append( + ( + _workspace_entry( + _join_workspace_path(current_path, child.name), + child_host_path, + ), + child_host_path, + ) + ) + children.sort(key=lambda item: str(item[0]["path"])) + for child_entry, child_host_path in children: + entries.append(child_entry) + if recursive and child_entry["artifact_type"] == "directory": + walk(str(child_entry["path"]), child_host_path) + + walk(str(normalized_path), host_path) + return { + "path": str(normalized_path), + "artifact_type": "directory", + "entries": entries, + } + + +def _read_workspace_file(path: str, *, max_bytes: int) -> dict[str, Any]: + if max_bytes <= 0: + raise RuntimeError("max_bytes must be positive") + if max_bytes > WORKSPACE_FILE_MAX_BYTES: + raise RuntimeError( + f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes" + ) + normalized_path, host_path = _normalize_destination(path) + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] != "file": + raise RuntimeError("workspace file read only supports regular files") + raw_bytes = host_path.read_bytes() + if len(raw_bytes) > max_bytes: + raise RuntimeError( + f"workspace file exceeds the maximum supported size of {max_bytes} bytes" + ) + return { + "path": str(normalized_path), + "size_bytes": len(raw_bytes), + "content_b64": base64.b64encode(raw_bytes).decode("ascii"), + } + + +def _ensure_no_symlink_parents_for_write(root: Path, target_path: Path, path_text: str) -> None: + relative_path = target_path.relative_to(root) + current = root + for part in relative_path.parts[:-1]: + current = current / part + if current.is_symlink(): + raise RuntimeError( + f"workspace path would traverse through a symlinked parent: {path_text}" + ) + + +def _write_workspace_file(path: str, *, text: str) -> dict[str, Any]: + raw_bytes = text.encode("utf-8") + if len(raw_bytes) > WORKSPACE_FILE_MAX_BYTES: + raise RuntimeError( + f"text must be at 
most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8" + ) + normalized_path, host_path = _normalize_destination(path) + _ensure_no_symlink_parents_for_write(Path("/workspace"), host_path, str(normalized_path)) + if host_path.exists() or host_path.is_symlink(): + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] != "file": + raise RuntimeError("workspace file write only supports regular file targets") + host_path.parent.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + prefix=".pyro-workspace-write-", + dir=host_path.parent, + delete=False, + ) as handle: + temp_path = Path(handle.name) + handle.write(raw_bytes) + os.replace(temp_path, host_path) + return { + "path": str(normalized_path), + "size_bytes": len(raw_bytes), + "bytes_written": len(raw_bytes), + } + + +def _delete_workspace_path(path: str) -> dict[str, Any]: + normalized_path, host_path = _normalize_destination(path) + entry = _workspace_entry(str(normalized_path), host_path) + if entry["artifact_type"] == "directory": + raise RuntimeError("workspace file delete does not support directories") + host_path.unlink(missing_ok=False) + return { + "path": str(normalized_path), + "deleted": True, + } + + def _run_command( command: str, timeout_seconds: int, @@ -931,6 +1080,23 @@ def _dispatch(request: dict[str, Any], conn: socket.socket) -> dict[str, Any]: raise RuntimeError("archive_size must not be negative") payload = _read_exact(conn, archive_size) return _install_secrets_archive(payload) + if action == "list_workspace": + return _list_workspace( + str(request.get("path", "/workspace")), + recursive=bool(request.get("recursive", False)), + ) + if action == "read_workspace_file": + return _read_workspace_file( + str(request.get("path", "/workspace")), + max_bytes=int(request.get("max_bytes", WORKSPACE_FILE_MAX_BYTES)), + ) + if action == "write_workspace_file": + return _write_workspace_file( + str(request.get("path", "/workspace")), + 
text=str(request.get("text", "")), + ) + if action == "delete_workspace_path": + return _delete_workspace_path(str(request.get("path", "/workspace"))) if action == "open_shell": shell_id = str(request.get("shell_id", "")).strip() if shell_id == "": diff --git a/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json b/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json index 3dcc733..46e5ccf 100644 --- a/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json +++ b/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json @@ -25,7 +25,7 @@ "guest": { "agent": { "path": "guest/pyro_guest_agent.py", - "sha256": "76a0bd05b523bb952ab9eaf5a3f2e0cbf1fc458d1e44894e2c0d206b05896328" + "sha256": "81fe2523a40f9e88ee38601292b25919059be7faa049c9d02e9466453319c7dd" }, "init": { "path": "guest/pyro-init", diff --git a/src/pyro_mcp/vm_environments.py b/src/pyro_mcp/vm_environments.py index c34537e..51420b4 100644 --- a/src/pyro_mcp/vm_environments.py +++ b/src/pyro_mcp/vm_environments.py @@ -19,7 +19,7 @@ from typing import Any from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths DEFAULT_ENVIRONMENT_VERSION = "1.0.0" -DEFAULT_CATALOG_VERSION = "3.1.0" +DEFAULT_CATALOG_VERSION = "3.2.0" OCI_MANIFEST_ACCEPT = ", ".join( ( "application/vnd.oci.image.index.v1+json", diff --git a/src/pyro_mcp/vm_guest.py b/src/pyro_mcp/vm_guest.py index e2f2c88..9e66ef2 100644 --- a/src/pyro_mcp/vm_guest.py +++ b/src/pyro_mcp/vm_guest.py @@ -2,6 +2,7 @@ from __future__ import annotations +import base64 import json import socket from dataclasses import dataclass @@ -47,6 +48,13 @@ class GuestArchiveExportResponse: bytes_written: int +@dataclass(frozen=True) +class GuestWorkspaceFileReadResponse: + path: str + size_bytes: int + content_bytes: bytes + + @dataclass(frozen=True) class GuestShellSummary: shell_id: str @@ -218,6 +226,102 @@ class VsockExecClient: bytes_written=int(payload.get("bytes_written", 0)), ) + def list_workspace_entries( + self, + guest_cid: int, + port: int, + *, + 
workspace_path: str, + recursive: bool, + timeout_seconds: int = 30, + uds_path: str | None = None, + ) -> dict[str, Any]: + return self._request_json( + guest_cid, + port, + { + "action": "list_workspace", + "path": workspace_path, + "recursive": recursive, + }, + timeout_seconds=timeout_seconds, + uds_path=uds_path, + error_message="guest workspace file list response must be a JSON object", + ) + + def read_workspace_file( + self, + guest_cid: int, + port: int, + *, + workspace_path: str, + max_bytes: int, + timeout_seconds: int = 30, + uds_path: str | None = None, + ) -> dict[str, Any]: + payload = self._request_json( + guest_cid, + port, + { + "action": "read_workspace_file", + "path": workspace_path, + "max_bytes": max_bytes, + }, + timeout_seconds=timeout_seconds, + uds_path=uds_path, + error_message="guest workspace file read response must be a JSON object", + ) + raw_content = payload.get("content_b64", "") + if not isinstance(raw_content, str): + raise RuntimeError("guest workspace file read response is missing content_b64") + payload["content_bytes"] = base64.b64decode(raw_content.encode("ascii"), validate=True) + payload.pop("content_b64", None) + return payload + + def write_workspace_file( + self, + guest_cid: int, + port: int, + *, + workspace_path: str, + text: str, + timeout_seconds: int = 30, + uds_path: str | None = None, + ) -> dict[str, Any]: + return self._request_json( + guest_cid, + port, + { + "action": "write_workspace_file", + "path": workspace_path, + "text": text, + }, + timeout_seconds=timeout_seconds, + uds_path=uds_path, + error_message="guest workspace file write response must be a JSON object", + ) + + def delete_workspace_path( + self, + guest_cid: int, + port: int, + *, + workspace_path: str, + timeout_seconds: int = 30, + uds_path: str | None = None, + ) -> dict[str, Any]: + return self._request_json( + guest_cid, + port, + { + "action": "delete_workspace_path", + "path": workspace_path, + }, + timeout_seconds=timeout_seconds, + 
uds_path=uds_path, + error_message="guest workspace path delete response must be a JSON object", + ) + def open_shell( self, guest_cid: int, diff --git a/src/pyro_mcp/vm_manager.py b/src/pyro_mcp/vm_manager.py index 20e7704..c3561a7 100644 --- a/src/pyro_mcp/vm_manager.py +++ b/src/pyro_mcp/vm_manager.py @@ -40,6 +40,25 @@ from pyro_mcp.workspace_disk import ( read_workspace_disk_file, scrub_workspace_runtime_paths, ) +from pyro_mcp.workspace_files import ( + DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES as DEFAULT_WORKSPACE_FILE_READ_LIMIT, +) +from pyro_mcp.workspace_files import ( + WORKSPACE_FILE_MAX_BYTES as WORKSPACE_FILE_MAX_LIMIT, +) +from pyro_mcp.workspace_files import ( + WORKSPACE_PATCH_MAX_BYTES as WORKSPACE_PATCH_MAX_LIMIT, +) +from pyro_mcp.workspace_files import ( + WorkspaceTextPatch, + apply_unified_text_patch, + delete_workspace_path, + list_workspace_files, + normalize_workspace_path, + parse_unified_text_patch, + read_workspace_file, + write_workspace_file, +) from pyro_mcp.workspace_ports import DEFAULT_PUBLISHED_PORT_HOST from pyro_mcp.workspace_shells import ( create_local_shell, @@ -79,6 +98,9 @@ DEFAULT_SHELL_COLS = 120 DEFAULT_SHELL_ROWS = 30 DEFAULT_SHELL_MAX_CHARS = 65536 DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES = 65536 +DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES = DEFAULT_WORKSPACE_FILE_READ_LIMIT +WORKSPACE_FILE_MAX_BYTES = WORKSPACE_FILE_MAX_LIMIT +WORKSPACE_PATCH_MAX_BYTES = WORKSPACE_PATCH_MAX_LIMIT DEFAULT_SERVICE_READY_TIMEOUT_SECONDS = 30 DEFAULT_SERVICE_READY_INTERVAL_MS = 500 DEFAULT_SERVICE_LOG_TAIL_LINES = 200 @@ -818,6 +840,49 @@ def _normalize_workspace_disk_path(path: str) -> str: return normalized +def _normalize_workspace_file_path(path: str) -> str: + return normalize_workspace_path(path) + + +def _validate_workspace_file_read_max_bytes(max_bytes: int) -> int: + if max_bytes <= 0: + raise ValueError("max_bytes must be positive") + if max_bytes > WORKSPACE_FILE_MAX_BYTES: + raise ValueError( + f"max_bytes must be at most 
{WORKSPACE_FILE_MAX_BYTES} bytes" + ) + return max_bytes + + +def _validate_workspace_text_payload(text: str, *, field_name: str) -> str: + encoded = text.encode("utf-8") + if len(encoded) > WORKSPACE_FILE_MAX_BYTES: + raise ValueError( + f"{field_name} must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8" + ) + return text + + +def _validate_workspace_patch_text(patch: str) -> str: + if patch.strip() == "": + raise ValueError("patch must not be empty") + encoded = patch.encode("utf-8") + if len(encoded) > WORKSPACE_PATCH_MAX_BYTES: + raise ValueError( + f"patch must be at most {WORKSPACE_PATCH_MAX_BYTES} bytes when encoded as UTF-8" + ) + return patch + + +def _decode_workspace_patch_text(path: str, content_bytes: bytes) -> str: + try: + return content_bytes.decode("utf-8") + except UnicodeDecodeError as exc: + raise RuntimeError( + f"workspace patch only supports UTF-8 text files: {path}" + ) from exc + + def _normalize_archive_member_name(name: str) -> PurePosixPath: candidate = name.strip() if candidate == "": @@ -2077,6 +2142,41 @@ class VmBackend: ) -> dict[str, Any]: raise NotImplementedError + def list_workspace_entries( # pragma: no cover + self, + instance: VmInstance, + *, + workspace_path: str, + recursive: bool, + ) -> dict[str, Any]: + raise NotImplementedError + + def read_workspace_file( # pragma: no cover + self, + instance: VmInstance, + *, + workspace_path: str, + max_bytes: int, + ) -> dict[str, Any]: + raise NotImplementedError + + def write_workspace_file( # pragma: no cover + self, + instance: VmInstance, + *, + workspace_path: str, + text: str, + ) -> dict[str, Any]: + raise NotImplementedError + + def delete_workspace_path( # pragma: no cover + self, + instance: VmInstance, + *, + workspace_path: str, + ) -> dict[str, Any]: + raise NotImplementedError + def open_shell( # pragma: no cover self, instance: VmInstance, @@ -2256,6 +2356,79 @@ class MockBackend(VmBackend): "execution_mode": "host_compat", } + def 
# NOTE(review): this mangled hunk interleaves methods that belong to three
# different classes (MockBackend, FirecrackerBackend, VmManager) whose
# `class` statements are defined earlier in this file.  They are reconstructed
# below with the owning class named in each section comment; behavior and all
# runtime strings are unchanged from the hunk content.

# --------------------------------------------------------------- MockBackend

def list_workspace_entries(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    recursive: bool,
) -> dict[str, Any]:
    """List entries straight from the host-side workspace directory."""
    listing = list_workspace_files(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        recursive=recursive,
    )
    return {
        "path": listing.path,
        "artifact_type": listing.artifact_type,
        "entries": [item.to_payload() for item in listing.entries],
        "execution_mode": "host_compat",
    }


def read_workspace_file(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    max_bytes: int,
) -> dict[str, Any]:
    """Read a regular file from the host-side workspace directory."""
    outcome = read_workspace_file(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        max_bytes=max_bytes,
    )
    return {
        "path": outcome.path,
        "size_bytes": outcome.size_bytes,
        "content_bytes": outcome.content_bytes,
        "execution_mode": "host_compat",
    }


def write_workspace_file(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    text: str,
) -> dict[str, Any]:
    """Write UTF-8 text into the host-side workspace directory."""
    outcome = write_workspace_file(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        text=text,
    )
    return {
        "path": outcome.path,
        "size_bytes": outcome.size_bytes,
        "bytes_written": outcome.bytes_written,
        "execution_mode": "host_compat",
    }


def delete_workspace_path(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
) -> dict[str, Any]:
    """Delete a non-directory path from the host-side workspace directory."""
    outcome = delete_workspace_path(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
    )
    return {
        "path": outcome.path,
        "deleted": outcome.deleted,
        "execution_mode": "host_compat",
    }


# -------------------------------------------------------- FirecrackerBackend
# Each method dispatches to the guest agent when guest exec is supported and
# otherwise falls back to host-compat operation on the workspace directory,
# recording the fallback in instance metadata.

def _guest_exec_endpoint(self, instance: VmInstance) -> tuple[int, int, Any]:
    """Return the (cid, port, uds_path) triple for the guest exec channel."""
    metadata = instance.metadata
    return (
        int(metadata["guest_cid"]),
        int(metadata["guest_exec_port"]),
        metadata.get("guest_exec_uds_path"),
    )


def list_workspace_entries(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    recursive: bool,
) -> dict[str, Any]:
    """List workspace entries via the guest agent, or host-compat fallback."""
    if self._runtime_capabilities.supports_guest_exec:
        cid, port, uds_path = self._guest_exec_endpoint(instance)
        payload = self._guest_exec_client.list_workspace_entries(
            cid,
            port,
            workspace_path=workspace_path,
            recursive=recursive,
            uds_path=uds_path,
        )
        payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
        return payload
    instance.metadata["execution_mode"] = "host_compat"
    listing = list_workspace_files(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        recursive=recursive,
    )
    return {
        "path": listing.path,
        "artifact_type": listing.artifact_type,
        "entries": [item.to_payload() for item in listing.entries],
        "execution_mode": "host_compat",
    }


def read_workspace_file(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    max_bytes: int,
) -> dict[str, Any]:
    """Read a workspace file via the guest agent, or host-compat fallback."""
    if self._runtime_capabilities.supports_guest_exec:
        cid, port, uds_path = self._guest_exec_endpoint(instance)
        payload = self._guest_exec_client.read_workspace_file(
            cid,
            port,
            workspace_path=workspace_path,
            max_bytes=max_bytes,
            uds_path=uds_path,
        )
        payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
        return payload
    instance.metadata["execution_mode"] = "host_compat"
    outcome = read_workspace_file(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        max_bytes=max_bytes,
    )
    return {
        "path": outcome.path,
        "size_bytes": outcome.size_bytes,
        "content_bytes": outcome.content_bytes,
        "execution_mode": "host_compat",
    }


def write_workspace_file(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    text: str,
) -> dict[str, Any]:
    """Write a workspace file via the guest agent, or host-compat fallback."""
    if self._runtime_capabilities.supports_guest_exec:
        cid, port, uds_path = self._guest_exec_endpoint(instance)
        payload = self._guest_exec_client.write_workspace_file(
            cid,
            port,
            workspace_path=workspace_path,
            text=text,
            uds_path=uds_path,
        )
        payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
        return payload
    instance.metadata["execution_mode"] = "host_compat"
    outcome = write_workspace_file(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        text=text,
    )
    return {
        "path": outcome.path,
        "size_bytes": outcome.size_bytes,
        "bytes_written": outcome.bytes_written,
        "execution_mode": "host_compat",
    }


def delete_workspace_path(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
) -> dict[str, Any]:
    """Delete a workspace path via the guest agent, or host-compat fallback."""
    if self._runtime_capabilities.supports_guest_exec:
        cid, port, uds_path = self._guest_exec_endpoint(instance)
        payload = self._guest_exec_client.delete_workspace_path(
            cid,
            port,
            workspace_path=workspace_path,
            uds_path=uds_path,
        )
        payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
        return payload
    instance.metadata["execution_mode"] = "host_compat"
    outcome = delete_workspace_path(
        _instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
    )
    return {
        "path": outcome.path,
        "deleted": outcome.deleted,
        "execution_mode": "host_compat",
    }


# ----------------------------------------------------------------- VmManager

def _sync_workspace_record(self, workspace_id: str, instance: VmInstance) -> None:
    """Persist the live instance state back onto the stored workspace record."""
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        workspace.state = instance.state
        workspace.firecracker_pid = instance.firecracker_pid
        workspace.last_error = instance.last_error
        workspace.metadata = dict(instance.metadata)
        self._save_workspace_locked(workspace)


def list_workspace_files(
    self,
    workspace_id: str,
    *,
    path: str = WORKSPACE_GUEST_PATH,
    recursive: bool = False,
) -> dict[str, Any]:
    """List /workspace entries of a started workspace via its backend."""
    guest_path = _normalize_workspace_file_path(path)
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_file_list",
        )
    listing = self._backend.list_workspace_entries(
        instance,
        workspace_path=guest_path,
        recursive=recursive,
    )
    self._sync_workspace_record(workspace_id, instance)
    return {
        "workspace_id": workspace_id,
        "path": str(listing["path"]),
        "recursive": recursive,
        "entries": cast(list[dict[str, Any]], list(listing.get("entries", []))),
        "execution_mode": str(
            listing.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
        ),
    }


def read_workspace_file(
    self,
    workspace_id: str,
    path: str,
    *,
    max_bytes: int = DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES,
) -> dict[str, Any]:
    """Read a UTF-8 text window of a workspace file.

    The backend always reads up to the full WORKSPACE_FILE_MAX_BYTES cap;
    truncation to *max_bytes* happens host-side so `truncated` is accurate.
    """
    guest_path = _normalize_workspace_file_path(path)
    read_window = _validate_workspace_file_read_max_bytes(max_bytes)
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_file_read",
        )
    payload = self._backend.read_workspace_file(
        instance,
        workspace_path=guest_path,
        max_bytes=WORKSPACE_FILE_MAX_BYTES,
    )
    raw_bytes = cast(bytes, payload["content_bytes"])
    self._sync_workspace_record(workspace_id, instance)
    return {
        "workspace_id": workspace_id,
        "path": str(payload["path"]),
        "size_bytes": int(payload["size_bytes"]),
        "max_bytes": read_window,
        "content": raw_bytes[:read_window].decode("utf-8", errors="replace"),
        "truncated": len(raw_bytes) > read_window,
        "execution_mode": str(
            payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
        ),
    }


def write_workspace_file(
    self,
    workspace_id: str,
    path: str,
    *,
    text: str,
) -> dict[str, Any]:
    """Write bounded UTF-8 text to a workspace file via the backend."""
    guest_path = _normalize_workspace_file_path(path)
    checked_text = _validate_workspace_text_payload(text, field_name="text")
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_file_write",
        )
    payload = self._backend.write_workspace_file(
        instance,
        workspace_path=guest_path,
        text=checked_text,
    )
    self._sync_workspace_record(workspace_id, instance)
    return {
        "workspace_id": workspace_id,
        "path": str(payload["path"]),
        "size_bytes": int(payload["size_bytes"]),
        "bytes_written": int(payload["bytes_written"]),
        "execution_mode": str(
            payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
        ),
    }


def apply_workspace_patch(
    self,
    workspace_id: str,
    *,
    patch: str,
) -> dict[str, Any]:
    """Preflight and apply a unified text patch under /workspace.

    The whole patch is validated against the current file contents before any
    mutation happens, so a context mismatch leaves the workspace untouched.
    """
    patch_text = _validate_workspace_patch_text(patch)
    patches_by_path: dict[str, WorkspaceTextPatch] = {}
    for file_patch in parse_unified_text_patch(patch_text):
        if file_patch.path in patches_by_path:
            raise ValueError(f"patch contains duplicate file entries for {file_patch.path}")
        patches_by_path[file_patch.path] = file_patch

    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_patch_apply",
        )

    pending_writes: dict[str, str] = {}
    pending_deletes: list[str] = []
    summary = {"total": 0, "added": 0, "modified": 0, "deleted": 0}
    entries: list[dict[str, str]] = []

    for target in sorted(patches_by_path):
        file_patch = patches_by_path[target]
        # Probe the current state of the target; a missing path is only
        # acceptable for "added" patches.
        listing: dict[str, Any] | None = None
        current_text: str | None = None
        exists = True
        try:
            listing = self._backend.list_workspace_entries(
                instance,
                workspace_path=file_patch.path,
                recursive=False,
            )
        except RuntimeError as exc:
            if "does not exist" not in str(exc):
                raise
            exists = False
        if exists:
            if listing is None:
                raise RuntimeError(
                    f"workspace patch could not inspect current path: {file_patch.path}"
                )
            if str(listing["artifact_type"]) != "file":
                raise RuntimeError(
                    f"workspace patch only supports regular files: {file_patch.path}"
                )
            current_payload = self._backend.read_workspace_file(
                instance,
                workspace_path=file_patch.path,
                max_bytes=WORKSPACE_FILE_MAX_BYTES,
            )
            current_text = _decode_workspace_patch_text(
                file_patch.path,
                cast(bytes, current_payload["content_bytes"]),
            )
        if exists and file_patch.status == "added":
            raise RuntimeError(
                f"workspace patch cannot add an existing path: {file_patch.path}"
            )
        if not exists and file_patch.status in {"modified", "deleted"}:
            raise RuntimeError(
                f"workspace patch cannot modify a missing path: {file_patch.path}"
            )
        patched_text = apply_unified_text_patch(
            path=file_patch.path,
            patch=file_patch,
            before_text=current_text,
        )
        if patched_text is None:
            pending_deletes.append(file_patch.path)
        else:
            pending_writes[file_patch.path] = patched_text
        summary["total"] += 1
        summary[file_patch.status] += 1
        entries.append({"path": file_patch.path, "status": file_patch.status})

    # Preflight passed for every file: now mutate, writes before deletes.
    for target in sorted(pending_writes):
        self._backend.write_workspace_file(
            instance,
            workspace_path=target,
            text=pending_writes[target],
        )
    for target in sorted(pending_deletes):
        self._backend.delete_workspace_path(
            instance,
            workspace_path=target,
        )

    self._sync_workspace_record(workspace_id, instance)
    return {
        "workspace_id": workspace_id,
        "changed": bool(entries),
        "summary": summary,
        "entries": entries,
        "patch": patch_text,
        "execution_mode": instance.metadata.get("execution_mode", "pending"),
    }
in sorted(planned_deletes): + self._backend.delete_workspace_path( + instance, + workspace_path=path_text, + ) + + with self._lock: + workspace = self._load_workspace_locked(workspace_id) + workspace.state = instance.state + workspace.firecracker_pid = instance.firecracker_pid + workspace.last_error = instance.last_error + workspace.metadata = dict(instance.metadata) + self._save_workspace_locked(workspace) + return { + "workspace_id": workspace_id, + "changed": bool(entries), + "summary": summary, + "entries": entries, + "patch": patch_text, + "execution_mode": instance.metadata.get("execution_mode", "pending"), + } + def create_snapshot( self, workspace_id: str, diff --git a/src/pyro_mcp/workspace_files.py b/src/pyro_mcp/workspace_files.py new file mode 100644 index 0000000..731a6ee --- /dev/null +++ b/src/pyro_mcp/workspace_files.py @@ -0,0 +1,456 @@ +"""Live workspace file operations and unified text patch helpers.""" + +from __future__ import annotations + +import os +import re +import tempfile +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +from typing import Literal + +WORKSPACE_ROOT = PurePosixPath("/workspace") +DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES = 65536 +WORKSPACE_FILE_MAX_BYTES = 1024 * 1024 +WORKSPACE_PATCH_MAX_BYTES = 1024 * 1024 + +WorkspaceFileArtifactType = Literal["file", "directory", "symlink"] +WorkspacePatchStatus = Literal["added", "modified", "deleted"] + +_PATCH_HUNK_RE = re.compile( + r"^@@ -(?P\d+)(?:,(?P\d+))? " + r"\+(?P\d+)(?:,(?P\d+))? 
@@" +) + + +@dataclass(frozen=True) +class WorkspaceFileEntry: + path: str + artifact_type: WorkspaceFileArtifactType + size_bytes: int + link_target: str | None = None + + def to_payload(self) -> dict[str, str | int | None]: + return { + "path": self.path, + "artifact_type": self.artifact_type, + "size_bytes": self.size_bytes, + "link_target": self.link_target, + } + + +@dataclass(frozen=True) +class WorkspacePathListing: + path: str + artifact_type: WorkspaceFileArtifactType + entries: list[WorkspaceFileEntry] + + +@dataclass(frozen=True) +class WorkspaceFileReadResult: + path: str + size_bytes: int + content_bytes: bytes + + +@dataclass(frozen=True) +class WorkspaceFileWriteResult: + path: str + size_bytes: int + bytes_written: int + + +@dataclass(frozen=True) +class WorkspaceFileDeleteResult: + path: str + deleted: bool + + +@dataclass(frozen=True) +class WorkspacePatchHunk: + old_start: int + old_count: int + new_start: int + new_count: int + lines: list[str] + + +@dataclass(frozen=True) +class WorkspaceTextPatch: + path: str + status: WorkspacePatchStatus + hunks: list[WorkspacePatchHunk] + + +def list_workspace_files( + workspace_dir: Path, + *, + workspace_path: str, + recursive: bool, +) -> WorkspacePathListing: + normalized_path, host_path = _workspace_host_path(workspace_dir, workspace_path) + entry = _entry_for_host_path(normalized_path, host_path) + if entry.artifact_type != "directory": + return WorkspacePathListing( + path=entry.path, + artifact_type=entry.artifact_type, + entries=[entry], + ) + + entries: list[WorkspaceFileEntry] = [] + + def walk(current_path: str, current_host_path: Path) -> None: + children: list[WorkspaceFileEntry] = [] + with os.scandir(current_host_path) as iterator: + for child in iterator: + child_entry = _entry_for_host_path( + _join_workspace_path(current_path, child.name), + Path(child.path), + ) + children.append(child_entry) + children.sort(key=lambda item: item.path) + for child_entry in children: + 
entries.append(child_entry) + if recursive and child_entry.artifact_type == "directory": + walk(child_entry.path, workspace_host_path(workspace_dir, child_entry.path)) + + walk(normalized_path, host_path) + return WorkspacePathListing(path=normalized_path, artifact_type="directory", entries=entries) + + +def read_workspace_file( + workspace_dir: Path, + *, + workspace_path: str, + max_bytes: int = WORKSPACE_FILE_MAX_BYTES, +) -> WorkspaceFileReadResult: + _validate_max_bytes(max_bytes) + normalized_path, host_path = _workspace_host_path(workspace_dir, workspace_path) + entry = _entry_for_host_path(normalized_path, host_path) + if entry.artifact_type != "file": + raise RuntimeError("workspace file read only supports regular files") + raw_bytes = host_path.read_bytes() + if len(raw_bytes) > max_bytes: + raise RuntimeError( + f"workspace file exceeds the maximum supported size of {max_bytes} bytes" + ) + return WorkspaceFileReadResult( + path=normalized_path, + size_bytes=len(raw_bytes), + content_bytes=raw_bytes, + ) + + +def write_workspace_file( + workspace_dir: Path, + *, + workspace_path: str, + text: str, +) -> WorkspaceFileWriteResult: + encoded = text.encode("utf-8") + if len(encoded) > WORKSPACE_FILE_MAX_BYTES: + raise ValueError( + f"text must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8" + ) + normalized_path, host_path = _workspace_host_path(workspace_dir, workspace_path) + _ensure_no_symlink_parents(workspace_dir, host_path, normalized_path) + if host_path.exists() or host_path.is_symlink(): + entry = _entry_for_host_path(normalized_path, host_path) + if entry.artifact_type != "file": + raise RuntimeError("workspace file write only supports regular file targets") + host_path.parent.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + prefix=".pyro-workspace-write-", + dir=host_path.parent, + delete=False, + ) as handle: + temp_path = Path(handle.name) + handle.write(encoded) + os.replace(temp_path, host_path) + 
return WorkspaceFileWriteResult( + path=normalized_path, + size_bytes=len(encoded), + bytes_written=len(encoded), + ) + + +def delete_workspace_path( + workspace_dir: Path, + *, + workspace_path: str, +) -> WorkspaceFileDeleteResult: + normalized_path, host_path = _workspace_host_path(workspace_dir, workspace_path) + entry = _entry_for_host_path(normalized_path, host_path) + if entry.artifact_type == "directory": + raise RuntimeError("workspace file delete does not support directories") + host_path.unlink(missing_ok=False) + return WorkspaceFileDeleteResult(path=normalized_path, deleted=True) + + +def parse_unified_text_patch(patch_text: str) -> list[WorkspaceTextPatch]: + encoded = patch_text.encode("utf-8") + if len(encoded) > WORKSPACE_PATCH_MAX_BYTES: + raise ValueError( + f"patch must be at most {WORKSPACE_PATCH_MAX_BYTES} bytes when encoded as UTF-8" + ) + if patch_text.strip() == "": + raise ValueError("patch must not be empty") + + lines = patch_text.splitlines(keepends=True) + patches: list[WorkspaceTextPatch] = [] + index = 0 + + while index < len(lines): + line = lines[index] + if line.startswith("diff --git "): + index += 1 + continue + if line.startswith("index "): + index += 1 + continue + if _is_unsupported_patch_prelude(line): + raise ValueError(f"unsupported patch feature: {line.rstrip()}") + if not line.startswith("--- "): + if line.strip() == "": + index += 1 + continue + raise ValueError(f"invalid patch header: {line.rstrip()}") + old_path = _parse_patch_label(line[4:].rstrip("\n")) + index += 1 + if index >= len(lines) or not lines[index].startswith("+++ "): + raise ValueError("patch is missing '+++' header") + new_path = _parse_patch_label(lines[index][4:].rstrip("\n")) + index += 1 + if old_path is not None and new_path is not None and old_path != new_path: + raise ValueError("rename and copy patches are not supported") + patch_path = new_path or old_path + if patch_path is None: + raise ValueError("patch must target a workspace path") + if 
old_path is None: + status: WorkspacePatchStatus = "added" + elif new_path is None: + status = "deleted" + else: + status = "modified" + + hunks: list[WorkspacePatchHunk] = [] + while index < len(lines): + line = lines[index] + if line.startswith("diff --git ") or line.startswith("--- "): + break + if line.startswith("index "): + index += 1 + continue + if _is_unsupported_patch_prelude(line): + raise ValueError(f"unsupported patch feature: {line.rstrip()}") + header_match = _PATCH_HUNK_RE.match(line.rstrip("\n")) + if header_match is None: + raise ValueError(f"invalid patch hunk header: {line.rstrip()}") + old_count = int(header_match.group("old_count") or "1") + new_count = int(header_match.group("new_count") or "1") + hunk_lines: list[str] = [] + index += 1 + while index < len(lines): + hunk_line = lines[index] + if hunk_line.startswith(("diff --git ", "--- ", "@@ ")): + break + if hunk_line.startswith("@@"): + break + if hunk_line.startswith("\\ No newline at end of file"): + index += 1 + continue + if not hunk_line.startswith((" ", "+", "-")): + raise ValueError(f"invalid patch hunk line: {hunk_line.rstrip()}") + hunk_lines.append(hunk_line) + index += 1 + _validate_hunk_counts(old_count, new_count, hunk_lines) + hunks.append( + WorkspacePatchHunk( + old_start=int(header_match.group("old_start")), + old_count=old_count, + new_start=int(header_match.group("new_start")), + new_count=new_count, + lines=hunk_lines, + ) + ) + if not hunks: + raise ValueError(f"patch for {patch_path} has no hunks") + patches.append(WorkspaceTextPatch(path=patch_path, status=status, hunks=hunks)) + + if not patches: + raise ValueError("patch must contain at least one file change") + return patches + + +def apply_unified_text_patch( + *, + path: str, + patch: WorkspaceTextPatch, + before_text: str | None, +) -> str | None: + before_lines = [] if before_text is None else before_text.splitlines(keepends=True) + output_lines: list[str] = [] + cursor = 0 + for hunk in patch.hunks: + 
start_index = 0 if hunk.old_start == 0 else hunk.old_start - 1 + if start_index < cursor or start_index > len(before_lines): + raise RuntimeError(f"patch hunk is out of range for {path}") + output_lines.extend(before_lines[cursor:start_index]) + local_index = start_index + for hunk_line in hunk.lines: + prefix = hunk_line[:1] + payload = hunk_line[1:] + if prefix in {" ", "-"}: + if local_index >= len(before_lines): + raise RuntimeError(f"patch context does not match for {path}") + if before_lines[local_index] != payload: + raise RuntimeError(f"patch context does not match for {path}") + if prefix == " ": + output_lines.append(payload) + local_index += 1 + continue + if prefix == "+": + output_lines.append(payload) + continue + raise RuntimeError(f"invalid patch line prefix for {path}") + cursor = local_index + output_lines.extend(before_lines[cursor:]) + after_text = "".join(output_lines) + if patch.status == "deleted": + if after_text != "": + raise RuntimeError(f"delete patch did not remove all content for {path}") + return None + encoded = after_text.encode("utf-8") + if len(encoded) > WORKSPACE_FILE_MAX_BYTES: + raise RuntimeError( + f"patched file {path} exceeds the maximum supported size of " + f"{WORKSPACE_FILE_MAX_BYTES} bytes" + ) + return after_text + + +def workspace_host_path(workspace_dir: Path, workspace_path: str) -> Path: + _, host_path = _workspace_host_path(workspace_dir, workspace_path) + return host_path + + +def _workspace_host_path(workspace_dir: Path, workspace_path: str) -> tuple[str, Path]: + normalized = normalize_workspace_path(workspace_path) + suffix = PurePosixPath(normalized).relative_to(WORKSPACE_ROOT) + host_path = workspace_dir if str(suffix) in {"", "."} else workspace_dir.joinpath(*suffix.parts) + return normalized, host_path + + +def normalize_workspace_path(path: str) -> str: + candidate = path.strip() + if candidate == "": + raise ValueError("workspace path must not be empty") + raw_path = PurePosixPath(candidate) + if 
any(part == ".." for part in raw_path.parts): + raise ValueError("workspace path must stay inside /workspace") + if not raw_path.is_absolute(): + raw_path = WORKSPACE_ROOT / raw_path + parts = [part for part in raw_path.parts if part not in {"", "."}] + normalized = PurePosixPath("/") / PurePosixPath(*parts) + if normalized == PurePosixPath("/"): + raise ValueError("workspace path must stay inside /workspace") + if normalized.parts[: len(WORKSPACE_ROOT.parts)] != WORKSPACE_ROOT.parts: + raise ValueError("workspace path must stay inside /workspace") + return str(normalized) + + +def _entry_for_host_path(guest_path: str, host_path: Path) -> WorkspaceFileEntry: + try: + stat_result = os.lstat(host_path) + except FileNotFoundError as exc: + raise RuntimeError(f"workspace path does not exist: {guest_path}") from exc + if os.path.islink(host_path): + return WorkspaceFileEntry( + path=guest_path, + artifact_type="symlink", + size_bytes=stat_result.st_size, + link_target=os.readlink(host_path), + ) + if host_path.is_dir(): + return WorkspaceFileEntry( + path=guest_path, + artifact_type="directory", + size_bytes=0, + link_target=None, + ) + if host_path.is_file(): + return WorkspaceFileEntry( + path=guest_path, + artifact_type="file", + size_bytes=stat_result.st_size, + link_target=None, + ) + raise RuntimeError(f"unsupported workspace path type: {guest_path}") + + +def _join_workspace_path(base: str, child_name: str) -> str: + base_path = PurePosixPath(base) + return str(base_path / child_name) if str(base_path) != "/" else f"/{child_name}" + + +def _ensure_no_symlink_parents(workspace_dir: Path, target_path: Path, guest_path: str) -> None: + relative_path = target_path.relative_to(workspace_dir) + current = workspace_dir + for part in relative_path.parts[:-1]: + current = current / part + if current.is_symlink(): + raise RuntimeError( + f"workspace path would traverse through a symlinked parent: {guest_path}" + ) + + +def _validate_max_bytes(max_bytes: int) -> None: + if 
max_bytes <= 0: + raise ValueError("max_bytes must be positive") + if max_bytes > WORKSPACE_FILE_MAX_BYTES: + raise ValueError( + f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes" + ) + + +def _is_unsupported_patch_prelude(line: str) -> bool: + return line.startswith( + ( + "old mode ", + "new mode ", + "deleted file mode ", + "new file mode ", + "rename from ", + "rename to ", + "copy from ", + "copy to ", + "similarity index ", + "dissimilarity index ", + "GIT binary patch", + "Binary files ", + ) + ) + + +def _parse_patch_label(label: str) -> str | None: + raw = label.split("\t", 1)[0].strip() + if raw == "/dev/null": + return None + if raw.startswith(("a/", "b/")): + raw = raw[2:] + if raw.startswith("/workspace/"): + return normalize_workspace_path(raw) + return normalize_workspace_path(raw) + + +def _validate_hunk_counts(old_count: int, new_count: int, hunk_lines: list[str]) -> None: + old_seen = 0 + new_seen = 0 + for hunk_line in hunk_lines: + prefix = hunk_line[:1] + if prefix in {" ", "-"}: + old_seen += 1 + if prefix in {" ", "+"}: + new_seen += 1 + if old_seen != old_count or new_seen != new_count: + raise ValueError("patch hunk line counts do not match the header") diff --git a/tests/test_api.py b/tests/test_api.py index 3ae927a..7030b8d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -55,6 +55,10 @@ def test_pyro_create_server_registers_vm_run(tmp_path: Path) -> None: assert "workspace_diff" in tool_names assert "workspace_sync_push" in tool_names assert "workspace_export" in tool_names + assert "workspace_file_list" in tool_names + assert "workspace_file_read" in tool_names + assert "workspace_file_write" in tool_names + assert "workspace_patch_apply" in tool_names assert "workspace_disk_export" in tool_names assert "workspace_disk_list" in tool_names assert "workspace_disk_read" in tool_names @@ -230,6 +234,23 @@ def test_pyro_workspace_methods_delegate_to_manager(tmp_path: Path) -> None: command='sh -lc \'printf "%s\\n" 
"$API_TOKEN"\'', secret_env={"API_TOKEN": "API_TOKEN"}, ) + listed_files = pyro.list_workspace_files(workspace_id, path="/workspace", recursive=True) + file_read = pyro.read_workspace_file(workspace_id, "note.txt") + file_write = pyro.write_workspace_file( + workspace_id, + "src/app.py", + text="print('hello from file op')\n", + ) + patch_result = pyro.apply_workspace_patch( + workspace_id, + patch=( + "--- a/note.txt\n" + "+++ b/note.txt\n" + "@@ -1 +1 @@\n" + "-ok\n" + "+patched\n" + ), + ) diff_payload = pyro.diff_workspace(workspace_id) snapshot = pyro.create_snapshot(workspace_id, "checkpoint") snapshots = pyro.list_snapshots(workspace_id) @@ -273,13 +294,19 @@ def test_pyro_workspace_methods_delegate_to_manager(tmp_path: Path) -> None: {"name": "FILE_TOKEN", "source_kind": "file"}, ] assert executed["stdout"] == "[REDACTED]\n" + assert any(entry["path"] == "/workspace/note.txt" for entry in listed_files["entries"]) + assert file_read["content"] == "ok\n" + assert file_write["path"] == "/workspace/src/app.py" + assert file_write["bytes_written"] == len("print('hello from file op')\n".encode("utf-8")) + assert patch_result["changed"] is True + assert patch_result["entries"] == [{"path": "/workspace/note.txt", "status": "modified"}] assert created["workspace_seed"]["mode"] == "directory" assert synced["workspace_sync"]["destination"] == "/workspace/subdir" assert diff_payload["changed"] is True assert snapshot["snapshot"]["snapshot_name"] == "checkpoint" assert snapshots["count"] == 2 assert exported["output_path"] == str(export_path) - assert export_path.read_text(encoding="utf-8") == "ok\n" + assert export_path.read_text(encoding="utf-8") == "patched\n" assert shell_output["output"].count("[REDACTED]") >= 1 assert shell_closed["closed"] is True assert service["state"] == "running" @@ -540,6 +567,304 @@ def test_pyro_create_server_workspace_disk_tools_delegate() -> None: ] +def test_pyro_workspace_file_methods_delegate_to_manager() -> None: + calls: 
list[tuple[str, dict[str, Any]]] = [] + + class StubManager: + def list_workspace_files( + self, + workspace_id: str, + *, + path: str = "/workspace", + recursive: bool = False, + ) -> dict[str, Any]: + calls.append( + ( + "list_workspace_files", + { + "workspace_id": workspace_id, + "path": path, + "recursive": recursive, + }, + ) + ) + return {"workspace_id": workspace_id, "entries": []} + + def read_workspace_file( + self, + workspace_id: str, + path: str, + *, + max_bytes: int = 65536, + ) -> dict[str, Any]: + calls.append( + ( + "read_workspace_file", + { + "workspace_id": workspace_id, + "path": path, + "max_bytes": max_bytes, + }, + ) + ) + return {"workspace_id": workspace_id, "content": "hello\n"} + + def write_workspace_file( + self, + workspace_id: str, + path: str, + *, + text: str, + ) -> dict[str, Any]: + calls.append( + ( + "write_workspace_file", + { + "workspace_id": workspace_id, + "path": path, + "text": text, + }, + ) + ) + return {"workspace_id": workspace_id, "bytes_written": len(text.encode("utf-8"))} + + def apply_workspace_patch( + self, + workspace_id: str, + *, + patch: str, + ) -> dict[str, Any]: + calls.append( + ( + "apply_workspace_patch", + { + "workspace_id": workspace_id, + "patch": patch, + }, + ) + ) + return {"workspace_id": workspace_id, "changed": True} + + pyro = Pyro(manager=cast(Any, StubManager())) + + listed = pyro.list_workspace_files("workspace-123", path="/workspace/src", recursive=True) + read = pyro.read_workspace_file("workspace-123", "note.txt", max_bytes=4096) + written = pyro.write_workspace_file("workspace-123", "src/app.py", text="print('hi')\n") + patched = pyro.apply_workspace_patch( + "workspace-123", + patch="--- a/note.txt\n+++ b/note.txt\n@@ -1 +1 @@\n-old\n+new\n", + ) + + assert listed["entries"] == [] + assert read["content"] == "hello\n" + assert written["bytes_written"] == len("print('hi')\n".encode("utf-8")) + assert patched["changed"] is True + assert calls == [ + ( + "list_workspace_files", + { + 
"workspace_id": "workspace-123", + "path": "/workspace/src", + "recursive": True, + }, + ), + ( + "read_workspace_file", + { + "workspace_id": "workspace-123", + "path": "note.txt", + "max_bytes": 4096, + }, + ), + ( + "write_workspace_file", + { + "workspace_id": "workspace-123", + "path": "src/app.py", + "text": "print('hi')\n", + }, + ), + ( + "apply_workspace_patch", + { + "workspace_id": "workspace-123", + "patch": "--- a/note.txt\n+++ b/note.txt\n@@ -1 +1 @@\n-old\n+new\n", + }, + ), + ] + + +def test_pyro_create_server_workspace_file_tools_delegate() -> None: + calls: list[tuple[str, dict[str, Any]]] = [] + + class StubManager: + def list_workspace_files( + self, + workspace_id: str, + *, + path: str = "/workspace", + recursive: bool = False, + ) -> dict[str, Any]: + calls.append( + ( + "list_workspace_files", + { + "workspace_id": workspace_id, + "path": path, + "recursive": recursive, + }, + ) + ) + return {"workspace_id": workspace_id, "entries": []} + + def read_workspace_file( + self, + workspace_id: str, + path: str, + *, + max_bytes: int = 65536, + ) -> dict[str, Any]: + calls.append( + ( + "read_workspace_file", + { + "workspace_id": workspace_id, + "path": path, + "max_bytes": max_bytes, + }, + ) + ) + return {"workspace_id": workspace_id, "content": "hello\n"} + + def write_workspace_file( + self, + workspace_id: str, + path: str, + *, + text: str, + ) -> dict[str, Any]: + calls.append( + ( + "write_workspace_file", + { + "workspace_id": workspace_id, + "path": path, + "text": text, + }, + ) + ) + return {"workspace_id": workspace_id, "bytes_written": len(text.encode("utf-8"))} + + def apply_workspace_patch( + self, + workspace_id: str, + *, + patch: str, + ) -> dict[str, Any]: + calls.append( + ( + "apply_workspace_patch", + { + "workspace_id": workspace_id, + "patch": patch, + }, + ) + ) + return {"workspace_id": workspace_id, "changed": True} + + pyro = Pyro(manager=cast(Any, StubManager())) + + def _extract_structured(raw_result: object) -> 
dict[str, Any]: + if not isinstance(raw_result, tuple) or len(raw_result) != 2: + raise TypeError("unexpected call_tool result shape") + _, structured = raw_result + if not isinstance(structured, dict): + raise TypeError("expected structured dictionary result") + return cast(dict[str, Any], structured) + + async def _run() -> tuple[dict[str, Any], ...]: + server = pyro.create_server() + listed = _extract_structured( + await server.call_tool( + "workspace_file_list", + { + "workspace_id": "workspace-123", + "path": "/workspace/src", + "recursive": True, + }, + ) + ) + read = _extract_structured( + await server.call_tool( + "workspace_file_read", + { + "workspace_id": "workspace-123", + "path": "note.txt", + "max_bytes": 4096, + }, + ) + ) + written = _extract_structured( + await server.call_tool( + "workspace_file_write", + { + "workspace_id": "workspace-123", + "path": "src/app.py", + "text": "print('hi')\n", + }, + ) + ) + patched = _extract_structured( + await server.call_tool( + "workspace_patch_apply", + { + "workspace_id": "workspace-123", + "patch": "--- a/note.txt\n+++ b/note.txt\n@@ -1 +1 @@\n-old\n+new\n", + }, + ) + ) + return listed, read, written, patched + + listed, read, written, patched = asyncio.run(_run()) + assert listed["entries"] == [] + assert read["content"] == "hello\n" + assert written["bytes_written"] == len("print('hi')\n".encode("utf-8")) + assert patched["changed"] is True + assert calls == [ + ( + "list_workspace_files", + { + "workspace_id": "workspace-123", + "path": "/workspace/src", + "recursive": True, + }, + ), + ( + "read_workspace_file", + { + "workspace_id": "workspace-123", + "path": "note.txt", + "max_bytes": 4096, + }, + ), + ( + "write_workspace_file", + { + "workspace_id": "workspace-123", + "path": "src/app.py", + "text": "print('hi')\n", + }, + ), + ( + "apply_workspace_patch", + { + "workspace_id": "workspace-123", + "patch": "--- a/note.txt\n+++ b/note.txt\n@@ -1 +1 @@\n-old\n+new\n", + }, + ), + ] + + def 
test_pyro_create_server_workspace_status_shell_and_service_delegate() -> None: calls: list[tuple[str, dict[str, Any]]] = [] diff --git a/tests/test_cli.py b/tests/test_cli.py index 872c5a0..7b669af 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -116,6 +116,37 @@ def test_cli_subcommand_help_includes_examples_and_guidance() -> None: assert "--output" in workspace_export_help assert "Export one file or directory from `/workspace`" in workspace_export_help + workspace_file_help = _subparser_choice( + _subparser_choice(parser, "workspace"), "file" + ).format_help() + assert "model-native tree inspection and text edits" in workspace_file_help + assert "pyro workspace file read WORKSPACE_ID src/app.py" in workspace_file_help + + workspace_file_list_help = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "file"), "list" + ).format_help() + assert "--recursive" in workspace_file_list_help + + workspace_file_read_help = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "file"), "read" + ).format_help() + assert "--max-bytes" in workspace_file_read_help + + workspace_file_write_help = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "file"), "write" + ).format_help() + assert "--text" in workspace_file_write_help + + workspace_patch_help = _subparser_choice( + _subparser_choice(parser, "workspace"), "patch" + ).format_help() + assert "Apply add/modify/delete unified text patches" in workspace_patch_help + + workspace_patch_apply_help = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "patch"), "apply" + ).format_help() + assert "--patch" in workspace_patch_apply_help + workspace_stop_help = _subparser_choice( _subparser_choice(parser, "workspace"), "stop" ).format_help() @@ -682,6 +713,169 @@ def test_cli_workspace_export_prints_human_output( assert "artifact_type=file" in output +def test_cli_workspace_file_commands_print_human_and_json( + 
monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + class StubPyro: + def list_workspace_files( + self, + workspace_id: str, + *, + path: str, + recursive: bool, + ) -> dict[str, Any]: + assert workspace_id == "workspace-123" + assert path == "/workspace/src" + assert recursive is True + return { + "workspace_id": workspace_id, + "path": path, + "recursive": recursive, + "entries": [ + { + "path": "/workspace/src/app.py", + "artifact_type": "file", + "size_bytes": 14, + "link_target": None, + } + ], + "execution_mode": "guest_vsock", + } + + def read_workspace_file( + self, + workspace_id: str, + path: str, + *, + max_bytes: int, + ) -> dict[str, Any]: + assert workspace_id == "workspace-123" + assert path == "src/app.py" + assert max_bytes == 4096 + return { + "workspace_id": workspace_id, + "path": "/workspace/src/app.py", + "size_bytes": 14, + "max_bytes": max_bytes, + "content": "print('hi')\n", + "truncated": False, + "execution_mode": "guest_vsock", + } + + def write_workspace_file( + self, + workspace_id: str, + path: str, + *, + text: str, + ) -> dict[str, Any]: + assert workspace_id == "workspace-123" + assert path == "src/app.py" + assert text == "print('hello')\n" + return { + "workspace_id": workspace_id, + "path": "/workspace/src/app.py", + "size_bytes": len(text.encode("utf-8")), + "bytes_written": len(text.encode("utf-8")), + "execution_mode": "guest_vsock", + } + + def apply_workspace_patch( + self, + workspace_id: str, + *, + patch: str, + ) -> dict[str, Any]: + assert workspace_id == "workspace-123" + assert patch.startswith("--- a/src/app.py") + return { + "workspace_id": workspace_id, + "changed": True, + "summary": {"total": 1, "added": 0, "modified": 1, "deleted": 0}, + "entries": [{"path": "/workspace/src/app.py", "status": "modified"}], + "patch": patch, + "execution_mode": "guest_vsock", + } + + class ListParser: + def parse_args(self) -> argparse.Namespace: + return argparse.Namespace( + 
command="workspace", + workspace_command="file", + workspace_file_command="list", + workspace_id="workspace-123", + path="/workspace/src", + recursive=True, + json=False, + ) + + class ReadParser: + def parse_args(self) -> argparse.Namespace: + return argparse.Namespace( + command="workspace", + workspace_command="file", + workspace_file_command="read", + workspace_id="workspace-123", + path="src/app.py", + max_bytes=4096, + json=True, + ) + + class WriteParser: + def parse_args(self) -> argparse.Namespace: + return argparse.Namespace( + command="workspace", + workspace_command="file", + workspace_file_command="write", + workspace_id="workspace-123", + path="src/app.py", + text="print('hello')\n", + json=False, + ) + + class PatchParser: + def parse_args(self) -> argparse.Namespace: + return argparse.Namespace( + command="workspace", + workspace_command="patch", + workspace_patch_command="apply", + workspace_id="workspace-123", + patch=( + "--- a/src/app.py\n" + "+++ b/src/app.py\n" + "@@ -1 +1 @@\n" + "-print('hi')\n" + "+print('hello')\n" + ), + json=False, + ) + + monkeypatch.setattr(cli, "Pyro", StubPyro) + + monkeypatch.setattr(cli, "_build_parser", lambda: ListParser()) + cli.main() + list_output = capsys.readouterr().out + assert "Workspace path: /workspace/src (recursive=yes)" in list_output + assert "/workspace/src/app.py [file]" in list_output + + monkeypatch.setattr(cli, "_build_parser", lambda: ReadParser()) + cli.main() + read_payload = json.loads(capsys.readouterr().out) + assert read_payload["path"] == "/workspace/src/app.py" + assert read_payload["content"] == "print('hi')\n" + + monkeypatch.setattr(cli, "_build_parser", lambda: WriteParser()) + cli.main() + write_output = capsys.readouterr().out + assert "[workspace-file-write] workspace_id=workspace-123" in write_output + + monkeypatch.setattr(cli, "_build_parser", lambda: PatchParser()) + cli.main() + patch_output = capsys.readouterr().out + assert "[workspace-patch] workspace_id=workspace-123 
total=1" in patch_output + + def test_cli_workspace_stop_and_start_print_human_output( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], diff --git a/tests/test_public_contract.py b/tests/test_public_contract.py index ab5ad98..406ce16 100644 --- a/tests/test_public_contract.py +++ b/tests/test_public_contract.py @@ -24,6 +24,12 @@ from pyro_mcp.contract import ( PUBLIC_CLI_WORKSPACE_DISK_READ_FLAGS, PUBLIC_CLI_WORKSPACE_EXEC_FLAGS, PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS, + PUBLIC_CLI_WORKSPACE_FILE_LIST_FLAGS, + PUBLIC_CLI_WORKSPACE_FILE_READ_FLAGS, + PUBLIC_CLI_WORKSPACE_FILE_SUBCOMMANDS, + PUBLIC_CLI_WORKSPACE_FILE_WRITE_FLAGS, + PUBLIC_CLI_WORKSPACE_PATCH_APPLY_FLAGS, + PUBLIC_CLI_WORKSPACE_PATCH_SUBCOMMANDS, PUBLIC_CLI_WORKSPACE_RESET_FLAGS, PUBLIC_CLI_WORKSPACE_SERVICE_LIST_FLAGS, PUBLIC_CLI_WORKSPACE_SERVICE_LOGS_FLAGS, @@ -121,6 +127,36 @@ def test_public_cli_help_lists_commands_and_run_flags() -> None: ).format_help() for flag in PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS: assert flag in workspace_export_help_text + workspace_file_help_text = _subparser_choice( + _subparser_choice(parser, "workspace"), "file" + ).format_help() + for subcommand_name in PUBLIC_CLI_WORKSPACE_FILE_SUBCOMMANDS: + assert subcommand_name in workspace_file_help_text + workspace_file_list_help_text = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "file"), "list" + ).format_help() + for flag in PUBLIC_CLI_WORKSPACE_FILE_LIST_FLAGS: + assert flag in workspace_file_list_help_text + workspace_file_read_help_text = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "file"), "read" + ).format_help() + for flag in PUBLIC_CLI_WORKSPACE_FILE_READ_FLAGS: + assert flag in workspace_file_read_help_text + workspace_file_write_help_text = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "file"), "write" + ).format_help() + for flag in PUBLIC_CLI_WORKSPACE_FILE_WRITE_FLAGS: + assert flag in 
workspace_file_write_help_text + workspace_patch_help_text = _subparser_choice( + _subparser_choice(parser, "workspace"), "patch" + ).format_help() + for subcommand_name in PUBLIC_CLI_WORKSPACE_PATCH_SUBCOMMANDS: + assert subcommand_name in workspace_patch_help_text + workspace_patch_apply_help_text = _subparser_choice( + _subparser_choice(_subparser_choice(parser, "workspace"), "patch"), "apply" + ).format_help() + for flag in PUBLIC_CLI_WORKSPACE_PATCH_APPLY_FLAGS: + assert flag in workspace_patch_apply_help_text workspace_disk_help_text = _subparser_choice( _subparser_choice(parser, "workspace"), "disk" ).format_help() diff --git a/tests/test_server.py b/tests/test_server.py index 5cfa044..547f0d2 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -36,6 +36,10 @@ def test_create_server_registers_vm_tools(tmp_path: Path) -> None: assert "workspace_stop" in tool_names assert "workspace_diff" in tool_names assert "workspace_export" in tool_names + assert "workspace_file_list" in tool_names + assert "workspace_file_read" in tool_names + assert "workspace_file_write" in tool_names + assert "workspace_patch_apply" in tool_names assert "workspace_disk_export" in tool_names assert "workspace_disk_list" in tool_names assert "workspace_disk_read" in tool_names @@ -247,6 +251,51 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: }, ) ) + listed_files = _extract_structured( + await server.call_tool( + "workspace_file_list", + { + "workspace_id": workspace_id, + "path": "/workspace", + "recursive": True, + }, + ) + ) + file_read = _extract_structured( + await server.call_tool( + "workspace_file_read", + { + "workspace_id": workspace_id, + "path": "note.txt", + "max_bytes": 4096, + }, + ) + ) + file_written = _extract_structured( + await server.call_tool( + "workspace_file_write", + { + "workspace_id": workspace_id, + "path": "src/app.py", + "text": "print('hello from file op')\n", + }, + ) + ) + patched = _extract_structured( + await 
server.call_tool( + "workspace_patch_apply", + { + "workspace_id": workspace_id, + "patch": ( + "--- a/note.txt\n" + "+++ b/note.txt\n" + "@@ -1 +1 @@\n" + "-ok\n" + "+patched\n" + ), + }, + ) + ) diffed = _extract_structured( await server.call_tool("workspace_diff", {"workspace_id": workspace_id}) ) @@ -338,6 +387,10 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: created, synced, executed, + listed_files, + file_read, + file_written, + patched, diffed, snapshot, snapshots, @@ -357,6 +410,10 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: created, synced, executed, + listed_files, + file_read, + file_written, + patched, diffed, snapshot, snapshots, @@ -379,6 +436,10 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: ] assert synced["workspace_sync"]["destination"] == "/workspace/subdir" assert executed["stdout"] == "[REDACTED]\n" + assert any(entry["path"] == "/workspace/note.txt" for entry in listed_files["entries"]) + assert file_read["content"] == "ok\n" + assert file_written["path"] == "/workspace/src/app.py" + assert patched["changed"] is True assert diffed["changed"] is True assert snapshot["snapshot"]["snapshot_name"] == "checkpoint" assert [entry["snapshot_name"] for entry in snapshots["snapshots"]] == [ diff --git a/tests/test_vm_manager.py b/tests/test_vm_manager.py index 0613806..e955ce9 100644 --- a/tests/test_vm_manager.py +++ b/tests/test_vm_manager.py @@ -552,6 +552,91 @@ def test_workspace_diff_and_export_round_trip(tmp_path: Path) -> None: assert logs["count"] == 0 +def test_workspace_file_ops_and_patch_round_trip(tmp_path: Path) -> None: + seed_dir = tmp_path / "seed" + seed_dir.mkdir() + src_dir = seed_dir / "src" + src_dir.mkdir() + (src_dir / "app.py").write_text('print("bug")\n', encoding="utf-8") + + manager = VmManager( + backend_name="mock", + base_dir=tmp_path / "vms", + network_manager=TapNetworkManager(enabled=False), + ) + + workspace_id = str( + manager.create_workspace( + 
environment="debian:12-base", + allow_host_compat=True, + seed_path=seed_dir, + )["workspace_id"] + ) + + listing = manager.list_workspace_files(workspace_id, path="src", recursive=True) + assert listing["entries"] == [ + { + "path": "/workspace/src/app.py", + "artifact_type": "file", + "size_bytes": 13, + "link_target": None, + } + ] + + read_payload = manager.read_workspace_file(workspace_id, "src/app.py") + assert read_payload["content"] == 'print("bug")\n' + + written = manager.write_workspace_file( + workspace_id, + "src/generated/out.txt", + text="generated\n", + ) + assert written["bytes_written"] == 10 + + patch_payload = manager.apply_workspace_patch( + workspace_id, + patch=( + "--- a/src/app.py\n" + "+++ b/src/app.py\n" + "@@ -1 +1 @@\n" + '-print("bug")\n' + '+print("fixed")\n' + "--- /dev/null\n" + "+++ b/src/new.py\n" + "@@ -0,0 +1 @@\n" + '+print("new")\n' + ), + ) + assert patch_payload["changed"] is True + assert patch_payload["summary"] == { + "total": 2, + "added": 1, + "modified": 1, + "deleted": 0, + } + + executed = manager.exec_workspace( + workspace_id, + command="python3 src/app.py && cat src/new.py && cat src/generated/out.txt", + timeout_seconds=30, + ) + assert executed["stdout"] == 'fixed\nprint("new")\ngenerated\n' + + diff_payload = manager.diff_workspace(workspace_id) + assert diff_payload["changed"] is True + assert diff_payload["summary"]["added"] == 2 + assert diff_payload["summary"]["modified"] == 1 + + output_path = tmp_path / "exported-app.py" + export_payload = manager.export_workspace( + workspace_id, + path="src/app.py", + output_path=output_path, + ) + assert export_payload["artifact_type"] == "file" + assert output_path.read_text(encoding="utf-8") == 'print("fixed")\n' + + def test_workspace_export_directory_uses_exact_output_path(tmp_path: Path) -> None: seed_dir = tmp_path / "seed" nested_dir = seed_dir / "src" diff --git a/tests/test_workspace_files.py b/tests/test_workspace_files.py new file mode 100644 index 
0000000..a321456 --- /dev/null +++ b/tests/test_workspace_files.py @@ -0,0 +1,427 @@ +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from pyro_mcp.workspace_files import ( + WORKSPACE_FILE_MAX_BYTES, + WORKSPACE_PATCH_MAX_BYTES, + WorkspacePatchHunk, + WorkspaceTextPatch, + apply_unified_text_patch, + delete_workspace_path, + list_workspace_files, + normalize_workspace_path, + parse_unified_text_patch, + read_workspace_file, + workspace_host_path, + write_workspace_file, +) + + +def test_workspace_files_list_read_write_and_delete(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + (workspace_dir / "src").mkdir() + (workspace_dir / "src" / "note.txt").write_text("hello\n", encoding="utf-8") + os.symlink("note.txt", workspace_dir / "src" / "note-link") + + listing = list_workspace_files( + workspace_dir, + workspace_path="/workspace/src", + recursive=True, + ) + assert listing.path == "/workspace/src" + assert listing.artifact_type == "directory" + assert [entry.to_payload() for entry in listing.entries] == [ + { + "path": "/workspace/src/note-link", + "artifact_type": "symlink", + "size_bytes": 8, + "link_target": "note.txt", + }, + { + "path": "/workspace/src/note.txt", + "artifact_type": "file", + "size_bytes": 6, + "link_target": None, + }, + ] + + read_payload = read_workspace_file( + workspace_dir, + workspace_path="/workspace/src/note.txt", + ) + assert read_payload.content_bytes == b"hello\n" + + written = write_workspace_file( + workspace_dir, + workspace_path="/workspace/generated/out.txt", + text="generated\n", + ) + assert written.bytes_written == 10 + assert (workspace_dir / "generated" / "out.txt").read_text(encoding="utf-8") == "generated\n" + + deleted = delete_workspace_path( + workspace_dir, + workspace_path="/workspace/generated/out.txt", + ) + assert deleted.deleted is True + assert not (workspace_dir / "generated" / "out.txt").exists() + + +def 
test_workspace_file_read_and_delete_reject_unsupported_paths(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + (workspace_dir / "dir").mkdir() + (workspace_dir / "file.txt").write_text("ok\n", encoding="utf-8") + os.symlink("file.txt", workspace_dir / "link.txt") + + with pytest.raises(RuntimeError, match="regular files"): + read_workspace_file(workspace_dir, workspace_path="/workspace/dir") + with pytest.raises(RuntimeError, match="regular files"): + read_workspace_file(workspace_dir, workspace_path="/workspace/link.txt") + with pytest.raises(RuntimeError, match="does not support directories"): + delete_workspace_path(workspace_dir, workspace_path="/workspace/dir") + + +def test_workspace_file_helpers_cover_single_paths_and_path_validation(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + (workspace_dir / "note.txt").write_text("hello\n", encoding="utf-8") + + listing = list_workspace_files( + workspace_dir, + workspace_path="/workspace/note.txt", + recursive=False, + ) + assert listing.path == "/workspace/note.txt" + assert listing.artifact_type == "file" + assert [entry.path for entry in listing.entries] == ["/workspace/note.txt"] + + assert normalize_workspace_path("src/app.py") == "/workspace/src/app.py" + assert workspace_host_path(workspace_dir, "src/app.py") == workspace_dir / "src" / "app.py" + + with pytest.raises(ValueError, match="must not be empty"): + normalize_workspace_path(" ") + with pytest.raises(ValueError, match="must stay inside /workspace"): + normalize_workspace_path("..") + with pytest.raises(ValueError, match="must stay inside /workspace"): + normalize_workspace_path("/tmp/outside") + with pytest.raises(ValueError, match="must stay inside /workspace"): + normalize_workspace_path("/") + + +def test_workspace_file_read_limits_and_write_validation(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + (workspace_dir / 
"big.txt").write_text("hello\n", encoding="utf-8") + (workspace_dir / "dir").mkdir() + real_dir = workspace_dir / "real" + real_dir.mkdir() + os.symlink("real", workspace_dir / "linked") + + with pytest.raises(ValueError, match="max_bytes must be positive"): + read_workspace_file(workspace_dir, workspace_path="/workspace/big.txt", max_bytes=0) + with pytest.raises(ValueError, match="at most"): + read_workspace_file( + workspace_dir, + workspace_path="/workspace/big.txt", + max_bytes=WORKSPACE_FILE_MAX_BYTES + 1, + ) + with pytest.raises(RuntimeError, match="exceeds the maximum supported size"): + read_workspace_file(workspace_dir, workspace_path="/workspace/big.txt", max_bytes=4) + + with pytest.raises(RuntimeError, match="regular file targets"): + write_workspace_file(workspace_dir, workspace_path="/workspace/dir", text="nope\n") + with pytest.raises(RuntimeError, match="symlinked parent"): + write_workspace_file( + workspace_dir, + workspace_path="/workspace/linked/out.txt", + text="nope\n", + ) + with pytest.raises(ValueError, match="at most"): + write_workspace_file( + workspace_dir, + workspace_path="/workspace/huge.txt", + text="x" * (WORKSPACE_FILE_MAX_BYTES + 1), + ) + + +def test_workspace_file_list_rejects_unsupported_filesystem_types(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + fifo_path = workspace_dir / "pipe" + os.mkfifo(fifo_path) + + with pytest.raises(RuntimeError, match="unsupported workspace path type"): + list_workspace_files(workspace_dir, workspace_path="/workspace", recursive=True) + + +def test_parse_and_apply_unified_text_patch_round_trip() -> None: + patch_text = """--- a/src/app.py ++++ b/src/app.py +@@ -1,2 +1,3 @@ + print("old") +-print("bug") ++print("fixed") ++print("done") +--- /dev/null ++++ b/src/new.py +@@ -0,0 +1 @@ ++print("new") +--- a/src/remove.py ++++ /dev/null +@@ -1 +0,0 @@ +-print("remove") +""" + patches = parse_unified_text_patch(patch_text) + assert [(item.path, 
item.status) for item in patches] == [ + ("/workspace/src/app.py", "modified"), + ("/workspace/src/new.py", "added"), + ("/workspace/src/remove.py", "deleted"), + ] + + modified = apply_unified_text_patch( + path="/workspace/src/app.py", + patch=patches[0], + before_text='print("old")\nprint("bug")\n', + ) + added = apply_unified_text_patch( + path="/workspace/src/new.py", + patch=patches[1], + before_text=None, + ) + deleted = apply_unified_text_patch( + path="/workspace/src/remove.py", + patch=patches[2], + before_text='print("remove")\n', + ) + + assert modified == 'print("old")\nprint("fixed")\nprint("done")\n' + assert added == 'print("new")\n' + assert deleted is None + + +def test_parse_unified_text_patch_rejects_unsupported_features() -> None: + with pytest.raises(ValueError, match="unsupported patch feature"): + parse_unified_text_patch( + """diff --git a/file.txt b/file.txt +old mode 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +""" + ) + + with pytest.raises(ValueError, match="rename and copy patches are not supported"): + parse_unified_text_patch( + """--- a/old.txt ++++ b/new.txt +@@ -1 +1 @@ +-old ++new +""" + ) + + +def test_parse_unified_text_patch_handles_git_headers_and_validation_errors() -> None: + parsed = parse_unified_text_patch( + """ +diff --git a/file.txt b/file.txt +index 1234567..89abcde 100644 +--- /workspace/file.txt ++++ /workspace/file.txt +@@ -1 +1 @@ +-old ++new +\\ No newline at end of file +""" + ) + assert parsed[0].path == "/workspace/file.txt" + + with pytest.raises(ValueError, match="must not be empty"): + parse_unified_text_patch("") + with pytest.raises(ValueError, match="invalid patch header"): + parse_unified_text_patch("oops\n") + with pytest.raises(ValueError, match="missing '\\+\\+\\+' header"): + parse_unified_text_patch("--- a/file.txt\n") + with pytest.raises(ValueError, match="has no hunks"): + parse_unified_text_patch("--- a/file.txt\n+++ b/file.txt\n") + with pytest.raises(ValueError, 
match="line counts do not match"): + parse_unified_text_patch( + """--- a/file.txt ++++ b/file.txt +@@ -1,2 +1,1 @@ +-old ++new +""" + ) + with pytest.raises(ValueError, match="invalid patch hunk line"): + parse_unified_text_patch( + """--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +?bad +""" + ) + + with pytest.raises(ValueError, match="at most"): + parse_unified_text_patch("x" * (WORKSPACE_PATCH_MAX_BYTES + 1)) + with pytest.raises(ValueError, match="patch must target a workspace path"): + parse_unified_text_patch("--- /dev/null\n+++ /dev/null\n") + with pytest.raises(ValueError, match="patch must contain at least one file change"): + parse_unified_text_patch( + """diff --git a/file.txt b/file.txt +index 1234567..89abcde 100644 +""" + ) + with pytest.raises(ValueError, match="unsupported patch feature"): + parse_unified_text_patch( + """--- a/file.txt ++++ b/file.txt +new mode 100644 +""" + ) + with pytest.raises(ValueError, match="invalid patch hunk header"): + parse_unified_text_patch( + """--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ + old +@@bogus +""" + ) + + parsed = parse_unified_text_patch( + """--- a/file.txt ++++ b/file.txt +index 1234567..89abcde 100644 +@@ -1 +1 @@ +-old ++new +@@ -3 +3 @@ +-before ++after +""" + ) + assert len(parsed[0].hunks) == 2 + + +def test_apply_unified_text_patch_rejects_context_mismatches() -> None: + patch = parse_unified_text_patch( + """--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-before ++after +""" + )[0] + + with pytest.raises(RuntimeError, match="patch context does not match"): + apply_unified_text_patch( + path="/workspace/file.txt", + patch=patch, + before_text="different\n", + ) + with pytest.raises(RuntimeError, match="patch context does not match"): + apply_unified_text_patch( + path="/workspace/file.txt", + patch=WorkspaceTextPatch( + path="/workspace/file.txt", + status="modified", + hunks=[ + WorkspacePatchHunk( + old_start=1, + old_count=1, + new_start=1, + new_count=1, + lines=[" same\n"], + ) + ], + ), + 
before_text="", + ) + + +def test_apply_unified_text_patch_rejects_range_prefix_delete_and_size_errors() -> None: + with pytest.raises(RuntimeError, match="out of range"): + apply_unified_text_patch( + path="/workspace/file.txt", + patch=WorkspaceTextPatch( + path="/workspace/file.txt", + status="modified", + hunks=[ + WorkspacePatchHunk( + old_start=3, + old_count=1, + new_start=3, + new_count=1, + lines=["-old\n", "+new\n"], + ) + ], + ), + before_text="old\n", + ) + + with pytest.raises(RuntimeError, match="invalid patch line prefix"): + apply_unified_text_patch( + path="/workspace/file.txt", + patch=WorkspaceTextPatch( + path="/workspace/file.txt", + status="modified", + hunks=[ + WorkspacePatchHunk( + old_start=1, + old_count=0, + new_start=1, + new_count=0, + lines=["?bad\n"], + ) + ], + ), + before_text="", + ) + + with pytest.raises(RuntimeError, match="delete patch did not remove all content"): + apply_unified_text_patch( + path="/workspace/file.txt", + patch=WorkspaceTextPatch( + path="/workspace/file.txt", + status="deleted", + hunks=[ + WorkspacePatchHunk( + old_start=1, + old_count=1, + new_start=1, + new_count=0, + lines=["-first\n"], + ) + ], + ), + before_text="first\nsecond\n", + ) + + huge_payload = "x" * (WORKSPACE_FILE_MAX_BYTES + 1) + with pytest.raises(RuntimeError, match="exceeds the maximum supported size"): + apply_unified_text_patch( + path="/workspace/file.txt", + patch=WorkspaceTextPatch( + path="/workspace/file.txt", + status="added", + hunks=[ + WorkspacePatchHunk( + old_start=0, + old_count=0, + new_start=1, + new_count=1, + lines=[f"+{huge_payload}"], + ) + ], + ), + before_text=None, + ) diff --git a/uv.lock b/uv.lock index 681046b..b8da050 100644 --- a/uv.lock +++ b/uv.lock @@ -706,7 +706,7 @@ crypto = [ [[package]] name = "pyro-mcp" -version = "3.1.0" +version = "3.2.0" source = { editable = "." } dependencies = [ { name = "mcp" },