From 84a7e18d4dfe921e0f94e30e82fc5275923000e9 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Thu, 12 Mar 2026 03:15:45 -0300 Subject: [PATCH] Add workspace export and baseline diff Complete the 2.6.0 workspace milestone by adding explicit host-out export and immutable-baseline diff across the CLI, Python SDK, and MCP server. Capture a baseline archive at workspace creation, export live /workspace paths through the guest agent, and compute structured whole-workspace diffs on the host without affecting command logs or shell state. The docs, roadmap, bundled guest agent, and workspace example now reflect the new create -> sync -> diff -> export workflow. Validation: uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and a real guest-backed Firecracker smoke covering workspace create, sync push, diff, export, and delete. --- CHANGELOG.md | 10 + README.md | 20 +- docs/first-run.md | 19 +- docs/install.md | 14 +- docs/integrations.md | 5 + docs/public-contract.md | 14 + docs/roadmap/task-workspace-ga.md | 9 +- ...6.0-structured-export-and-baseline-diff.md | 2 + examples/python_workspace.py | 6 + pyproject.toml | 2 +- .../linux-x86_64/guest/pyro_guest_agent.py | 69 ++ src/pyro_mcp/api.py | 30 + src/pyro_mcp/cli.py | 131 ++++ src/pyro_mcp/contract.py | 8 + .../linux-x86_64/guest/pyro_guest_agent.py | 69 ++ .../runtime_bundle/linux-x86_64/manifest.json | 2 +- src/pyro_mcp/vm_environments.py | 2 +- src/pyro_mcp/vm_guest.py | 54 ++ src/pyro_mcp/vm_manager.py | 619 +++++++++++++++++- tests/test_api.py | 8 + tests/test_cli.py | 108 +++ tests/test_public_contract.py | 12 + tests/test_server.py | 38 +- tests/test_vm_guest.py | 48 ++ tests/test_vm_manager.py | 234 +++++++ uv.lock | 2 +- 26 files changed, 1492 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0269c6..f8ffbcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable user-visible changes to `pyro-mcp` are documented here. +## 2.6.0 + +- Added explicit host-out workspace operations across the CLI, Python SDK, and MCP server with + `pyro workspace export`, `Pyro.export_workspace()`, `pyro workspace diff`, + `Pyro.diff_workspace()`, and the matching `workspace_export` / `workspace_diff` MCP tools. +- Captured an immutable create-time baseline for every new workspace so later `workspace diff` + compares the live `/workspace` tree against that original seed state. +- Kept export and diff separate from command execution and shell state so workspaces can mutate, + be inspected, and copy results back to the host without affecting command logs or shell sessions. + ## 2.5.0 - Added persistent PTY shell sessions across the CLI, Python SDK, and MCP server with diff --git a/README.md b/README.md index 11fac75..7f4f812 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ It exposes the same runtime in three public forms: - First run transcript: [docs/first-run.md](docs/first-run.md) - Terminal walkthrough GIF: [docs/assets/first-run.gif](docs/assets/first-run.gif) - PyPI package: [pypi.org/project/pyro-mcp](https://pypi.org/project/pyro-mcp/) -- What's new in 2.5.0: [CHANGELOG.md#250](CHANGELOG.md#250) +- What's new in 2.6.0: [CHANGELOG.md#260](CHANGELOG.md#260) - Host requirements: [docs/host-requirements.md](docs/host-requirements.md) - Integration targets: [docs/integrations.md](docs/integrations.md) - Public contract: [docs/public-contract.md](docs/public-contract.md) @@ -57,7 +57,7 @@ What success looks like: ```bash Platform: linux-x86_64 Runtime: PASS -Catalog version: 2.5.0 +Catalog version: 2.6.0 ... [pull] phase=install environment=debian:12 [pull] phase=ready environment=debian:12 @@ -78,6 +78,8 @@ After the quickstart works: - prove the full one-shot lifecycle with `uvx --from pyro-mcp pyro demo` - create a persistent workspace with `uvx --from pyro-mcp pyro workspace create debian:12 --seed-path ./repo` - update a live workspace from the host with `uvx --from pyro-mcp pyro workspace sync push WORKSPACE_ID ./changes` +- diff the live workspace against its create-time baseline with `uvx --from pyro-mcp pyro workspace diff WORKSPACE_ID` +- export a changed file or directory with `uvx --from pyro-mcp pyro workspace export WORKSPACE_ID note.txt --output ./note.txt` - open a persistent interactive shell with `uvx --from pyro-mcp pyro workspace shell open WORKSPACE_ID` - move to Python or MCP via [docs/integrations.md](docs/integrations.md) @@ -132,7 +134,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 2.5.0 +Catalog version: 2.6.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -210,6 +212,8 @@ longer-term interaction model. pyro workspace create debian:12 --seed-path ./repo pyro workspace sync push WORKSPACE_ID ./changes --dest src pyro workspace exec WORKSPACE_ID -- cat src/note.txt +pyro workspace diff WORKSPACE_ID +pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt pyro workspace shell open WORKSPACE_ID pyro workspace shell write WORKSPACE_ID SHELL_ID --input 'pwd' pyro workspace shell read WORKSPACE_ID SHELL_ID @@ -222,10 +226,12 @@ Persistent workspaces start in `/workspace` and keep command history until you d machine consumption, add `--json` and read the returned `workspace_id`. Use `--seed-path` when you want the workspace to start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty workspace. Use `pyro workspace sync push` when you want to import -later host-side changes into a started workspace. Sync is non-atomic in `2.5.0`; if it fails -partway through, delete and recreate the workspace from its seed. Use `pyro workspace exec` for -one-shot non-interactive commands inside a live workspace, and `pyro workspace shell *` when you -need a persistent PTY session that keeps interactive shell state between calls. +later host-side changes into a started workspace. Sync is non-atomic in `2.6.0`; if it fails +partway through, delete and recreate the workspace from its seed. Use `pyro workspace diff` to +compare the live `/workspace` tree to its immutable create-time baseline, and `pyro workspace export` +to copy one changed file or directory back to the host. Use `pyro workspace exec` for one-shot +non-interactive commands inside a live workspace, and `pyro workspace shell *` when you need a +persistent PTY session that keeps interactive shell state between calls. ## Public Interfaces diff --git a/docs/first-run.md b/docs/first-run.md index b0d0e73..ed2a0a7 100644 --- a/docs/first-run.md +++ b/docs/first-run.md @@ -22,7 +22,7 @@ Networking: tun=yes ip_forward=yes ```bash $ uvx --from pyro-mcp pyro env list -Catalog version: 2.5.0 +Catalog version: 2.6.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -72,6 +72,8 @@ deterministic structured result. $ uvx --from pyro-mcp pyro demo $ uvx --from pyro-mcp pyro workspace create debian:12 --seed-path ./repo $ uvx --from pyro-mcp pyro workspace sync push WORKSPACE_ID ./changes +$ uvx --from pyro-mcp pyro workspace diff WORKSPACE_ID +$ uvx --from pyro-mcp pyro workspace export WORKSPACE_ID note.txt --output ./note.txt $ uvx --from pyro-mcp pyro workspace shell open WORKSPACE_ID $ uvx --from pyro-mcp pyro mcp serve ``` @@ -98,6 +100,15 @@ $ uvx --from pyro-mcp pyro workspace exec WORKSPACE_ID -- cat src/note.txt hello from synced workspace [workspace-exec] workspace_id=... sequence=1 cwd=/workspace execution_mode=guest_vsock exit_code=0 duration_ms=... +$ uvx --from pyro-mcp pyro workspace diff WORKSPACE_ID +[workspace-diff] workspace_id=... total=... added=... modified=... deleted=... type_changed=... text_patched=... non_text=... +--- a/src/note.txt ++++ b/src/note.txt +@@ ... + +$ uvx --from pyro-mcp pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt +[workspace-export] workspace_id=... workspace_path=/workspace/src/note.txt output_path=... artifact_type=file entry_count=... bytes_written=... execution_mode=guest_vsock + $ uvx --from pyro-mcp pyro workspace shell open WORKSPACE_ID [workspace-shell-open] workspace_id=... shell_id=... state=running cwd=/workspace cols=120 rows=30 execution_mode=guest_vsock @@ -112,8 +123,10 @@ $ uvx --from pyro-mcp pyro workspace shell read WORKSPACE_ID SHELL_ID Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty `/workspace`. Use `pyro workspace sync push` when you need to import later host-side changes into a started -workspace. Sync is non-atomic in `2.5.0`; if it fails partway through, delete and recreate the -workspace. Use `pyro workspace exec` for one-shot commands and `pyro workspace shell *` when you +workspace. Sync is non-atomic in `2.6.0`; if it fails partway through, delete and recreate the +workspace. Use `pyro workspace diff` to compare the current `/workspace` tree to its immutable +create-time baseline, and `pyro workspace export` to copy one changed file or directory back to +the host. Use `pyro workspace exec` for one-shot commands and `pyro workspace shell *` when you need a persistent interactive PTY session in that same workspace. Example output: diff --git a/docs/install.md b/docs/install.md index 4e7300b..847291b 100644 --- a/docs/install.md +++ b/docs/install.md @@ -83,7 +83,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 2.5.0 +Catalog version: 2.6.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -176,6 +176,8 @@ After the CLI path works, you can move on to: - persistent workspaces: `pyro workspace create debian:12 --seed-path ./repo` - live workspace updates: `pyro workspace sync push WORKSPACE_ID ./changes` +- baseline diff: `pyro workspace diff WORKSPACE_ID` +- host export: `pyro workspace export WORKSPACE_ID note.txt --output ./note.txt` - interactive shells: `pyro workspace shell open WORKSPACE_ID` - MCP: `pyro mcp serve` - Python SDK: `from pyro_mcp import Pyro` @@ -189,6 +191,8 @@ Use `pyro workspace ...` when you need repeated commands in one sandbox instead pyro workspace create debian:12 --seed-path ./repo pyro workspace sync push WORKSPACE_ID ./changes --dest src pyro workspace exec WORKSPACE_ID -- cat src/note.txt +pyro workspace diff WORKSPACE_ID +pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt pyro workspace shell open WORKSPACE_ID pyro workspace shell write WORKSPACE_ID SHELL_ID --input 'pwd' pyro workspace shell read WORKSPACE_ID SHELL_ID @@ -201,9 +205,11 @@ Workspace commands default to the persistent `/workspace` directory inside the g the identifier programmatically, use `--json` and read the `workspace_id` field. Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive. Use `pyro workspace sync push` for later host-side changes to a started workspace. Sync -is non-atomic in `2.5.0`; if it fails partway through, delete and recreate the workspace from its -seed. Use `pyro workspace exec` for one-shot commands and `pyro workspace shell *` when you need -an interactive PTY that survives across separate calls. +is non-atomic in `2.6.0`; if it fails partway through, delete and recreate the workspace from its +seed. Use `pyro workspace diff` to compare the current workspace tree to its immutable create-time +baseline, and `pyro workspace export` to copy one changed file or directory back to the host. Use +`pyro workspace exec` for one-shot commands and `pyro workspace shell *` when you need an +interactive PTY that survives across separate calls. ## Contributor Clone diff --git a/docs/integrations.md b/docs/integrations.md index 9be184d..e66791e 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -31,6 +31,7 @@ Recommended surface: - `vm_run` - `workspace_create(seed_path=...)` + `workspace_sync_push` + `workspace_exec` when the agent needs persistent workspace state +- `workspace_diff` + `workspace_export` when the agent needs explicit baseline comparison or host-out file transfer - `open_shell` / `read_shell` / `write_shell` when the agent needs an interactive PTY inside that workspace Canonical example: @@ -67,6 +68,7 @@ Recommended default: - `Pyro.run_in_vm(...)` - `Pyro.create_workspace(seed_path=...)` + `Pyro.push_workspace_sync(...)` + `Pyro.exec_workspace(...)` when repeated workspace commands are required +- `Pyro.diff_workspace(...)` + `Pyro.export_workspace(...)` when the agent needs baseline comparison or host-out file transfer - `Pyro.open_shell(...)` + `Pyro.write_shell(...)` + `Pyro.read_shell(...)` when the agent needs an interactive PTY inside the workspace Lifecycle note: @@ -78,6 +80,9 @@ Lifecycle note: `/workspace` that starts from host content - use `push_workspace_sync(...)` when later host-side changes need to be imported into that running workspace without recreating it +- use `diff_workspace(...)` when the agent needs a structured comparison against the immutable + create-time baseline +- use `export_workspace(...)` when the agent needs one file or directory copied back to the host - use `open_shell(...)` when the agent needs interactive shell state instead of one-shot execs Examples: diff --git a/docs/public-contract.md b/docs/public-contract.md index 65e5b74..f62ce2e 100644 --- a/docs/public-contract.md +++ b/docs/public-contract.md @@ -22,6 +22,8 @@ Top-level commands: - `pyro workspace create` - `pyro workspace sync push` - `pyro workspace exec` +- `pyro workspace export` +- `pyro workspace diff` - `pyro workspace shell open` - `pyro workspace shell read` - `pyro workspace shell write` @@ -54,6 +56,8 @@ Behavioral guarantees: - `pyro workspace create` auto-starts a persistent workspace. - `pyro workspace create --seed-path PATH` seeds `/workspace` from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive before the workspace is returned. - `pyro workspace sync push WORKSPACE_ID SOURCE_PATH [--dest WORKSPACE_PATH]` imports later host-side directory or archive content into a started workspace. +- `pyro workspace export WORKSPACE_ID PATH --output HOST_PATH` exports one file or directory from `/workspace` back to the host. +- `pyro workspace diff WORKSPACE_ID` compares the current `/workspace` tree to the immutable create-time baseline. - `pyro workspace exec` runs in the persistent `/workspace` for that workspace and does not auto-clean. - `pyro workspace shell *` manages persistent PTY sessions inside a started workspace. - `pyro workspace logs` returns persisted command history for that workspace until `pyro workspace delete`. @@ -76,6 +80,8 @@ Supported public entrypoints: - `Pyro.create_vm(...)` - `Pyro.create_workspace(...)` - `Pyro.push_workspace_sync(workspace_id, source_path, *, dest="/workspace")` +- `Pyro.export_workspace(workspace_id, path, *, output_path)` +- `Pyro.diff_workspace(workspace_id)` - `Pyro.open_shell(workspace_id, *, cwd="/workspace", cols=120, rows=30)` - `Pyro.read_shell(workspace_id, shell_id, *, cursor=0, max_chars=65536)` - `Pyro.write_shell(workspace_id, shell_id, *, input, append_newline=True)` @@ -104,6 +110,8 @@ Stable public method names: - `create_vm(...)` - `create_workspace(...)` - `push_workspace_sync(workspace_id, source_path, *, dest="/workspace")` +- `export_workspace(workspace_id, path, *, output_path)` +- `diff_workspace(workspace_id)` - `open_shell(workspace_id, *, cwd="/workspace", cols=120, rows=30)` - `read_shell(workspace_id, shell_id, *, cursor=0, max_chars=65536)` - `write_shell(workspace_id, shell_id, *, input, append_newline=True)` @@ -130,6 +138,8 @@ Behavioral defaults: - `allow_host_compat` defaults to `False` on `create_workspace(...)`. - `Pyro.create_workspace(..., seed_path=...)` seeds `/workspace` from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive before the workspace is returned. - `Pyro.push_workspace_sync(...)` imports later host-side directory or archive content into a started workspace. +- `Pyro.export_workspace(...)` exports one file or directory from `/workspace` to an explicit host path. +- `Pyro.diff_workspace(...)` compares the current `/workspace` tree to the immutable create-time baseline. - `Pyro.exec_vm(...)` runs one command and auto-cleans that VM after the exec completes. - `Pyro.exec_workspace(...)` runs one command in the persistent workspace and leaves it alive. - `Pyro.open_shell(...)` opens a persistent PTY shell attached to one started workspace. @@ -159,6 +169,8 @@ Persistent workspace tools: - `workspace_create` - `workspace_sync_push` - `workspace_exec` +- `workspace_export` +- `workspace_diff` - `shell_open` - `shell_read` - `shell_write` @@ -176,6 +188,8 @@ Behavioral defaults: - `workspace_create` exposes `allow_host_compat`, which defaults to `false`. - `workspace_create` accepts optional `seed_path` and seeds `/workspace` from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive before the workspace is returned. - `workspace_sync_push` imports later host-side directory or archive content into a started workspace, with an optional `dest` under `/workspace`. +- `workspace_export` exports one file or directory from `/workspace` to an explicit host path. +- `workspace_diff` compares the current `/workspace` tree to the immutable create-time baseline. - `vm_exec` runs one command and auto-cleans that VM after the exec completes. - `workspace_exec` runs one command in a persistent `/workspace` and leaves the workspace alive. - `shell_open`, `shell_read`, `shell_write`, `shell_signal`, and `shell_close` manage persistent PTY shells inside a started workspace. diff --git a/docs/roadmap/task-workspace-ga.md b/docs/roadmap/task-workspace-ga.md index e878c10..631f2af 100644 --- a/docs/roadmap/task-workspace-ga.md +++ b/docs/roadmap/task-workspace-ga.md @@ -2,12 +2,13 @@ This roadmap turns the agent-workspace vision into release-sized milestones. -Current baseline is `2.5.0`: +Current baseline is `2.6.0`: - workspace persistence exists and the public surface is now workspace-first -- host crossing currently covers create-time seeding and later sync push +- host crossing currently covers create-time seeding, later sync push, and explicit export - persistent PTY shell sessions exist alongside one-shot `workspace exec` -- no export, diff, service, snapshot, reset, or secrets contract exists yet +- immutable create-time baselines now power whole-workspace diff +- no service, snapshot, reset, or secrets contract exists yet Locked roadmap decisions: @@ -28,7 +29,7 @@ also expected to update: 1. [`2.4.0` Workspace Contract Pivot](task-workspace-ga/2.4.0-workspace-contract-pivot.md) - Done 2. [`2.5.0` PTY Shell Sessions](task-workspace-ga/2.5.0-pty-shell-sessions.md) - Done -3. [`2.6.0` Structured Export And Baseline Diff](task-workspace-ga/2.6.0-structured-export-and-baseline-diff.md) +3. [`2.6.0` Structured Export And Baseline Diff](task-workspace-ga/2.6.0-structured-export-and-baseline-diff.md) - Done 4. [`2.7.0` Service Lifecycle And Typed Readiness](task-workspace-ga/2.7.0-service-lifecycle-and-typed-readiness.md) 5. [`2.8.0` Named Snapshots And Reset](task-workspace-ga/2.8.0-named-snapshots-and-reset.md) 6. [`2.9.0` Secrets](task-workspace-ga/2.9.0-secrets.md) diff --git a/docs/roadmap/task-workspace-ga/2.6.0-structured-export-and-baseline-diff.md b/docs/roadmap/task-workspace-ga/2.6.0-structured-export-and-baseline-diff.md index 9a51d55..4ee66bb 100644 --- a/docs/roadmap/task-workspace-ga/2.6.0-structured-export-and-baseline-diff.md +++ b/docs/roadmap/task-workspace-ga/2.6.0-structured-export-and-baseline-diff.md @@ -1,5 +1,7 @@ # `2.6.0` Structured Export And Baseline Diff +Status: Done + ## Goal Complete the next explicit host-crossing step by letting a workspace export diff --git a/examples/python_workspace.py b/examples/python_workspace.py index c1a3f9e..141fc8f 100644 --- a/examples/python_workspace.py +++ b/examples/python_workspace.py @@ -11,6 +11,7 @@ def main() -> None: with ( tempfile.TemporaryDirectory(prefix="pyro-workspace-seed-") as seed_dir, tempfile.TemporaryDirectory(prefix="pyro-workspace-sync-") as sync_dir, + tempfile.TemporaryDirectory(prefix="pyro-workspace-export-") as export_dir, ): Path(seed_dir, "note.txt").write_text("hello from seed\n", encoding="utf-8") Path(sync_dir, "note.txt").write_text("hello from sync\n", encoding="utf-8") @@ -20,6 +21,11 @@ def main() -> None: pyro.push_workspace_sync(workspace_id, sync_dir) result = pyro.exec_workspace(workspace_id, command="cat note.txt") print(result["stdout"], end="") + diff_result = pyro.diff_workspace(workspace_id) + print(f"changed={diff_result['changed']} total={diff_result['summary']['total']}") + exported_path = Path(export_dir, "note.txt") + pyro.export_workspace(workspace_id, "note.txt", output_path=exported_path) + print(exported_path.read_text(encoding="utf-8"), end="") logs = pyro.logs_workspace(workspace_id) print(f"workspace_id={workspace_id} command_count={logs['count']}") finally: diff --git a/pyproject.toml b/pyproject.toml index 2ad5fb0..e88ae09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyro-mcp" -version = "2.5.0" +version = "2.6.0" description = "Ephemeral Firecracker sandboxes with curated environments, persistent workspaces, and MCP tools." readme = "README.md" license = { file = "LICENSE" } diff --git a/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py b/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py index 03ff492..91a9103 100644 --- a/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py +++ b/runtime_sources/linux-x86_64/guest/pyro_guest_agent.py @@ -14,6 +14,7 @@ import socket import struct import subprocess import tarfile +import tempfile import termios import threading import time @@ -180,6 +181,54 @@ def _extract_archive(payload: bytes, destination: str) -> dict[str, Any]: } +def _inspect_archive(archive_path: Path) -> tuple[int, int]: + entry_count = 0 + bytes_written = 0 + with tarfile.open(archive_path, "r:*") as archive: + for member in archive.getmembers(): + entry_count += 1 + if member.isfile(): + bytes_written += member.size + return entry_count, bytes_written + + +def _prepare_export_archive(path: str) -> dict[str, Any]: + normalized_path, source_path = _normalize_destination(path) + if not source_path.exists() and not source_path.is_symlink(): + raise RuntimeError(f"workspace path does not exist: {normalized_path}") + if source_path.is_symlink(): + artifact_type = "symlink" + elif source_path.is_file(): + artifact_type = "file" + elif source_path.is_dir(): + artifact_type = "directory" + else: + raise RuntimeError(f"unsupported workspace path type: {normalized_path}") + + with tempfile.NamedTemporaryFile(prefix="pyro-export-", suffix=".tar", delete=False) as handle: + archive_path = Path(handle.name) + try: + with tarfile.open(archive_path, "w") as archive: + archive.dereference = False + if artifact_type == "directory": + for child in sorted(source_path.iterdir(), key=lambda item: item.name): + archive.add(child, arcname=child.name, recursive=True) + else: + archive.add(source_path, arcname=source_path.name, recursive=False) + entry_count, bytes_written = _inspect_archive(archive_path) + return { + "workspace_path": str(normalized_path), + "artifact_type": artifact_type, + "archive_path": archive_path, + "archive_size": archive_path.stat().st_size, + "entry_count": entry_count, + "bytes_written": bytes_written, + } + except Exception: + archive_path.unlink(missing_ok=True) + raise + + def _run_command(command: str, timeout_seconds: int) -> dict[str, Any]: started = time.monotonic() try: @@ -533,6 +582,26 @@ def main() -> None: with conn: try: request = _read_request(conn) + if str(request.get("action", "")) == "export_archive": + export = _prepare_export_archive(str(request.get("path", "/workspace"))) + try: + header = { + "workspace_path": export["workspace_path"], + "artifact_type": export["artifact_type"], + "archive_size": export["archive_size"], + "entry_count": export["entry_count"], + "bytes_written": export["bytes_written"], + } + conn.sendall((json.dumps(header) + "\n").encode("utf-8")) + with Path(str(export["archive_path"])).open("rb") as handle: + while True: + chunk = handle.read(BUFFER_SIZE) + if chunk == b"": + break + conn.sendall(chunk) + finally: + Path(str(export["archive_path"])).unlink(missing_ok=True) + continue response = _dispatch(request, conn) except Exception as exc: # noqa: BLE001 response = {"error": str(exc)} diff --git a/src/pyro_mcp/api.py b/src/pyro_mcp/api.py index 568d089..0653759 100644 --- a/src/pyro_mcp/api.py +++ b/src/pyro_mcp/api.py @@ -130,6 +130,22 @@ class Pyro: def logs_workspace(self, workspace_id: str) -> dict[str, Any]: return self._manager.logs_workspace(workspace_id) + def export_workspace( + self, + workspace_id: str, + path: str, + *, + output_path: str | Path, + ) -> dict[str, Any]: + return self._manager.export_workspace( + workspace_id, + path=path, + output_path=output_path, + ) + + def diff_workspace(self, workspace_id: str) -> dict[str, Any]: + return self._manager.diff_workspace(workspace_id) + def open_shell( self, workspace_id: str, @@ -370,6 +386,20 @@ class Pyro: """Return persisted command history for one workspace.""" return self.logs_workspace(workspace_id) + @server.tool() + async def workspace_export( + workspace_id: str, + path: str, + output_path: str, + ) -> dict[str, Any]: + """Export one file or directory from `/workspace` back to the host.""" + return self.export_workspace(workspace_id, path, output_path=output_path) + + @server.tool() + async def workspace_diff(workspace_id: str) -> dict[str, Any]: + """Compare `/workspace` to the immutable create-time baseline.""" + return self.diff_workspace(workspace_id) + @server.tool() async def shell_open( workspace_id: str, diff --git a/src/pyro_mcp/cli.py b/src/pyro_mcp/cli.py index 38a592b..f0e5aa3 100644 --- a/src/pyro_mcp/cli.py +++ b/src/pyro_mcp/cli.py @@ -215,6 +215,41 @@ def _print_workspace_sync_human(payload: dict[str, Any]) -> None: ) +def _print_workspace_export_human(payload: dict[str, Any]) -> None: + print( + "[workspace-export] " + f"workspace_id={str(payload.get('workspace_id', 'unknown'))} " + f"workspace_path={str(payload.get('workspace_path', WORKSPACE_GUEST_PATH))} " + f"output_path={str(payload.get('output_path', 'unknown'))} " + f"artifact_type={str(payload.get('artifact_type', 'unknown'))} " + f"entry_count={int(payload.get('entry_count', 0))} " + f"bytes_written={int(payload.get('bytes_written', 0))} " + f"execution_mode={str(payload.get('execution_mode', 'unknown'))}" + ) + + +def _print_workspace_diff_human(payload: dict[str, Any]) -> None: + if not bool(payload.get("changed")): + print("No workspace changes.") + return + summary = payload.get("summary") + if isinstance(summary, dict): + print( + "[workspace-diff] " + f"workspace_id={str(payload.get('workspace_id', 'unknown'))} " + f"total={int(summary.get('total', 0))} " + f"added={int(summary.get('added', 0))} " + f"modified={int(summary.get('modified', 0))} " + f"deleted={int(summary.get('deleted', 0))} " + f"type_changed={int(summary.get('type_changed', 0))} " + f"text_patched={int(summary.get('text_patched', 0))} " + f"non_text={int(summary.get('non_text', 0))}" + ) + patch = str(payload.get("patch", "")) + if patch != "": + print(patch, end="" if patch.endswith("\n") else "\n") + + def _print_workspace_logs_human(payload: dict[str, Any]) -> None: entries = payload.get("entries") if not isinstance(entries, list) or not entries: @@ -301,6 +336,8 @@ def _build_parser() -> argparse.ArgumentParser: Need repeated commands in one workspace after that? pyro workspace create debian:12 --seed-path ./repo pyro workspace sync push WORKSPACE_ID ./changes + pyro workspace diff WORKSPACE_ID + pyro workspace export WORKSPACE_ID note.txt --output ./note.txt pyro workspace shell open WORKSPACE_ID Use `pyro mcp serve` only after the CLI validation path works. @@ -509,6 +546,8 @@ def _build_parser() -> argparse.ArgumentParser: pyro workspace create debian:12 --seed-path ./repo pyro workspace sync push WORKSPACE_ID ./repo --dest src pyro workspace exec WORKSPACE_ID -- sh -lc 'printf "hello\\n" > note.txt' + pyro workspace diff WORKSPACE_ID + pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt pyro workspace shell open WORKSPACE_ID pyro workspace logs WORKSPACE_ID """ @@ -530,6 +569,7 @@ def _build_parser() -> argparse.ArgumentParser: pyro workspace create debian:12 pyro workspace create debian:12 --seed-path ./repo pyro workspace sync push WORKSPACE_ID ./changes + pyro workspace diff WORKSPACE_ID """ ), formatter_class=_HelpFormatter, @@ -667,6 +707,57 @@ def _build_parser() -> argparse.ArgumentParser: action="store_true", help="Print structured JSON instead of human-readable output.", ) + workspace_export_parser = workspace_subparsers.add_parser( + "export", + help="Export one workspace path to the host.", + description="Export one file or directory from `/workspace` to an explicit host path.", + epilog="Example:\n pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt", + formatter_class=_HelpFormatter, + ) + workspace_export_parser.add_argument( + "workspace_id", + metavar="WORKSPACE_ID", + help="Persistent workspace identifier.", + ) + workspace_export_parser.add_argument( + "path", + metavar="PATH", + help="Workspace path to export. Relative values resolve inside `/workspace`.", + ) + workspace_export_parser.add_argument( + "--output", + required=True, + help="Exact host path to create for the exported file or directory.", + ) + workspace_export_parser.add_argument( + "--json", + action="store_true", + help="Print structured JSON instead of human-readable output.", + ) + workspace_diff_parser = workspace_subparsers.add_parser( + "diff", + help="Diff `/workspace` against the create-time baseline.", + description="Compare the current `/workspace` tree to the immutable workspace baseline.", + epilog=dedent( + """ + Example: + pyro workspace diff WORKSPACE_ID + + Use `workspace export` to copy a changed file or directory back to the host. + """ + ), + formatter_class=_HelpFormatter, + ) + workspace_diff_parser.add_argument( + "workspace_id", + metavar="WORKSPACE_ID", + help="Persistent workspace identifier.", + ) + workspace_diff_parser.add_argument( + "--json", + action="store_true", + help="Print structured JSON instead of human-readable output.", + ) workspace_shell_parser = workspace_subparsers.add_parser( "shell", help="Open and manage persistent interactive shells.", @@ -1148,6 +1239,46 @@ def main() -> None: raise SystemExit(1) from exc _print_workspace_sync_human(payload) return + if args.workspace_command == "export": + if bool(args.json): + try: + payload = pyro.export_workspace( + args.workspace_id, + args.path, + output_path=args.output, + ) + except Exception as exc: # noqa: BLE001 + _print_json({"ok": False, "error": str(exc)}) + raise SystemExit(1) from exc + _print_json(payload) + else: + try: + payload = pyro.export_workspace( + args.workspace_id, + args.path, + output_path=args.output, + ) + except Exception as exc: # noqa: BLE001 + print(f"[error] {exc}", file=sys.stderr, flush=True) + raise SystemExit(1) from exc + _print_workspace_export_human(payload) + return + if args.workspace_command == "diff": + if bool(args.json): + try: + payload = pyro.diff_workspace(args.workspace_id) + except Exception as exc: # noqa: BLE001 + _print_json({"ok": False, "error": str(exc)}) + raise SystemExit(1) from exc + _print_json(payload) + else: + try: + payload = pyro.diff_workspace(args.workspace_id) + except Exception as exc: # noqa: BLE001 + print(f"[error] {exc}", file=sys.stderr, flush=True) + raise SystemExit(1) from exc + _print_workspace_diff_human(payload) + return if args.workspace_command == "shell": if args.workspace_shell_command == "open": try: diff --git a/src/pyro_mcp/contract.py b/src/pyro_mcp/contract.py index ae2a05c..0d68527 100644 --- a/src/pyro_mcp/contract.py +++ b/src/pyro_mcp/contract.py @@ -8,7 +8,9 @@ PUBLIC_CLI_ENV_SUBCOMMANDS = ("inspect", "list", "pull", "prune") PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = ( "create", "delete", + "diff", "exec", + "export", "logs", "shell", "status", @@ -25,6 +27,8 @@ PUBLIC_CLI_WORKSPACE_CREATE_FLAGS = ( "--seed-path", "--json", ) +PUBLIC_CLI_WORKSPACE_DIFF_FLAGS = ("--json",) +PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS = ("--output", "--json") PUBLIC_CLI_WORKSPACE_SHELL_OPEN_FLAGS = ("--cwd", "--cols", "--rows", "--json") PUBLIC_CLI_WORKSPACE_SHELL_READ_FLAGS = ("--cursor", "--max-chars", "--json") PUBLIC_CLI_WORKSPACE_SHELL_WRITE_FLAGS = ("--input", "--no-newline", "--json") @@ -48,8 +52,10 @@ PUBLIC_SDK_METHODS = ( "create_workspace", "delete_vm", "delete_workspace", + "diff_workspace", "exec_vm", "exec_workspace", + "export_workspace", "inspect_environment", "list_environments", "logs_workspace", @@ -87,7 +93,9 @@ PUBLIC_MCP_TOOLS = ( "vm_stop", "workspace_create", "workspace_delete", + "workspace_diff", "workspace_exec", + "workspace_export", "workspace_logs", "workspace_status", "workspace_sync_push", diff --git a/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py b/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py index 03ff492..91a9103 100755 --- a/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py +++ b/src/pyro_mcp/runtime_bundle/linux-x86_64/guest/pyro_guest_agent.py @@ -14,6 +14,7 @@ import socket import struct import subprocess import tarfile +import tempfile import termios import threading import time @@ -180,6 +181,54 @@ def _extract_archive(payload: bytes, destination: str) -> dict[str, Any]: } +def _inspect_archive(archive_path: Path) -> tuple[int, int]: + entry_count = 0 + bytes_written = 0 + with tarfile.open(archive_path, "r:*") as archive: + for member in archive.getmembers(): + entry_count += 1 + if member.isfile(): + bytes_written += member.size + return entry_count, bytes_written + + +def _prepare_export_archive(path: str) -> dict[str, Any]: + normalized_path, source_path = _normalize_destination(path) + if not source_path.exists() and not source_path.is_symlink(): + raise RuntimeError(f"workspace path does not exist: {normalized_path}") + if source_path.is_symlink(): + artifact_type = "symlink" + elif source_path.is_file(): + artifact_type = "file" + elif source_path.is_dir(): + artifact_type = "directory" + else: + raise RuntimeError(f"unsupported workspace path type: {normalized_path}") + + with tempfile.NamedTemporaryFile(prefix="pyro-export-", suffix=".tar", delete=False) as handle: + archive_path = Path(handle.name) + try: + with tarfile.open(archive_path, "w") as archive: + archive.dereference = False + if artifact_type == "directory": + for child in sorted(source_path.iterdir(), key=lambda item: item.name): + archive.add(child, arcname=child.name, recursive=True) + else: + archive.add(source_path, arcname=source_path.name, recursive=False) + entry_count, bytes_written = _inspect_archive(archive_path) + return { + "workspace_path": str(normalized_path), + "artifact_type": artifact_type, + "archive_path": archive_path, + "archive_size": archive_path.stat().st_size, + "entry_count": entry_count, + "bytes_written": bytes_written, + } + except Exception: + archive_path.unlink(missing_ok=True) + raise + + def _run_command(command: str, timeout_seconds: int) -> dict[str, Any]: started = time.monotonic() try: @@ -533,6 +582,26 @@ def main() -> None: with conn: try: request = _read_request(conn) + if str(request.get("action", "")) == "export_archive": + export = _prepare_export_archive(str(request.get("path", "/workspace"))) + try: + header = { + "workspace_path": export["workspace_path"], + "artifact_type": export["artifact_type"], + "archive_size": export["archive_size"], + "entry_count": export["entry_count"], + "bytes_written": export["bytes_written"], + } + conn.sendall((json.dumps(header) + "\n").encode("utf-8")) + with Path(str(export["archive_path"])).open("rb") as handle: + while True: + chunk = handle.read(BUFFER_SIZE) + if chunk == b"": + break + conn.sendall(chunk) + finally: + Path(str(export["archive_path"])).unlink(missing_ok=True) + continue response = _dispatch(request, conn) except Exception as exc: # noqa: BLE001 response = {"error": str(exc)} diff --git a/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json b/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json index 52d1a5d..d58fbbf 100644 --- a/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json +++ b/src/pyro_mcp/runtime_bundle/linux-x86_64/manifest.json @@ -25,7 +25,7 @@ "guest": { "agent": { "path": "guest/pyro_guest_agent.py", - "sha256": "07adf6269551447dbea8c236f91499ea1479212a3f084c5402a656f5f5cc5892" + "sha256": "4118589ccd8f4ac8200d9cedf25d13ff515d77c28094bbbdb208310247688b40" } }, "platform": "linux-x86_64", diff --git a/src/pyro_mcp/vm_environments.py b/src/pyro_mcp/vm_environments.py index 3e7eddf..8f07ca6 100644 --- a/src/pyro_mcp/vm_environments.py +++ b/src/pyro_mcp/vm_environments.py @@ -19,7 +19,7 @@ from typing import Any from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths DEFAULT_ENVIRONMENT_VERSION = "1.0.0" -DEFAULT_CATALOG_VERSION = "2.5.0" +DEFAULT_CATALOG_VERSION = "2.6.0" OCI_MANIFEST_ACCEPT = ", ".join( ( "application/vnd.oci.image.index.v1+json", diff --git a/src/pyro_mcp/vm_guest.py b/src/pyro_mcp/vm_guest.py index 1988269..c9a8db8 100644 --- a/src/pyro_mcp/vm_guest.py +++ b/src/pyro_mcp/vm_guest.py @@ -39,6 +39,14 @@ class GuestArchiveResponse: bytes_written: int +@dataclass(frozen=True) +class GuestArchiveExportResponse: + workspace_path: str + artifact_type: str + entry_count: int + bytes_written: int + + @dataclass(frozen=True) class GuestShellSummary: shell_id: str @@ -128,6 +136,52 @@ class VsockExecClient: bytes_written=int(payload.get("bytes_written", 0)), ) + def export_archive( + self, + guest_cid: int, + port: int, + *, + workspace_path: str, + archive_path: Path, + timeout_seconds: int = 60, + uds_path: str | None = None, + ) -> GuestArchiveExportResponse: + request = { + "action": "export_archive", + "path": workspace_path, + } + sock = self._connect(guest_cid, port, timeout_seconds, uds_path=uds_path) + try: + sock.sendall((json.dumps(request) + "\n").encode("utf-8")) + header = self._recv_line(sock) + if header.strip() == "": + raise RuntimeError("guest export response header is empty") + payload = json.loads(header) + if not isinstance(payload, dict): + raise RuntimeError("guest export response header must be a JSON object") + error = payload.get("error") + if error is not None: + raise RuntimeError(str(error)) + archive_size = int(payload.get("archive_size", 0)) + if archive_size < 0: + raise RuntimeError("guest export archive_size must not be negative") + with archive_path.open("wb") as handle: + remaining = archive_size + while remaining > 0: + chunk = sock.recv(min(65536, remaining)) + if chunk == b"": + raise RuntimeError("unexpected EOF while receiving export archive") + handle.write(chunk) + remaining -= len(chunk) + finally: + sock.close() + return GuestArchiveExportResponse( + workspace_path=str(payload.get("workspace_path", workspace_path)), + artifact_type=str(payload.get("artifact_type", "file")), + entry_count=int(payload.get("entry_count", 0)), + bytes_written=int(payload.get("bytes_written", 0)), + ) + def open_shell( self, guest_cid: int, diff --git a/src/pyro_mcp/vm_manager.py b/src/pyro_mcp/vm_manager.py index ad221dc..225ab2b 100644 --- a/src/pyro_mcp/vm_manager.py +++ b/src/pyro_mcp/vm_manager.py @@ -2,6 +2,7 @@ from __future__ import annotations +import difflib import json import os import shlex @@ -43,7 +44,9 @@ DEFAULT_TIMEOUT_SECONDS = 30 DEFAULT_TTL_SECONDS = 600 DEFAULT_ALLOW_HOST_COMPAT = False -WORKSPACE_LAYOUT_VERSION = 3 +WORKSPACE_LAYOUT_VERSION = 4 +WORKSPACE_BASELINE_DIRNAME = "baseline" +WORKSPACE_BASELINE_ARCHIVE_NAME = "workspace.tar" WORKSPACE_DIRNAME = "workspace" WORKSPACE_COMMANDS_DIRNAME = "commands" WORKSPACE_SHELLS_DIRNAME = "shells" @@ -57,6 +60,7 @@ DEFAULT_SHELL_MAX_CHARS = 65536 WORKSPACE_SHELL_SIGNAL_NAMES = shell_signal_names() WorkspaceSeedMode = Literal["empty", "directory", "tar_archive"] +WorkspaceArtifactType = Literal["file", "directory", "symlink"] @dataclass @@ -287,6 +291,24 @@ class VmExecResult: duration_ms: int +@dataclass(frozen=True) +class ExportedWorkspaceArchive: + workspace_path: str + artifact_type: WorkspaceArtifactType + archive_path: Path + entry_count: int + bytes_written: int + + +@dataclass(frozen=True) +class WorkspaceTreeEntry: + path: str + artifact_type: WorkspaceArtifactType + disk_path: Path + size_bytes: int = 0 + link_target: str | None = None + + def _optional_int(value: object) -> int | None: if value is None: return None @@ -522,6 +544,66 @@ def _write_directory_seed_archive(source_dir: Path, archive_path: Path) -> None: archive.add(child, arcname=child.name, recursive=True) +def _write_empty_seed_archive(archive_path: Path) -> None: + archive_path.parent.mkdir(parents=True, exist_ok=True) + with tarfile.open(archive_path, "w"): + pass + + +def _persist_workspace_baseline( + prepared_seed: PreparedWorkspaceSeed, + *, + baseline_archive_path: Path, +) -> None: + baseline_archive_path.parent.mkdir(parents=True, exist_ok=True) + if prepared_seed.archive_path is None: + _write_empty_seed_archive(baseline_archive_path) + return + shutil.copy2(prepared_seed.archive_path, baseline_archive_path) + + +def _write_workspace_export_archive( + source_path: Path, + *, + archive_path: Path, +) -> WorkspaceArtifactType: + archive_path.parent.mkdir(parents=True, exist_ok=True) + if source_path.is_symlink(): + artifact_type: WorkspaceArtifactType = "symlink" + elif source_path.is_file(): + artifact_type = "file" + elif source_path.is_dir(): + artifact_type = "directory" + else: + raise RuntimeError(f"unsupported workspace path type: {source_path}") + + def validate_source(current_path: Path, relative_path: PurePosixPath) -> None: + if current_path.is_symlink(): + _validate_archive_symlink_target(relative_path, os.readlink(current_path)) + return + if current_path.is_file(): + return + if current_path.is_dir(): + for child in sorted(current_path.iterdir(), key=lambda item: item.name): + validate_source(child, relative_path / child.name) + return + raise RuntimeError(f"unsupported workspace path type: {current_path}") + + if artifact_type == "directory": + for child in sorted(source_path.iterdir(), key=lambda item: item.name): + validate_source(child, PurePosixPath(child.name)) + else: + validate_source(source_path, PurePosixPath(source_path.name)) + with tarfile.open(archive_path, "w") as archive: + archive.dereference = False + if artifact_type == "directory": + for child in sorted(source_path.iterdir(), key=lambda item: item.name): + archive.add(child, arcname=child.name, recursive=True) + else: + archive.add(source_path, arcname=source_path.name, recursive=False) + return artifact_type + + def _extract_seed_archive_to_host_workspace( archive_path: Path, *, @@ -576,6 +658,120 @@ def _extract_seed_archive_to_host_workspace( } +def _prepare_workspace_export_archive( + *, + workspace_dir: Path, + workspace_path: str, + archive_path: Path, +) -> ExportedWorkspaceArchive: + normalized_workspace_path, _ = _normalize_workspace_destination(workspace_path) + source_path = _workspace_host_destination(workspace_dir, normalized_workspace_path) + if not source_path.exists() and not source_path.is_symlink(): + raise RuntimeError(f"workspace path does not exist: {normalized_workspace_path}") + artifact_type = _write_workspace_export_archive(source_path, archive_path=archive_path) + entry_count, bytes_written = _inspect_seed_archive(archive_path) + return ExportedWorkspaceArchive( + workspace_path=normalized_workspace_path, + artifact_type=artifact_type, + archive_path=archive_path, + entry_count=entry_count, + bytes_written=bytes_written, + ) + + +def _extract_workspace_export_archive( + archive_path: Path, + *, + output_path: Path, + artifact_type: WorkspaceArtifactType, +) -> dict[str, Any]: + output_path.parent.mkdir(parents=True, exist_ok=True) + if output_path.exists() or output_path.is_symlink(): + raise RuntimeError(f"output_path already exists: {output_path}") + + entry_count = 0 + bytes_written = 0 + if artifact_type == "directory": + output_path.mkdir(parents=True, exist_ok=False) + with tarfile.open(archive_path, "r:*") as archive: + for member in archive.getmembers(): + member_name = _normalize_archive_member_name(member.name) + target_path = output_path.joinpath(*member_name.parts) + entry_count += 1 + _ensure_no_symlink_parents(output_path, target_path, member.name) + if member.isdir(): + if target_path.is_symlink() or ( + target_path.exists() and not target_path.is_dir() + ): + raise RuntimeError(f"directory conflicts with existing path: {member.name}") + target_path.mkdir(parents=True, exist_ok=True) + continue + if member.isfile(): + target_path.parent.mkdir(parents=True, exist_ok=True) + if target_path.is_symlink() or target_path.is_dir(): + raise RuntimeError(f"file conflicts with existing path: {member.name}") + source = archive.extractfile(member) + if source is None: + raise RuntimeError(f"failed to read archive member: {member.name}") + with target_path.open("wb") as handle: + shutil.copyfileobj(source, handle) + bytes_written += member.size + continue + if member.issym(): + _validate_archive_symlink_target(member_name, member.linkname) + target_path.parent.mkdir(parents=True, exist_ok=True) + if target_path.exists() and not target_path.is_symlink(): + raise RuntimeError(f"symlink conflicts with existing path: {member.name}") + if target_path.is_symlink(): + target_path.unlink() + os.symlink(member.linkname, target_path) + continue + if member.islnk(): + raise RuntimeError( + f"hard links are not allowed in workspace archives: {member.name}" + ) + raise RuntimeError(f"unsupported archive member type: {member.name}") + return { + "output_path": str(output_path), + "artifact_type": artifact_type, + "entry_count": entry_count, + "bytes_written": bytes_written, + } + + with tarfile.open(archive_path, "r:*") as archive: + members = archive.getmembers() + if len(members) != 1: + raise RuntimeError( + "expected exactly one archive member for " + f"{artifact_type} export, got {len(members)}" + ) + member = members[0] + _normalize_archive_member_name(member.name) + entry_count = 1 + if artifact_type == "file": + if not member.isfile(): + raise RuntimeError("exported archive did not contain a regular file") + source = archive.extractfile(member) + if source is None: + raise RuntimeError(f"failed to read archive member: {member.name}") + with output_path.open("wb") as handle: + shutil.copyfileobj(source, handle) + bytes_written = member.size + elif artifact_type == "symlink": + if not member.issym(): + raise RuntimeError("exported archive did not contain a symlink") + _validate_archive_symlink_target(PurePosixPath(member.name), member.linkname) + os.symlink(member.linkname, output_path) + else: + raise RuntimeError(f"unsupported artifact type: {artifact_type}") + return { + "output_path": str(output_path), + "artifact_type": artifact_type, + "entry_count": entry_count, + "bytes_written": bytes_written, + } + + def _instance_workspace_host_dir(instance: VmInstance) -> Path: raw_value = instance.metadata.get("workspace_host_dir") if raw_value is None or raw_value == "": @@ -640,6 +836,205 @@ def _pid_is_running(pid: int | None) -> bool: return True +def _collect_workspace_tree(root: Path) -> dict[str, WorkspaceTreeEntry]: + entries: dict[str, WorkspaceTreeEntry] = {} + + def walk(current: Path, relative_parts: tuple[str, ...] = ()) -> bool: + has_entries = False + for child in sorted(current.iterdir(), key=lambda item: item.name): + child_relative_parts = relative_parts + (child.name,) + relative_path = "/".join(child_relative_parts) + if child.is_symlink(): + entries[relative_path] = WorkspaceTreeEntry( + path=relative_path, + artifact_type="symlink", + disk_path=child, + link_target=os.readlink(child), + ) + has_entries = True + continue + if child.is_file(): + entries[relative_path] = WorkspaceTreeEntry( + path=relative_path, + artifact_type="file", + disk_path=child, + size_bytes=child.stat().st_size, + ) + has_entries = True + continue + if child.is_dir(): + child_has_entries = walk(child, child_relative_parts) + if not child_has_entries: + entries[relative_path] = WorkspaceTreeEntry( + path=relative_path, + artifact_type="directory", + disk_path=child, + ) + has_entries = True + else: + has_entries = True + continue + raise RuntimeError(f"unsupported workspace artifact type: {child}") + return has_entries + + walk(root) + return entries + + +def _is_probably_text(data: bytes) -> bool: + if b"\x00" in data: + return False + try: + data.decode("utf-8") + except UnicodeDecodeError: + return False + return True + + +def _build_text_patch( + *, + path: str, + before_text: str, + after_text: str, + status: str, +) -> str: + if status == "added": + fromfile = "/dev/null" + tofile = f"b/{path}" + elif status == "deleted": + fromfile = f"a/{path}" + tofile = "/dev/null" + else: + fromfile = f"a/{path}" + tofile = f"b/{path}" + lines = list( + difflib.unified_diff( + before_text.splitlines(keepends=True), + after_text.splitlines(keepends=True), + fromfile=fromfile, + tofile=tofile, + n=3, + ) + ) + if not lines: + return "" + return "".join(lines) + + +def _diff_workspace_trees( + baseline_root: Path, + current_root: Path, +) -> dict[str, Any]: + baseline_entries = _collect_workspace_tree(baseline_root) + current_entries = _collect_workspace_tree(current_root) + changed_entries: list[dict[str, Any]] = [] + patch_parts: list[str] = [] + summary = { + "total": 0, + "added": 0, + "modified": 0, + "deleted": 0, + "type_changed": 0, + "text_patched": 0, + "non_text": 0, + } + + for path in sorted(set(baseline_entries) | set(current_entries)): + baseline_entry = baseline_entries.get(path) + current_entry = current_entries.get(path) + entry_payload: dict[str, Any] | None = None + text_patch = "" + + if baseline_entry is None and current_entry is not None: + entry_payload = { + "path": path, + "status": "added", + "artifact_type": current_entry.artifact_type, + "text_patch": None, + } + if current_entry.artifact_type == "file": + current_bytes = current_entry.disk_path.read_bytes() + if _is_probably_text(current_bytes): + text_patch = _build_text_patch( + path=path, + before_text="", + after_text=current_bytes.decode("utf-8"), + status="added", + ) + elif current_entry is None and baseline_entry is not None: + entry_payload = { + "path": path, + "status": "deleted", + "artifact_type": baseline_entry.artifact_type, + "text_patch": None, + } + if baseline_entry.artifact_type == "file": + baseline_bytes = baseline_entry.disk_path.read_bytes() + if _is_probably_text(baseline_bytes): + text_patch = _build_text_patch( + path=path, + before_text=baseline_bytes.decode("utf-8"), + after_text="", + status="deleted", + ) + elif baseline_entry is not None and current_entry is not None: + if baseline_entry.artifact_type != current_entry.artifact_type: + entry_payload = { + "path": path, + "status": "type_changed", + "artifact_type": current_entry.artifact_type, + "text_patch": None, + } + elif current_entry.artifact_type == "directory": + continue + elif current_entry.artifact_type == "symlink": + if baseline_entry.link_target != current_entry.link_target: + entry_payload = { + "path": path, + "status": "modified", + "artifact_type": current_entry.artifact_type, + "text_patch": None, + } + else: + baseline_bytes = baseline_entry.disk_path.read_bytes() + current_bytes = current_entry.disk_path.read_bytes() + if baseline_bytes == current_bytes: + continue + entry_payload = { + "path": path, + "status": "modified", + "artifact_type": current_entry.artifact_type, + "text_patch": None, + } + if _is_probably_text(baseline_bytes) and _is_probably_text(current_bytes): + text_patch = _build_text_patch( + path=path, + before_text=baseline_bytes.decode("utf-8"), + after_text=current_bytes.decode("utf-8"), + status="modified", + ) + + if entry_payload is None: + continue + + summary["total"] += 1 + summary[str(entry_payload["status"])] += 1 + if text_patch != "": + entry_payload["text_patch"] = text_patch + patch_parts.append(text_patch) + summary["text_patched"] += 1 + else: + summary["non_text"] += 1 + changed_entries.append(entry_payload) + + return { + "changed": bool(changed_entries), + "summary": summary, + "entries": changed_entries, + "patch": "".join(patch_parts), + } + + class VmBackend: """Backend interface for lifecycle operations.""" @@ -674,6 +1069,15 @@ class VmBackend: ) -> dict[str, Any]: raise NotImplementedError + def export_archive( # pragma: no cover + self, + instance: VmInstance, + *, + workspace_path: str, + archive_path: Path, + ) -> dict[str, Any]: + raise NotImplementedError + def open_shell( # pragma: no cover self, instance: VmInstance, @@ -768,6 +1172,26 @@ class MockBackend(VmBackend): destination=destination, ) + def export_archive( + self, + instance: VmInstance, + *, + workspace_path: str, + archive_path: Path, + ) -> dict[str, Any]: + exported = _prepare_workspace_export_archive( + workspace_dir=_instance_workspace_host_dir(instance), + workspace_path=workspace_path, + archive_path=archive_path, + ) + return { + "workspace_path": exported.workspace_path, + "artifact_type": exported.artifact_type, + "entry_count": exported.entry_count, + "bytes_written": exported.bytes_written, + "execution_mode": "host_compat", + } + def open_shell( self, instance: VmInstance, @@ -1086,6 +1510,55 @@ class FirecrackerBackend(VmBackend): # pragma: no cover destination=destination, ) + def export_archive( + self, + instance: VmInstance, + *, + workspace_path: str, + archive_path: Path, + ) -> dict[str, Any]: + if self._runtime_capabilities.supports_guest_exec: + guest_cid = int(instance.metadata["guest_cid"]) + port = int(instance.metadata["guest_exec_port"]) + uds_path = instance.metadata.get("guest_exec_uds_path") + deadline = time.monotonic() + 10 + while True: + try: + response = self._guest_exec_client.export_archive( + guest_cid, + port, + workspace_path=workspace_path, + archive_path=archive_path, + timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS, + uds_path=uds_path, + ) + return { + "workspace_path": response.workspace_path, + "artifact_type": response.artifact_type, + "entry_count": response.entry_count, + "bytes_written": response.bytes_written, + "execution_mode": instance.metadata.get("execution_mode", "pending"), + } + except (OSError, RuntimeError) as exc: + if time.monotonic() >= deadline: + raise RuntimeError( + f"guest export transport did not become ready: {exc}" + ) from exc + time.sleep(0.2) + instance.metadata["execution_mode"] = "host_compat" + exported = _prepare_workspace_export_archive( + workspace_dir=_instance_workspace_host_dir(instance), + workspace_path=workspace_path, + archive_path=archive_path, + ) + return { + "workspace_path": exported.workspace_path, + "artifact_type": exported.artifact_type, + "entry_count": exported.entry_count, + "bytes_written": exported.bytes_written, + "execution_mode": "host_compat", + } + def open_shell( self, instance: VmInstance, @@ -1488,10 +1961,15 @@ class VmManager: host_workspace_dir = self._workspace_host_dir(workspace_id) commands_dir = self._workspace_commands_dir(workspace_id) shells_dir = self._workspace_shells_dir(workspace_id) + baseline_archive_path = self._workspace_baseline_archive_path(workspace_id) workspace_dir.mkdir(parents=True, exist_ok=False) host_workspace_dir.mkdir(parents=True, exist_ok=True) commands_dir.mkdir(parents=True, exist_ok=True) shells_dir.mkdir(parents=True, exist_ok=True) + _persist_workspace_baseline( + prepared_seed, + baseline_archive_path=baseline_archive_path, + ) instance = VmInstance( vm_id=workspace_id, environment=environment, @@ -1523,23 +2001,14 @@ class VmManager: self._start_instance_locked(instance) self._require_guest_exec_or_opt_in(instance) workspace_seed = prepared_seed.to_payload() - if prepared_seed.archive_path is not None: - import_summary = self._backend.import_archive( - instance, - archive_path=prepared_seed.archive_path, - destination=WORKSPACE_GUEST_PATH, - ) - workspace_seed["entry_count"] = int(import_summary["entry_count"]) - workspace_seed["bytes_written"] = int(import_summary["bytes_written"]) - workspace_seed["destination"] = str(import_summary["destination"]) - elif self._runtime_capabilities.supports_guest_exec: - self._backend.exec( - instance, - f"mkdir -p {shlex.quote(WORKSPACE_GUEST_PATH)}", - 10, - ) - else: - instance.metadata["execution_mode"] = "host_compat" + import_summary = self._backend.import_archive( + instance, + archive_path=baseline_archive_path, + destination=WORKSPACE_GUEST_PATH, + ) + workspace_seed["entry_count"] = int(import_summary["entry_count"]) + workspace_seed["bytes_written"] = int(import_summary["bytes_written"]) + workspace_seed["destination"] = str(import_summary["destination"]) workspace = WorkspaceRecord.from_instance(instance, workspace_seed=workspace_seed) self._save_workspace_locked(workspace) return self._serialize_workspace(workspace) @@ -1612,6 +2081,98 @@ class VmManager: "workspace_sync": workspace_sync, } + def export_workspace( + self, + workspace_id: str, + *, + path: str, + output_path: str | Path, + ) -> dict[str, Any]: + normalized_path, _ = _normalize_workspace_destination(path) + raw_output_path = str(output_path).strip() + if raw_output_path == "": + raise ValueError("output_path must not be empty") + resolved_output_path = Path(output_path).expanduser().resolve() + with self._lock: + workspace = self._load_workspace_locked(workspace_id) + instance = self._workspace_instance_for_live_operation_locked( + workspace, + operation_name="workspace_export", + ) + with tempfile.TemporaryDirectory(prefix="pyro-workspace-export-") as temp_dir: + archive_path = Path(temp_dir) / "workspace-export.tar" + exported = self._backend.export_archive( + instance, + workspace_path=normalized_path, + archive_path=archive_path, + ) + extracted = _extract_workspace_export_archive( + archive_path, + output_path=resolved_output_path, + artifact_type=cast(WorkspaceArtifactType, str(exported["artifact_type"])), + ) + with self._lock: + workspace = self._load_workspace_locked(workspace_id) + workspace.state = instance.state + workspace.firecracker_pid = instance.firecracker_pid + workspace.last_error = instance.last_error + workspace.metadata = dict(instance.metadata) + self._save_workspace_locked(workspace) + return { + "workspace_id": workspace_id, + "workspace_path": normalized_path, + "output_path": str(Path(str(extracted["output_path"]))), + "artifact_type": extracted["artifact_type"], + "entry_count": int(extracted["entry_count"]), + "bytes_written": int(extracted["bytes_written"]), + "execution_mode": str( + exported.get("execution_mode", instance.metadata.get("execution_mode", "pending")) + ), + } + + def diff_workspace(self, workspace_id: str) -> dict[str, Any]: + with self._lock: + workspace = self._load_workspace_locked(workspace_id) + instance = self._workspace_instance_for_live_operation_locked( + workspace, + operation_name="workspace_diff", + ) + baseline_archive_path = self._workspace_baseline_archive_path(workspace_id) + if not baseline_archive_path.exists(): + raise RuntimeError( + "workspace diff requires a baseline snapshot. Recreate the workspace to use diff." + ) + with tempfile.TemporaryDirectory(prefix="pyro-workspace-diff-") as temp_dir: + temp_root = Path(temp_dir) + current_archive_path = temp_root / "current.tar" + baseline_root = temp_root / "baseline" + current_root = temp_root / "current" + self._backend.export_archive( + instance, + workspace_path=WORKSPACE_GUEST_PATH, + archive_path=current_archive_path, + ) + _extract_seed_archive_to_host_workspace( + baseline_archive_path, + workspace_dir=baseline_root, + destination=WORKSPACE_GUEST_PATH, + ) + _extract_seed_archive_to_host_workspace( + current_archive_path, + workspace_dir=current_root, + destination=WORKSPACE_GUEST_PATH, + ) + diff_payload = _diff_workspace_trees(baseline_root, current_root) + with self._lock: + workspace = self._load_workspace_locked(workspace_id) + workspace.state = instance.state + workspace.firecracker_pid = instance.firecracker_pid + workspace.last_error = instance.last_error + workspace.metadata = dict(instance.metadata) + self._save_workspace_locked(workspace) + diff_payload["workspace_id"] = workspace_id + return diff_payload + def exec_workspace( self, workspace_id: str, @@ -2136,6 +2697,12 @@ class VmManager: def _workspace_host_dir(self, workspace_id: str) -> Path: return self._workspace_dir(workspace_id) / WORKSPACE_DIRNAME + def _workspace_baseline_dir(self, workspace_id: str) -> Path: + return self._workspace_dir(workspace_id) / WORKSPACE_BASELINE_DIRNAME + + def _workspace_baseline_archive_path(self, workspace_id: str) -> Path: + return self._workspace_baseline_dir(workspace_id) / WORKSPACE_BASELINE_ARCHIVE_NAME + def _workspace_commands_dir(self, workspace_id: str) -> Path: return self._workspace_dir(workspace_id) / WORKSPACE_COMMANDS_DIRNAME @@ -2278,17 +2845,29 @@ class VmManager: return entries def _workspace_instance_for_live_shell_locked(self, workspace: WorkspaceRecord) -> VmInstance: + instance = self._workspace_instance_for_live_operation_locked( + workspace, + operation_name="shell operations", + ) + self._require_workspace_shell_support(instance) + return instance + + def _workspace_instance_for_live_operation_locked( + self, + workspace: WorkspaceRecord, + *, + operation_name: str, + ) -> VmInstance: self._ensure_workspace_not_expired_locked(workspace, time.time()) self._refresh_workspace_liveness_locked(workspace) if workspace.state != "started": raise RuntimeError( "workspace " - f"{workspace.workspace_id} must be in 'started' state before shell operations" + f"{workspace.workspace_id} must be in 'started' state before {operation_name}" ) instance = workspace.to_instance( workdir=self._workspace_runtime_dir(workspace.workspace_id) ) - self._require_workspace_shell_support(instance) return instance def _workspace_shell_record_from_payload( diff --git a/tests/test_api.py b/tests/test_api.py index cba5d1f..0a0e847 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -50,7 +50,9 @@ def test_pyro_create_server_registers_vm_run(tmp_path: Path) -> None: assert "vm_run" in tool_names assert "vm_create" in tool_names assert "workspace_create" in tool_names + assert "workspace_diff" in tool_names assert "workspace_sync_push" in tool_names + assert "workspace_export" in tool_names assert "shell_open" in tool_names assert "shell_read" in tool_names assert "shell_write" in tool_names @@ -136,6 +138,9 @@ def test_pyro_workspace_methods_delegate_to_manager(tmp_path: Path) -> None: (updated_dir / "more.txt").write_text("more\n", encoding="utf-8") synced = pyro.push_workspace_sync(workspace_id, updated_dir, dest="subdir") executed = pyro.exec_workspace(workspace_id, command="cat note.txt") + diff_payload = pyro.diff_workspace(workspace_id) + export_path = tmp_path / "exported-note.txt" + exported = pyro.export_workspace(workspace_id, "note.txt", output_path=export_path) opened = pyro.open_shell(workspace_id) shell_id = str(opened["shell_id"]) written = pyro.write_shell(workspace_id, shell_id, input="pwd") @@ -154,6 +159,9 @@ def test_pyro_workspace_methods_delegate_to_manager(tmp_path: Path) -> None: assert created["workspace_seed"]["mode"] == "directory" assert synced["workspace_sync"]["destination"] == "/workspace/subdir" assert written["input_length"] == 3 + assert diff_payload["changed"] is True + assert exported["output_path"] == str(export_path) + assert export_path.read_text(encoding="utf-8") == "ok\n" assert "/workspace" in read["output"] assert signaled["signal"] == "INT" assert closed["closed"] is True diff --git a/tests/test_cli.py b/tests/test_cli.py index da03832..a34babe 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -64,6 +64,8 @@ def test_cli_subcommand_help_includes_examples_and_guidance() -> None: assert "pyro workspace create debian:12 --seed-path ./repo" in workspace_help assert "pyro workspace sync push WORKSPACE_ID ./repo --dest src" in workspace_help assert "pyro workspace exec WORKSPACE_ID" in workspace_help + assert "pyro workspace diff WORKSPACE_ID" in workspace_help + assert "pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt" in workspace_help assert "pyro workspace shell open WORKSPACE_ID" in workspace_help workspace_create_help = _subparser_choice( @@ -93,6 +95,18 @@ def test_cli_subcommand_help_includes_examples_and_guidance() -> None: assert "--dest" in workspace_sync_push_help assert "Import host content into `/workspace`" in workspace_sync_push_help + workspace_export_help = _subparser_choice( + _subparser_choice(parser, "workspace"), "export" + ).format_help() + assert "--output" in workspace_export_help + assert "Export one file or directory from `/workspace`" in workspace_export_help + + workspace_diff_help = _subparser_choice( + _subparser_choice(parser, "workspace"), "diff" + ).format_help() + assert "immutable workspace baseline" in workspace_diff_help + assert "workspace export" in workspace_diff_help + workspace_shell_help = _subparser_choice( _subparser_choice(parser, "workspace"), "shell", @@ -522,6 +536,100 @@ def test_cli_workspace_exec_prints_human_output( ) +def test_cli_workspace_export_prints_human_output( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + class StubPyro: + def export_workspace( + self, + workspace_id: str, + path: str, + *, + output_path: str, + ) -> dict[str, Any]: + assert workspace_id == "workspace-123" + assert path == "note.txt" + assert output_path == "./note.txt" + return { + "workspace_id": workspace_id, + "workspace_path": "/workspace/note.txt", + "output_path": "/tmp/note.txt", + "artifact_type": "file", + "entry_count": 1, + "bytes_written": 6, + "execution_mode": "guest_vsock", + } + + class StubParser: + def parse_args(self) -> argparse.Namespace: + return argparse.Namespace( + command="workspace", + workspace_command="export", + workspace_id="workspace-123", + path="note.txt", + output="./note.txt", + json=False, + ) + + monkeypatch.setattr(cli, "_build_parser", lambda: StubParser()) + monkeypatch.setattr(cli, "Pyro", StubPyro) + cli.main() + output = capsys.readouterr().out + assert "[workspace-export] workspace_id=workspace-123" in output + assert "artifact_type=file" in output + + +def test_cli_workspace_diff_prints_human_output( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + class StubPyro: + def diff_workspace(self, workspace_id: str) -> dict[str, Any]: + assert workspace_id == "workspace-123" + return { + "workspace_id": workspace_id, + "changed": True, + "summary": { + "total": 1, + "added": 0, + "modified": 1, + "deleted": 0, + "type_changed": 0, + "text_patched": 1, + "non_text": 0, + }, + "entries": [ + { + "path": "note.txt", + "status": "modified", + "artifact_type": "file", + "text_patch": "--- a/note.txt\n+++ b/note.txt\n", + } + ], + "patch": "--- a/note.txt\n+++ b/note.txt\n", + } + + class StubParser: + def parse_args(self) -> argparse.Namespace: + return argparse.Namespace( + command="workspace", + workspace_command="diff", + workspace_id="workspace-123", + json=False, + ) + + monkeypatch.setattr(cli, "_build_parser", lambda: StubParser()) + monkeypatch.setattr(cli, "Pyro", StubPyro) + cli.main() + output = capsys.readouterr().out + assert ( + "[workspace-diff] workspace_id=workspace-123 total=1 added=0 modified=1" + in output + ) + assert "--- a/note.txt" in output + + def test_cli_workspace_sync_push_prints_json( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: diff --git a/tests/test_public_contract.py b/tests/test_public_contract.py index 5db18e7..f026351 100644 --- a/tests/test_public_contract.py +++ b/tests/test_public_contract.py @@ -18,6 +18,8 @@ from pyro_mcp.contract import ( PUBLIC_CLI_ENV_SUBCOMMANDS, PUBLIC_CLI_RUN_FLAGS, PUBLIC_CLI_WORKSPACE_CREATE_FLAGS, + PUBLIC_CLI_WORKSPACE_DIFF_FLAGS, + PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS, PUBLIC_CLI_WORKSPACE_SHELL_CLOSE_FLAGS, PUBLIC_CLI_WORKSPACE_SHELL_OPEN_FLAGS, PUBLIC_CLI_WORKSPACE_SHELL_READ_FLAGS, @@ -92,6 +94,16 @@ def test_public_cli_help_lists_commands_and_run_flags() -> None: ).format_help() for flag in PUBLIC_CLI_WORKSPACE_SYNC_PUSH_FLAGS: assert flag in workspace_sync_push_help_text + workspace_export_help_text = _subparser_choice( + _subparser_choice(parser, "workspace"), "export" + ).format_help() + for flag in PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS: + assert flag in workspace_export_help_text + workspace_diff_help_text = _subparser_choice( + _subparser_choice(parser, "workspace"), "diff" + ).format_help() + for flag in PUBLIC_CLI_WORKSPACE_DIFF_FLAGS: + assert flag in workspace_diff_help_text workspace_shell_help_text = _subparser_choice( _subparser_choice(parser, "workspace"), "shell", diff --git a/tests/test_server.py b/tests/test_server.py index 5525e8c..0df0197 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -33,6 +33,8 @@ def test_create_server_registers_vm_tools(tmp_path: Path) -> None: assert "vm_run" in tool_names assert "vm_status" in tool_names assert "workspace_create" in tool_names + assert "workspace_diff" in tool_names + assert "workspace_export" in tool_names assert "workspace_logs" in tool_names assert "workspace_sync_push" in tool_names assert "shell_open" in tool_names @@ -201,6 +203,8 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: dict[str, Any], dict[str, Any], dict[str, Any], + dict[str, Any], + dict[str, Any], ]: server = create_server(manager=manager) created = _extract_structured( @@ -236,6 +240,20 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: }, ) ) + diffed = _extract_structured( + await server.call_tool("workspace_diff", {"workspace_id": workspace_id}) + ) + export_path = tmp_path / "exported-more.txt" + exported = _extract_structured( + await server.call_tool( + "workspace_export", + { + "workspace_id": workspace_id, + "path": "subdir/more.txt", + "output_path": str(export_path), + }, + ) + ) opened = _extract_structured( await server.call_tool("shell_open", {"workspace_id": workspace_id}) ) @@ -296,12 +314,27 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: deleted = _extract_structured( await server.call_tool("workspace_delete", {"workspace_id": workspace_id}) ) - return created, synced, executed, opened, written, read, signaled, closed, logs, deleted + return ( + created, + synced, + executed, + diffed, + exported, + opened, + written, + read, + signaled, + closed, + logs, + deleted, + ) ( created, synced, executed, + diffed, + exported, opened, written, read, @@ -314,6 +347,9 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None: assert created["workspace_seed"]["mode"] == "directory" assert synced["workspace_sync"]["destination"] == "/workspace/subdir" assert executed["stdout"] == "more\n" + assert diffed["changed"] is True + assert exported["artifact_type"] == "file" + assert Path(str(exported["output_path"])).read_text(encoding="utf-8") == "more\n" assert opened["state"] == "running" assert written["input_length"] == 3 assert "/workspace" in read["output"] diff --git a/tests/test_vm_guest.py b/tests/test_vm_guest.py index cc35209..a9310fd 100644 --- a/tests/test_vm_guest.py +++ b/tests/test_vm_guest.py @@ -105,6 +105,54 @@ def test_vsock_exec_client_upload_archive_round_trip( assert stub.closed is True +def test_vsock_exec_client_export_archive_round_trip( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(socket, "AF_VSOCK", 40, raising=False) + archive_bytes = io.BytesIO() + with tarfile.open(fileobj=archive_bytes, mode="w") as archive: + payload = b"hello\n" + info = tarfile.TarInfo(name="note.txt") + info.size = len(payload) + archive.addfile(info, io.BytesIO(payload)) + archive_payload = archive_bytes.getvalue() + header = json.dumps( + { + "workspace_path": "/workspace/note.txt", + "artifact_type": "file", + "archive_size": len(archive_payload), + "entry_count": 1, + "bytes_written": 6, + } + ).encode("utf-8") + b"\n" + stub = StubSocket(header + archive_payload) + + def socket_factory(family: int, sock_type: int) -> StubSocket: + assert family == socket.AF_VSOCK + assert sock_type == socket.SOCK_STREAM + return stub + + client = VsockExecClient(socket_factory=socket_factory) + archive_path = tmp_path / "export.tar" + response = client.export_archive( + 1234, + 5005, + workspace_path="/workspace/note.txt", + archive_path=archive_path, + timeout_seconds=60, + ) + + request = json.loads(stub.sent.decode("utf-8").strip()) + assert request["action"] == "export_archive" + assert request["path"] == "/workspace/note.txt" + assert archive_path.read_bytes() == archive_payload + assert response.workspace_path == "/workspace/note.txt" + assert response.artifact_type == "file" + assert response.entry_count == 1 + assert response.bytes_written == 6 + assert stub.closed is True + + def test_vsock_exec_client_shell_round_trip(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(socket, "AF_VSOCK", 40, raising=False) responses = [ diff --git a/tests/test_vm_manager.py b/tests/test_vm_manager.py index ca65ad2..ab17a11 100644 --- a/tests/test_vm_manager.py +++ b/tests/test_vm_manager.py @@ -2,6 +2,7 @@ from __future__ import annotations import io import json +import os import subprocess import tarfile import time @@ -454,6 +455,239 @@ def test_workspace_sync_push_rejects_destination_outside_workspace(tmp_path: Pat manager.push_workspace_sync(workspace_id, source_path=source_dir, dest="../escape") +def test_workspace_diff_and_export_round_trip(tmp_path: Path) -> None: + seed_dir = tmp_path / "seed" + seed_dir.mkdir() + (seed_dir / "note.txt").write_text("hello\n", encoding="utf-8") + update_dir = tmp_path / "update" + update_dir.mkdir() + (update_dir / "note.txt").write_text("hello from sync\n", encoding="utf-8") + + manager = VmManager( + backend_name="mock", + base_dir=tmp_path / "vms", + network_manager=TapNetworkManager(enabled=False), + ) + + workspace_id = str( + manager.create_workspace( + environment="debian:12-base", + allow_host_compat=True, + seed_path=seed_dir, + )["workspace_id"] + ) + manager.push_workspace_sync(workspace_id, source_path=update_dir) + + diff_payload = manager.diff_workspace(workspace_id) + assert diff_payload["workspace_id"] == workspace_id + assert diff_payload["changed"] is True + assert diff_payload["summary"]["modified"] == 1 + assert diff_payload["summary"]["text_patched"] == 1 + assert "-hello\n" in diff_payload["patch"] + assert "+hello from sync\n" in diff_payload["patch"] + + output_path = tmp_path / "exported-note.txt" + export_payload = manager.export_workspace( + workspace_id, + path="note.txt", + output_path=output_path, + ) + assert export_payload["workspace_id"] == workspace_id + assert export_payload["artifact_type"] == "file" + assert output_path.read_text(encoding="utf-8") == "hello from sync\n" + + status = manager.status_workspace(workspace_id) + logs = manager.logs_workspace(workspace_id) + assert status["command_count"] == 0 + assert logs["count"] == 0 + + +def test_workspace_export_directory_uses_exact_output_path(tmp_path: Path) -> None: + seed_dir = tmp_path / "seed" + nested_dir = seed_dir / "src" + nested_dir.mkdir(parents=True) + (nested_dir / "note.txt").write_text("hello\n", encoding="utf-8") + + manager = VmManager( + backend_name="mock", + base_dir=tmp_path / "vms", + network_manager=TapNetworkManager(enabled=False), + ) + + workspace_id = str( + manager.create_workspace( + environment="debian:12-base", + allow_host_compat=True, + seed_path=seed_dir, + )["workspace_id"] + ) + + output_dir = tmp_path / "exported-src" + payload = manager.export_workspace(workspace_id, path="src", output_path=output_dir) + assert payload["artifact_type"] == "directory" + assert (output_dir / "note.txt").read_text(encoding="utf-8") == "hello\n" + assert not (output_dir / "src").exists() + + +def test_workspace_diff_requires_create_time_baseline(tmp_path: Path) -> None: + manager = VmManager( + backend_name="mock", + base_dir=tmp_path / "vms", + network_manager=TapNetworkManager(enabled=False), + ) + workspace_id = str( + manager.create_workspace( + environment="debian:12-base", + allow_host_compat=True, + )["workspace_id"] + ) + baseline_path = tmp_path / "vms" / "workspaces" / workspace_id / "baseline" / "workspace.tar" + baseline_path.unlink() + + with pytest.raises(RuntimeError, match="requires a baseline snapshot"): + manager.diff_workspace(workspace_id) + + +def test_workspace_export_helpers_preserve_directory_symlinks(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + (workspace_dir / "note.txt").write_text("hello\n", encoding="utf-8") + os.symlink("note.txt", workspace_dir / "note-link") + (workspace_dir / "empty-dir").mkdir() + + archive_path = tmp_path / "workspace-export.tar" + exported = vm_manager_module._prepare_workspace_export_archive( # noqa: SLF001 + workspace_dir=workspace_dir, + workspace_path=".", + archive_path=archive_path, + ) + + assert exported.artifact_type == "directory" + + output_dir = tmp_path / "output" + extracted = vm_manager_module._extract_workspace_export_archive( # noqa: SLF001 + archive_path, + output_path=output_dir, + artifact_type="directory", + ) + + assert extracted["artifact_type"] == "directory" + assert (output_dir / "note.txt").read_text(encoding="utf-8") == "hello\n" + assert (output_dir / "note-link").is_symlink() + assert os.readlink(output_dir / "note-link") == "note.txt" + assert (output_dir / "empty-dir").is_dir() + + +def test_workspace_export_helpers_validate_missing_path_and_existing_output(tmp_path: Path) -> None: + workspace_dir = tmp_path / "workspace" + workspace_dir.mkdir() + (workspace_dir / "note.txt").write_text("hello\n", encoding="utf-8") + + with pytest.raises(RuntimeError, match="workspace path does not exist"): + vm_manager_module._prepare_workspace_export_archive( # noqa: SLF001 + workspace_dir=workspace_dir, + workspace_path="missing.txt", + archive_path=tmp_path / "missing.tar", + ) + + archive_path = tmp_path / "note-export.tar" + exported = vm_manager_module._prepare_workspace_export_archive( # noqa: SLF001 + workspace_dir=workspace_dir, + workspace_path="note.txt", + archive_path=archive_path, + ) + output_path = tmp_path / "note.txt" + output_path.write_text("already here\n", encoding="utf-8") + with pytest.raises(RuntimeError, match="output_path already exists"): + vm_manager_module._extract_workspace_export_archive( # noqa: SLF001 + archive_path, + output_path=output_path, + artifact_type=exported.artifact_type, + ) + + +def test_diff_workspace_trees_reports_empty_binary_symlink_and_type_changes(tmp_path: Path) -> None: + baseline_dir = tmp_path / "baseline" + current_dir = tmp_path / "current" + baseline_dir.mkdir() + current_dir.mkdir() + + (baseline_dir / "modified.txt").write_text("before\n", encoding="utf-8") + (current_dir / "modified.txt").write_text("after\n", encoding="utf-8") + + (baseline_dir / "deleted.txt").write_text("gone\n", encoding="utf-8") + (current_dir / "added.txt").write_text("new\n", encoding="utf-8") + + (baseline_dir / "binary.bin").write_bytes(b"\x00before") + (current_dir / "binary.bin").write_bytes(b"\x00after") + + os.symlink("link-target-old.txt", baseline_dir / "link") + os.symlink("link-target-new.txt", current_dir / "link") + + (baseline_dir / "swap").mkdir() + (current_dir / "swap").write_text("type changed\n", encoding="utf-8") + + (baseline_dir / "removed-empty").mkdir() + (current_dir / "added-empty").mkdir() + + diff_payload = vm_manager_module._diff_workspace_trees( # noqa: SLF001 + baseline_dir, + current_dir, + ) + + assert diff_payload["changed"] is True + assert diff_payload["summary"] == { + "total": 8, + "added": 2, + "modified": 3, + "deleted": 2, + "type_changed": 1, + "text_patched": 3, + "non_text": 5, + } + assert "--- a/modified.txt" in diff_payload["patch"] + assert "+++ b/modified.txt" in diff_payload["patch"] + assert "--- /dev/null" in diff_payload["patch"] + assert "+++ b/added.txt" in diff_payload["patch"] + assert "--- a/deleted.txt" in diff_payload["patch"] + assert "+++ /dev/null" in diff_payload["patch"] + entries = {entry["path"]: entry for entry in diff_payload["entries"]} + assert entries["binary.bin"]["text_patch"] is None + assert entries["link"]["artifact_type"] == "symlink" + assert entries["swap"]["artifact_type"] == "file" + assert entries["removed-empty"]["artifact_type"] == "directory" + assert entries["added-empty"]["artifact_type"] == "directory" + + +def test_diff_workspace_trees_unchanged_returns_empty_summary(tmp_path: Path) -> None: + baseline_dir = tmp_path / "baseline" + current_dir = tmp_path / "current" + baseline_dir.mkdir() + current_dir.mkdir() + (baseline_dir / "note.txt").write_text("same\n", encoding="utf-8") + (current_dir / "note.txt").write_text("same\n", encoding="utf-8") + + diff_payload = vm_manager_module._diff_workspace_trees( # noqa: SLF001 + baseline_dir, + current_dir, + ) + + assert diff_payload == { + "changed": False, + "summary": { + "total": 0, + "added": 0, + "modified": 0, + "deleted": 0, + "type_changed": 0, + "text_patched": 0, + "non_text": 0, + }, + "entries": [], + "patch": "", + } + + def test_workspace_shell_lifecycle_and_rehydration(tmp_path: Path) -> None: manager = VmManager( backend_name="mock", diff --git a/uv.lock b/uv.lock index 0297f19..6f69a2e 100644 --- a/uv.lock +++ b/uv.lock @@ -706,7 +706,7 @@ crypto = [ [[package]] name = "pyro-mcp" -version = "2.5.0" +version = "2.6.0" source = { editable = "." } dependencies = [ { name = "mcp" },