Add workspace export and baseline diff

Complete the 2.6.0 workspace milestone by adding explicit host-out export and immutable-baseline diff across the CLI, Python SDK, and MCP server.

Capture a baseline archive at workspace creation, export live /workspace paths through the guest agent, and compute structured whole-workspace diffs on the host without affecting command logs or shell state. The docs, roadmap, bundled guest agent, and workspace example now reflect the new create -> sync -> diff -> export workflow.

Validation: uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and a real guest-backed Firecracker smoke test covering workspace create, sync push, diff, export, and delete.
This commit is contained in:
Thales Maciel 2026-03-12 03:15:45 -03:00
parent 3f8293ad24
commit 84a7e18d4d
26 changed files with 1492 additions and 43 deletions

View file

@@ -14,6 +14,7 @@ import socket
import struct
import subprocess
import tarfile
import tempfile
import termios
import threading
import time
@@ -180,6 +181,54 @@ def _extract_archive(payload: bytes, destination: str) -> dict[str, Any]:
}
def _inspect_archive(archive_path: Path) -> tuple[int, int]:
entry_count = 0
bytes_written = 0
with tarfile.open(archive_path, "r:*") as archive:
for member in archive.getmembers():
entry_count += 1
if member.isfile():
bytes_written += member.size
return entry_count, bytes_written
def _prepare_export_archive(path: str) -> dict[str, Any]:
    """Stage a tar archive of a workspace path and return its metadata.

    The archive is written to a temp file; the *caller* is responsible for
    deleting ``archive_path`` once the bytes have been streamed out. On any
    failure after staging begins, the temp file is removed before re-raising.

    Raises RuntimeError for a missing path or an unsupported file type.
    """
    normalized_path, source_path = _normalize_destination(path)
    # is_symlink() catches broken symlinks, which exists() reports as absent.
    if not (source_path.exists() or source_path.is_symlink()):
        raise RuntimeError(f"workspace path does not exist: {normalized_path}")

    # Classification order matters: a symlink must win over what it points at.
    for probe, label in (
        (source_path.is_symlink, "symlink"),
        (source_path.is_file, "file"),
        (source_path.is_dir, "directory"),
    ):
        if probe():
            artifact_type = label
            break
    else:
        raise RuntimeError(f"unsupported workspace path type: {normalized_path}")

    with tempfile.NamedTemporaryFile(
        prefix="pyro-export-", suffix=".tar", delete=False
    ) as handle:
        staging = Path(handle.name)
    try:
        with tarfile.open(staging, "w") as archive:
            # Preserve symlinks as links rather than archiving their targets.
            archive.dereference = False
            if artifact_type == "directory":
                # Archive the directory's children (not the directory itself),
                # in deterministic name order.
                for entry in sorted(source_path.iterdir(), key=lambda e: e.name):
                    archive.add(entry, arcname=entry.name, recursive=True)
            else:
                archive.add(source_path, arcname=source_path.name, recursive=False)
        entries, payload_bytes = _inspect_archive(staging)
        archive_size = staging.stat().st_size
    except Exception:
        # Never leak the temp file when staging fails.
        staging.unlink(missing_ok=True)
        raise
    return {
        "workspace_path": str(normalized_path),
        "artifact_type": artifact_type,
        "archive_path": staging,
        "archive_size": archive_size,
        "entry_count": entries,
        "bytes_written": payload_bytes,
    }
def _run_command(command: str, timeout_seconds: int) -> dict[str, Any]:
started = time.monotonic()
try:
@@ -533,6 +582,26 @@ def main() -> None:
with conn:
try:
request = _read_request(conn)
if str(request.get("action", "")) == "export_archive":
export = _prepare_export_archive(str(request.get("path", "/workspace")))
try:
header = {
"workspace_path": export["workspace_path"],
"artifact_type": export["artifact_type"],
"archive_size": export["archive_size"],
"entry_count": export["entry_count"],
"bytes_written": export["bytes_written"],
}
conn.sendall((json.dumps(header) + "\n").encode("utf-8"))
with Path(str(export["archive_path"])).open("rb") as handle:
while True:
chunk = handle.read(BUFFER_SIZE)
if chunk == b"":
break
conn.sendall(chunk)
finally:
Path(str(export["archive_path"])).unlink(missing_ok=True)
continue
response = _dispatch(request, conn)
except Exception as exc: # noqa: BLE001
response = {"error": str(exc)}