Add workspace export and baseline diff

Complete the 2.6.0 workspace milestone by adding explicit host-out export and immutable-baseline diff across the CLI, Python SDK, and MCP server.

Capture a baseline archive at workspace creation, export live /workspace paths through the guest agent, and compute structured whole-workspace diffs on the host without affecting command logs or shell state. The docs, roadmap, bundled guest agent, and workspace example now reflect the new create -> sync -> diff -> export workflow.

Validation: uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and a real guest-backed Firecracker smoke test covering workspace create, sync push, diff, export, and delete.
This commit is contained in:
Thales Maciel 2026-03-12 03:15:45 -03:00
parent 3f8293ad24
commit 84a7e18d4d
26 changed files with 1492 additions and 43 deletions

View file

@ -130,6 +130,22 @@ class Pyro:
def logs_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Return the persisted command history for one workspace."""
    manager = self._manager
    return manager.logs_workspace(workspace_id)
def export_workspace(
    self,
    workspace_id: str,
    path: str,
    *,
    output_path: str | Path,
) -> dict[str, Any]:
    """Export one `/workspace` path from a workspace to an explicit host path.

    Thin delegation to the underlying manager; see
    ``VmManager.export_workspace`` for the full contract.
    """
    manager = self._manager
    return manager.export_workspace(
        workspace_id,
        path=path,
        output_path=output_path,
    )
def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Return the structured baseline diff for one workspace."""
    manager = self._manager
    return manager.diff_workspace(workspace_id)
def open_shell(
self,
workspace_id: str,
@ -370,6 +386,20 @@ class Pyro:
"""Return persisted command history for one workspace."""
return self.logs_workspace(workspace_id)
@server.tool()
async def workspace_export(
    workspace_id: str,
    path: str,
    output_path: str,
) -> dict[str, Any]:
    """Export one file or directory from `/workspace` back to the host.

    `path` resolves inside the guest `/workspace`; `output_path` is the
    exact host location to create (extraction refuses to overwrite an
    existing path).
    """
    return self.export_workspace(workspace_id, path, output_path=output_path)

@server.tool()
async def workspace_diff(workspace_id: str) -> dict[str, Any]:
    """Compare `/workspace` to the immutable create-time baseline.

    Returns the structured diff payload: summary counters, per-entry
    changes, and a concatenated unified-diff patch.
    """
    return self.diff_workspace(workspace_id)
@server.tool()
async def shell_open(
workspace_id: str,

View file

@ -215,6 +215,41 @@ def _print_workspace_sync_human(payload: dict[str, Any]) -> None:
)
def _print_workspace_export_human(payload: dict[str, Any]) -> None:
    """Print a one-line human-readable summary of a workspace-export payload."""
    fields = (
        ("workspace_id", payload.get("workspace_id", "unknown")),
        ("workspace_path", payload.get("workspace_path", WORKSPACE_GUEST_PATH)),
        ("output_path", payload.get("output_path", "unknown")),
        ("artifact_type", payload.get("artifact_type", "unknown")),
        ("entry_count", int(payload.get("entry_count", 0))),
        ("bytes_written", int(payload.get("bytes_written", 0))),
        ("execution_mode", payload.get("execution_mode", "unknown")),
    )
    rendered = " ".join(f"{name}={str(value)}" for name, value in fields)
    print(f"[workspace-export] {rendered}")
def _print_workspace_diff_human(payload: dict[str, Any]) -> None:
if not bool(payload.get("changed")):
print("No workspace changes.")
return
summary = payload.get("summary")
if isinstance(summary, dict):
print(
"[workspace-diff] "
f"workspace_id={str(payload.get('workspace_id', 'unknown'))} "
f"total={int(summary.get('total', 0))} "
f"added={int(summary.get('added', 0))} "
f"modified={int(summary.get('modified', 0))} "
f"deleted={int(summary.get('deleted', 0))} "
f"type_changed={int(summary.get('type_changed', 0))} "
f"text_patched={int(summary.get('text_patched', 0))} "
f"non_text={int(summary.get('non_text', 0))}"
)
patch = str(payload.get("patch", ""))
if patch != "":
print(patch, end="" if patch.endswith("\n") else "\n")
def _print_workspace_logs_human(payload: dict[str, Any]) -> None:
entries = payload.get("entries")
if not isinstance(entries, list) or not entries:
@ -301,6 +336,8 @@ def _build_parser() -> argparse.ArgumentParser:
Need repeated commands in one workspace after that?
pyro workspace create debian:12 --seed-path ./repo
pyro workspace sync push WORKSPACE_ID ./changes
pyro workspace diff WORKSPACE_ID
pyro workspace export WORKSPACE_ID note.txt --output ./note.txt
pyro workspace shell open WORKSPACE_ID
Use `pyro mcp serve` only after the CLI validation path works.
@ -509,6 +546,8 @@ def _build_parser() -> argparse.ArgumentParser:
pyro workspace create debian:12 --seed-path ./repo
pyro workspace sync push WORKSPACE_ID ./repo --dest src
pyro workspace exec WORKSPACE_ID -- sh -lc 'printf "hello\\n" > note.txt'
pyro workspace diff WORKSPACE_ID
pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt
pyro workspace shell open WORKSPACE_ID
pyro workspace logs WORKSPACE_ID
"""
@ -530,6 +569,7 @@ def _build_parser() -> argparse.ArgumentParser:
pyro workspace create debian:12
pyro workspace create debian:12 --seed-path ./repo
pyro workspace sync push WORKSPACE_ID ./changes
pyro workspace diff WORKSPACE_ID
"""
),
formatter_class=_HelpFormatter,
@ -667,6 +707,57 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_export_parser = workspace_subparsers.add_parser(
"export",
help="Export one workspace path to the host.",
description="Export one file or directory from `/workspace` to an explicit host path.",
epilog="Example:\n pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt",
formatter_class=_HelpFormatter,
)
workspace_export_parser.add_argument(
"workspace_id",
metavar="WORKSPACE_ID",
help="Persistent workspace identifier.",
)
workspace_export_parser.add_argument(
"path",
metavar="PATH",
help="Workspace path to export. Relative values resolve inside `/workspace`.",
)
workspace_export_parser.add_argument(
"--output",
required=True,
help="Exact host path to create for the exported file or directory.",
)
workspace_export_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_diff_parser = workspace_subparsers.add_parser(
"diff",
help="Diff `/workspace` against the create-time baseline.",
description="Compare the current `/workspace` tree to the immutable workspace baseline.",
epilog=dedent(
"""
Example:
pyro workspace diff WORKSPACE_ID
Use `workspace export` to copy a changed file or directory back to the host.
"""
),
formatter_class=_HelpFormatter,
)
workspace_diff_parser.add_argument(
"workspace_id",
metavar="WORKSPACE_ID",
help="Persistent workspace identifier.",
)
workspace_diff_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_shell_parser = workspace_subparsers.add_parser(
"shell",
help="Open and manage persistent interactive shells.",
@ -1148,6 +1239,46 @@ def main() -> None:
raise SystemExit(1) from exc
_print_workspace_sync_human(payload)
return
if args.workspace_command == "export":
if bool(args.json):
try:
payload = pyro.export_workspace(
args.workspace_id,
args.path,
output_path=args.output,
)
except Exception as exc: # noqa: BLE001
_print_json({"ok": False, "error": str(exc)})
raise SystemExit(1) from exc
_print_json(payload)
else:
try:
payload = pyro.export_workspace(
args.workspace_id,
args.path,
output_path=args.output,
)
except Exception as exc: # noqa: BLE001
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
_print_workspace_export_human(payload)
return
if args.workspace_command == "diff":
if bool(args.json):
try:
payload = pyro.diff_workspace(args.workspace_id)
except Exception as exc: # noqa: BLE001
_print_json({"ok": False, "error": str(exc)})
raise SystemExit(1) from exc
_print_json(payload)
else:
try:
payload = pyro.diff_workspace(args.workspace_id)
except Exception as exc: # noqa: BLE001
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
_print_workspace_diff_human(payload)
return
if args.workspace_command == "shell":
if args.workspace_shell_command == "open":
try:

View file

@ -8,7 +8,9 @@ PUBLIC_CLI_ENV_SUBCOMMANDS = ("inspect", "list", "pull", "prune")
PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = (
"create",
"delete",
"diff",
"exec",
"export",
"logs",
"shell",
"status",
@ -25,6 +27,8 @@ PUBLIC_CLI_WORKSPACE_CREATE_FLAGS = (
"--seed-path",
"--json",
)
PUBLIC_CLI_WORKSPACE_DIFF_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS = ("--output", "--json")
PUBLIC_CLI_WORKSPACE_SHELL_OPEN_FLAGS = ("--cwd", "--cols", "--rows", "--json")
PUBLIC_CLI_WORKSPACE_SHELL_READ_FLAGS = ("--cursor", "--max-chars", "--json")
PUBLIC_CLI_WORKSPACE_SHELL_WRITE_FLAGS = ("--input", "--no-newline", "--json")
@ -48,8 +52,10 @@ PUBLIC_SDK_METHODS = (
"create_workspace",
"delete_vm",
"delete_workspace",
"diff_workspace",
"exec_vm",
"exec_workspace",
"export_workspace",
"inspect_environment",
"list_environments",
"logs_workspace",
@ -87,7 +93,9 @@ PUBLIC_MCP_TOOLS = (
"vm_stop",
"workspace_create",
"workspace_delete",
"workspace_diff",
"workspace_exec",
"workspace_export",
"workspace_logs",
"workspace_status",
"workspace_sync_push",

View file

@ -14,6 +14,7 @@ import socket
import struct
import subprocess
import tarfile
import tempfile
import termios
import threading
import time
@ -180,6 +181,54 @@ def _extract_archive(payload: bytes, destination: str) -> dict[str, Any]:
}
def _inspect_archive(archive_path: Path) -> tuple[int, int]:
entry_count = 0
bytes_written = 0
with tarfile.open(archive_path, "r:*") as archive:
for member in archive.getmembers():
entry_count += 1
if member.isfile():
bytes_written += member.size
return entry_count, bytes_written
def _prepare_export_archive(path: str) -> dict[str, Any]:
    """Archive one `/workspace` path into a temp tar and return its metadata.

    The caller owns the returned "archive_path" temp file and must unlink it
    after streaming it to the host. Raises RuntimeError for missing or
    unsupported paths.
    """
    normalized_path, source_path = _normalize_destination(path)
    # exists() follows symlinks, so a dangling symlink needs the second check.
    if not source_path.exists() and not source_path.is_symlink():
        raise RuntimeError(f"workspace path does not exist: {normalized_path}")
    # Symlink check must come first: is_file()/is_dir() follow the link.
    if source_path.is_symlink():
        artifact_type = "symlink"
    elif source_path.is_file():
        artifact_type = "file"
    elif source_path.is_dir():
        artifact_type = "directory"
    else:
        raise RuntimeError(f"unsupported workspace path type: {normalized_path}")
    # delete=False: the archive must outlive this function so it can be streamed.
    with tempfile.NamedTemporaryFile(prefix="pyro-export-", suffix=".tar", delete=False) as handle:
        archive_path = Path(handle.name)
    try:
        with tarfile.open(archive_path, "w") as archive:
            # Preserve symlinks as symlink members instead of following them.
            archive.dereference = False
            if artifact_type == "directory":
                # Archive the directory's children so extraction lands directly
                # in the destination instead of under an extra top-level dir.
                for child in sorted(source_path.iterdir(), key=lambda item: item.name):
                    archive.add(child, arcname=child.name, recursive=True)
            else:
                archive.add(source_path, arcname=source_path.name, recursive=False)
        entry_count, bytes_written = _inspect_archive(archive_path)
        return {
            "workspace_path": str(normalized_path),
            "artifact_type": artifact_type,
            "archive_path": archive_path,
            "archive_size": archive_path.stat().st_size,
            "entry_count": entry_count,
            "bytes_written": bytes_written,
        }
    except Exception:
        # Never leak the temp archive on failure.
        archive_path.unlink(missing_ok=True)
        raise
def _run_command(command: str, timeout_seconds: int) -> dict[str, Any]:
started = time.monotonic()
try:
@ -533,6 +582,26 @@ def main() -> None:
with conn:
try:
request = _read_request(conn)
if str(request.get("action", "")) == "export_archive":
export = _prepare_export_archive(str(request.get("path", "/workspace")))
try:
header = {
"workspace_path": export["workspace_path"],
"artifact_type": export["artifact_type"],
"archive_size": export["archive_size"],
"entry_count": export["entry_count"],
"bytes_written": export["bytes_written"],
}
conn.sendall((json.dumps(header) + "\n").encode("utf-8"))
with Path(str(export["archive_path"])).open("rb") as handle:
while True:
chunk = handle.read(BUFFER_SIZE)
if chunk == b"":
break
conn.sendall(chunk)
finally:
Path(str(export["archive_path"])).unlink(missing_ok=True)
continue
response = _dispatch(request, conn)
except Exception as exc: # noqa: BLE001
response = {"error": str(exc)}

View file

@ -25,7 +25,7 @@
"guest": {
"agent": {
"path": "guest/pyro_guest_agent.py",
"sha256": "07adf6269551447dbea8c236f91499ea1479212a3f084c5402a656f5f5cc5892"
"sha256": "4118589ccd8f4ac8200d9cedf25d13ff515d77c28094bbbdb208310247688b40"
}
},
"platform": "linux-x86_64",

View file

@ -19,7 +19,7 @@ from typing import Any
from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths
DEFAULT_ENVIRONMENT_VERSION = "1.0.0"
DEFAULT_CATALOG_VERSION = "2.5.0"
DEFAULT_CATALOG_VERSION = "2.6.0"
OCI_MANIFEST_ACCEPT = ", ".join(
(
"application/vnd.oci.image.index.v1+json",

View file

@ -39,6 +39,14 @@ class GuestArchiveResponse:
bytes_written: int
@dataclass(frozen=True)
class GuestArchiveExportResponse:
    """Header metadata returned by the guest for an `export_archive` request."""

    # Normalized guest path that was archived.
    workspace_path: str
    # "file", "directory", or "symlink", as reported by the guest.
    artifact_type: str
    entry_count: int
    bytes_written: int
@dataclass(frozen=True)
class GuestShellSummary:
shell_id: str
@ -128,6 +136,52 @@ class VsockExecClient:
bytes_written=int(payload.get("bytes_written", 0)),
)
def export_archive(
    self,
    guest_cid: int,
    port: int,
    *,
    workspace_path: str,
    archive_path: Path,
    timeout_seconds: int = 60,
    uds_path: str | None = None,
) -> GuestArchiveExportResponse:
    """Ask the guest agent to archive *workspace_path* and stream it back.

    Wire protocol: send one JSON request line, read one JSON header line
    (metadata, or an "error" field), then read exactly
    header["archive_size"] raw tar bytes into *archive_path*.

    Raises RuntimeError on an empty/malformed header, a guest-reported
    error, a negative size, or a short stream.
    """
    request = {
        "action": "export_archive",
        "path": workspace_path,
    }
    sock = self._connect(guest_cid, port, timeout_seconds, uds_path=uds_path)
    try:
        sock.sendall((json.dumps(request) + "\n").encode("utf-8"))
        header = self._recv_line(sock)
        if header.strip() == "":
            raise RuntimeError("guest export response header is empty")
        payload = json.loads(header)
        if not isinstance(payload, dict):
            raise RuntimeError("guest export response header must be a JSON object")
        error = payload.get("error")
        if error is not None:
            raise RuntimeError(str(error))
        archive_size = int(payload.get("archive_size", 0))
        if archive_size < 0:
            raise RuntimeError("guest export archive_size must not be negative")
        with archive_path.open("wb") as handle:
            # Read until the advertised byte count is consumed; EOF before
            # that means the guest died mid-stream.
            remaining = archive_size
            while remaining > 0:
                chunk = sock.recv(min(65536, remaining))
                if chunk == b"":
                    raise RuntimeError("unexpected EOF while receiving export archive")
                handle.write(chunk)
                remaining -= len(chunk)
    finally:
        sock.close()
    return GuestArchiveExportResponse(
        workspace_path=str(payload.get("workspace_path", workspace_path)),
        artifact_type=str(payload.get("artifact_type", "file")),
        entry_count=int(payload.get("entry_count", 0)),
        bytes_written=int(payload.get("bytes_written", 0)),
    )
def open_shell(
self,
guest_cid: int,

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import difflib
import json
import os
import shlex
@ -43,7 +44,9 @@ DEFAULT_TIMEOUT_SECONDS = 30
DEFAULT_TTL_SECONDS = 600
DEFAULT_ALLOW_HOST_COMPAT = False
WORKSPACE_LAYOUT_VERSION = 3
WORKSPACE_LAYOUT_VERSION = 4
WORKSPACE_BASELINE_DIRNAME = "baseline"
WORKSPACE_BASELINE_ARCHIVE_NAME = "workspace.tar"
WORKSPACE_DIRNAME = "workspace"
WORKSPACE_COMMANDS_DIRNAME = "commands"
WORKSPACE_SHELLS_DIRNAME = "shells"
@ -57,6 +60,7 @@ DEFAULT_SHELL_MAX_CHARS = 65536
WORKSPACE_SHELL_SIGNAL_NAMES = shell_signal_names()
WorkspaceSeedMode = Literal["empty", "directory", "tar_archive"]
WorkspaceArtifactType = Literal["file", "directory", "symlink"]
@dataclass
@ -287,6 +291,24 @@ class VmExecResult:
duration_ms: int
@dataclass(frozen=True)
class ExportedWorkspaceArchive:
    """Result of archiving one workspace path on the host for export."""

    # Normalized path inside `/workspace` that was archived.
    workspace_path: str
    artifact_type: WorkspaceArtifactType
    # Location of the tar archive written for this export.
    archive_path: Path
    entry_count: int
    bytes_written: int
@dataclass(frozen=True)
class WorkspaceTreeEntry:
    """One file, symlink, or empty directory found while walking a tree."""

    # Slash-joined path relative to the tree root.
    path: str
    artifact_type: WorkspaceArtifactType
    # Absolute on-disk location backing this entry.
    disk_path: Path
    # Regular-file size; stays 0 for directories and symlinks.
    size_bytes: int = 0
    # Unresolved symlink target; None for files and directories.
    link_target: str | None = None
def _optional_int(value: object) -> int | None:
if value is None:
return None
@ -522,6 +544,66 @@ def _write_directory_seed_archive(source_dir: Path, archive_path: Path) -> None:
archive.add(child, arcname=child.name, recursive=True)
def _write_empty_seed_archive(archive_path: Path) -> None:
archive_path.parent.mkdir(parents=True, exist_ok=True)
with tarfile.open(archive_path, "w"):
pass
def _persist_workspace_baseline(
prepared_seed: PreparedWorkspaceSeed,
*,
baseline_archive_path: Path,
) -> None:
baseline_archive_path.parent.mkdir(parents=True, exist_ok=True)
if prepared_seed.archive_path is None:
_write_empty_seed_archive(baseline_archive_path)
return
shutil.copy2(prepared_seed.archive_path, baseline_archive_path)
def _write_workspace_export_archive(
    source_path: Path,
    *,
    archive_path: Path,
) -> WorkspaceArtifactType:
    """Archive *source_path* into a tar at *archive_path*; return its type.

    Directories are archived by their children (the directory itself is not
    a member); files and symlinks become a single member. All symlink
    targets in the tree are validated before any archive bytes are written.
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    # Symlink check must come first: is_file()/is_dir() follow the link.
    if source_path.is_symlink():
        artifact_type: WorkspaceArtifactType = "symlink"
    elif source_path.is_file():
        artifact_type = "file"
    elif source_path.is_dir():
        artifact_type = "directory"
    else:
        raise RuntimeError(f"unsupported workspace path type: {source_path}")

    def validate_source(current_path: Path, relative_path: PurePosixPath) -> None:
        # Reject unsafe symlink targets anywhere in the exported tree.
        if current_path.is_symlink():
            _validate_archive_symlink_target(relative_path, os.readlink(current_path))
            return
        if current_path.is_file():
            return
        if current_path.is_dir():
            for child in sorted(current_path.iterdir(), key=lambda item: item.name):
                validate_source(child, relative_path / child.name)
            return
        raise RuntimeError(f"unsupported workspace path type: {current_path}")

    if artifact_type == "directory":
        for child in sorted(source_path.iterdir(), key=lambda item: item.name):
            validate_source(child, PurePosixPath(child.name))
    else:
        validate_source(source_path, PurePosixPath(source_path.name))
    with tarfile.open(archive_path, "w") as archive:
        # Preserve symlinks as symlink members rather than following them.
        archive.dereference = False
        if artifact_type == "directory":
            for child in sorted(source_path.iterdir(), key=lambda item: item.name):
                archive.add(child, arcname=child.name, recursive=True)
        else:
            archive.add(source_path, arcname=source_path.name, recursive=False)
    return artifact_type
def _extract_seed_archive_to_host_workspace(
archive_path: Path,
*,
@ -576,6 +658,120 @@ def _extract_seed_archive_to_host_workspace(
}
def _prepare_workspace_export_archive(
    *,
    workspace_dir: Path,
    workspace_path: str,
    archive_path: Path,
) -> ExportedWorkspaceArchive:
    """Archive *workspace_path*, resolved under *workspace_dir*, for export.

    Host-compat path: reads from the host-side workspace mirror instead of
    the guest. Raises RuntimeError when the path does not exist.
    """
    normalized_workspace_path, _ = _normalize_workspace_destination(workspace_path)
    source_path = _workspace_host_destination(workspace_dir, normalized_workspace_path)
    # exists() follows symlinks, so a dangling symlink needs the second check.
    if not source_path.exists() and not source_path.is_symlink():
        raise RuntimeError(f"workspace path does not exist: {normalized_workspace_path}")
    artifact_type = _write_workspace_export_archive(source_path, archive_path=archive_path)
    entry_count, bytes_written = _inspect_seed_archive(archive_path)
    return ExportedWorkspaceArchive(
        workspace_path=normalized_workspace_path,
        artifact_type=artifact_type,
        archive_path=archive_path,
        entry_count=entry_count,
        bytes_written=bytes_written,
    )
def _extract_workspace_export_archive(
    archive_path: Path,
    *,
    output_path: Path,
    artifact_type: WorkspaceArtifactType,
) -> dict[str, Any]:
    """Safely extract an exported workspace archive to *output_path*.

    *output_path* must not already exist. Directory exports may contain many
    members; file and symlink exports must contain exactly one. Hard links
    and unexpected member types are rejected, member names are normalized,
    and symlink placement is validated before creation.

    Returns a dict with output_path, artifact_type, entry_count and
    bytes_written.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # exists() follows symlinks, so a dangling symlink needs the second check.
    if output_path.exists() or output_path.is_symlink():
        raise RuntimeError(f"output_path already exists: {output_path}")
    entry_count = 0
    bytes_written = 0
    if artifact_type == "directory":
        output_path.mkdir(parents=True, exist_ok=False)
        with tarfile.open(archive_path, "r:*") as archive:
            for member in archive.getmembers():
                member_name = _normalize_archive_member_name(member.name)
                target_path = output_path.joinpath(*member_name.parts)
                entry_count += 1
                # Refuse to write through a symlinked ancestor (escape guard).
                _ensure_no_symlink_parents(output_path, target_path, member.name)
                if member.isdir():
                    if target_path.is_symlink() or (
                        target_path.exists() and not target_path.is_dir()
                    ):
                        raise RuntimeError(f"directory conflicts with existing path: {member.name}")
                    target_path.mkdir(parents=True, exist_ok=True)
                    continue
                if member.isfile():
                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    if target_path.is_symlink() or target_path.is_dir():
                        raise RuntimeError(f"file conflicts with existing path: {member.name}")
                    source = archive.extractfile(member)
                    if source is None:
                        raise RuntimeError(f"failed to read archive member: {member.name}")
                    with target_path.open("wb") as handle:
                        shutil.copyfileobj(source, handle)
                    bytes_written += member.size
                    continue
                if member.issym():
                    _validate_archive_symlink_target(member_name, member.linkname)
                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    if target_path.exists() and not target_path.is_symlink():
                        raise RuntimeError(f"symlink conflicts with existing path: {member.name}")
                    if target_path.is_symlink():
                        target_path.unlink()
                    os.symlink(member.linkname, target_path)
                    continue
                if member.islnk():
                    raise RuntimeError(
                        f"hard links are not allowed in workspace archives: {member.name}"
                    )
                raise RuntimeError(f"unsupported archive member type: {member.name}")
        return {
            "output_path": str(output_path),
            "artifact_type": artifact_type,
            "entry_count": entry_count,
            "bytes_written": bytes_written,
        }
    # Non-directory exports: the archive must contain exactly one member.
    with tarfile.open(archive_path, "r:*") as archive:
        members = archive.getmembers()
        if len(members) != 1:
            raise RuntimeError(
                "expected exactly one archive member for "
                f"{artifact_type} export, got {len(members)}"
            )
        member = members[0]
        # Called for its validation side effect; the normalized name is unused.
        _normalize_archive_member_name(member.name)
        entry_count = 1
        if artifact_type == "file":
            if not member.isfile():
                raise RuntimeError("exported archive did not contain a regular file")
            source = archive.extractfile(member)
            if source is None:
                raise RuntimeError(f"failed to read archive member: {member.name}")
            with output_path.open("wb") as handle:
                shutil.copyfileobj(source, handle)
            bytes_written = member.size
        elif artifact_type == "symlink":
            if not member.issym():
                raise RuntimeError("exported archive did not contain a symlink")
            _validate_archive_symlink_target(PurePosixPath(member.name), member.linkname)
            os.symlink(member.linkname, output_path)
        else:
            raise RuntimeError(f"unsupported artifact type: {artifact_type}")
    return {
        "output_path": str(output_path),
        "artifact_type": artifact_type,
        "entry_count": entry_count,
        "bytes_written": bytes_written,
    }
def _instance_workspace_host_dir(instance: VmInstance) -> Path:
raw_value = instance.metadata.get("workspace_host_dir")
if raw_value is None or raw_value == "":
@ -640,6 +836,205 @@ def _pid_is_running(pid: int | None) -> bool:
return True
def _collect_workspace_tree(root: Path) -> dict[str, WorkspaceTreeEntry]:
    """Index every entry under *root*, keyed by slash-joined relative path.

    Files record their byte size, symlinks their unresolved target, and a
    directory is recorded only when it is empty — non-empty directories are
    represented implicitly by their children.
    """
    entries: dict[str, WorkspaceTreeEntry] = {}

    def walk(current: Path, relative_parts: tuple[str, ...] = ()) -> bool:
        # Returns True when *current* contains at least one entry, so the
        # caller can decide whether to record it as an empty directory.
        has_entries = False
        for child in sorted(current.iterdir(), key=lambda item: item.name):
            child_relative_parts = relative_parts + (child.name,)
            relative_path = "/".join(child_relative_parts)
            if child.is_symlink():
                # Symlink check first: is_file()/is_dir() would follow the link.
                entries[relative_path] = WorkspaceTreeEntry(
                    path=relative_path,
                    artifact_type="symlink",
                    disk_path=child,
                    link_target=os.readlink(child),
                )
                has_entries = True
                continue
            if child.is_file():
                entries[relative_path] = WorkspaceTreeEntry(
                    path=relative_path,
                    artifact_type="file",
                    disk_path=child,
                    size_bytes=child.stat().st_size,
                )
                has_entries = True
                continue
            if child.is_dir():
                child_has_entries = walk(child, child_relative_parts)
                if not child_has_entries:
                    # Record empty directories so diffs can see them appear/vanish.
                    entries[relative_path] = WorkspaceTreeEntry(
                        path=relative_path,
                        artifact_type="directory",
                        disk_path=child,
                    )
                    has_entries = True
                else:
                    has_entries = True
                continue
            # FIFOs, sockets, devices, etc. cannot be represented in a diff.
            raise RuntimeError(f"unsupported workspace artifact type: {child}")
        return has_entries

    walk(root)
    return entries
def _is_probably_text(data: bytes) -> bool:
if b"\x00" in data:
return False
try:
data.decode("utf-8")
except UnicodeDecodeError:
return False
return True
def _build_text_patch(
*,
path: str,
before_text: str,
after_text: str,
status: str,
) -> str:
if status == "added":
fromfile = "/dev/null"
tofile = f"b/{path}"
elif status == "deleted":
fromfile = f"a/{path}"
tofile = "/dev/null"
else:
fromfile = f"a/{path}"
tofile = f"b/{path}"
lines = list(
difflib.unified_diff(
before_text.splitlines(keepends=True),
after_text.splitlines(keepends=True),
fromfile=fromfile,
tofile=tofile,
n=3,
)
)
if not lines:
return ""
return "".join(lines)
def _diff_workspace_trees(
    baseline_root: Path,
    current_root: Path,
) -> dict[str, Any]:
    """Compare two extracted workspace trees and build a structured payload.

    Returns a dict with:
      - "changed": True when at least one entry differs.
      - "summary": counters (total, added, modified, deleted, type_changed,
        text_patched, non_text).
      - "entries": one payload per changed path, with an optional text patch.
      - "patch": concatenation of all per-file unified-diff texts.

    Unchanged paths and directories present on both sides are omitted.
    """
    baseline_entries = _collect_workspace_tree(baseline_root)
    current_entries = _collect_workspace_tree(current_root)
    changed_entries: list[dict[str, Any]] = []
    patch_parts: list[str] = []
    summary = {
        "total": 0,
        "added": 0,
        "modified": 0,
        "deleted": 0,
        "type_changed": 0,
        "text_patched": 0,
        "non_text": 0,
    }
    # Walk the union of paths in sorted order so output is deterministic.
    for path in sorted(set(baseline_entries) | set(current_entries)):
        baseline_entry = baseline_entries.get(path)
        current_entry = current_entries.get(path)
        entry_payload: dict[str, Any] | None = None
        text_patch = ""
        if baseline_entry is None and current_entry is not None:
            # Present only in the current tree: an addition.
            entry_payload = {
                "path": path,
                "status": "added",
                "artifact_type": current_entry.artifact_type,
                "text_patch": None,
            }
            if current_entry.artifact_type == "file":
                current_bytes = current_entry.disk_path.read_bytes()
                if _is_probably_text(current_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text="",
                        after_text=current_bytes.decode("utf-8"),
                        status="added",
                    )
        elif current_entry is None and baseline_entry is not None:
            # Present only in the baseline: a deletion.
            entry_payload = {
                "path": path,
                "status": "deleted",
                "artifact_type": baseline_entry.artifact_type,
                "text_patch": None,
            }
            if baseline_entry.artifact_type == "file":
                baseline_bytes = baseline_entry.disk_path.read_bytes()
                if _is_probably_text(baseline_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text=baseline_bytes.decode("utf-8"),
                        after_text="",
                        status="deleted",
                    )
        elif baseline_entry is not None and current_entry is not None:
            if baseline_entry.artifact_type != current_entry.artifact_type:
                # e.g. a file replaced by a directory or symlink.
                entry_payload = {
                    "path": path,
                    "status": "type_changed",
                    "artifact_type": current_entry.artifact_type,
                    "text_patch": None,
                }
            elif current_entry.artifact_type == "directory":
                # Directories carry no content of their own; nothing to diff.
                continue
            elif current_entry.artifact_type == "symlink":
                if baseline_entry.link_target != current_entry.link_target:
                    entry_payload = {
                        "path": path,
                        "status": "modified",
                        "artifact_type": current_entry.artifact_type,
                        "text_patch": None,
                    }
            else:
                baseline_bytes = baseline_entry.disk_path.read_bytes()
                current_bytes = current_entry.disk_path.read_bytes()
                if baseline_bytes == current_bytes:
                    continue
                entry_payload = {
                    "path": path,
                    "status": "modified",
                    "artifact_type": current_entry.artifact_type,
                    "text_patch": None,
                }
                # Only emit a unified diff when both sides decode as UTF-8 text.
                if _is_probably_text(baseline_bytes) and _is_probably_text(current_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text=baseline_bytes.decode("utf-8"),
                        after_text=current_bytes.decode("utf-8"),
                        status="modified",
                    )
        if entry_payload is None:
            # Symlinks with identical targets reach here and are skipped.
            continue
        summary["total"] += 1
        summary[str(entry_payload["status"])] += 1
        if text_patch != "":
            entry_payload["text_patch"] = text_patch
            patch_parts.append(text_patch)
            summary["text_patched"] += 1
        else:
            summary["non_text"] += 1
        changed_entries.append(entry_payload)
    return {
        "changed": bool(changed_entries),
        "summary": summary,
        "entries": changed_entries,
        "patch": "".join(patch_parts),
    }
class VmBackend:
"""Backend interface for lifecycle operations."""
@ -674,6 +1069,15 @@ class VmBackend:
) -> dict[str, Any]:
raise NotImplementedError
def export_archive(  # pragma: no cover
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    archive_path: Path,
) -> dict[str, Any]:
    """Archive *workspace_path* from *instance* into *archive_path* on the host.

    Backend hook: implementations return a payload with workspace_path,
    artifact_type, entry_count, bytes_written, and execution_mode.
    """
    raise NotImplementedError
def open_shell( # pragma: no cover
self,
instance: VmInstance,
@ -768,6 +1172,26 @@ class MockBackend(VmBackend):
destination=destination,
)
def export_archive(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    archive_path: Path,
) -> dict[str, Any]:
    """Export directly from the host-side workspace mirror (no guest involved)."""
    exported = _prepare_workspace_export_archive(
        workspace_dir=_instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        archive_path=archive_path,
    )
    # The mock backend always reports host_compat: it never talks to a guest.
    return {
        "workspace_path": exported.workspace_path,
        "artifact_type": exported.artifact_type,
        "entry_count": exported.entry_count,
        "bytes_written": exported.bytes_written,
        "execution_mode": "host_compat",
    }
def open_shell(
self,
instance: VmInstance,
@ -1086,6 +1510,55 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
destination=destination,
)
def export_archive(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    archive_path: Path,
) -> dict[str, Any]:
    """Export via the guest agent when available, else from the host mirror."""
    if self._runtime_capabilities.supports_guest_exec:
        guest_cid = int(instance.metadata["guest_cid"])
        port = int(instance.metadata["guest_exec_port"])
        uds_path = instance.metadata.get("guest_exec_uds_path")
        # Retry briefly: the vsock listener may not be up right after boot.
        deadline = time.monotonic() + 10
        while True:
            try:
                response = self._guest_exec_client.export_archive(
                    guest_cid,
                    port,
                    workspace_path=workspace_path,
                    archive_path=archive_path,
                    timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
                    uds_path=uds_path,
                )
                return {
                    "workspace_path": response.workspace_path,
                    "artifact_type": response.artifact_type,
                    "entry_count": response.entry_count,
                    "bytes_written": response.bytes_written,
                    "execution_mode": instance.metadata.get("execution_mode", "pending"),
                }
            except (OSError, RuntimeError) as exc:
                # NOTE(review): guest-reported application errors (e.g. a
                # missing workspace path) also surface as RuntimeError and are
                # retried for the full 10s before being rewrapped as a
                # transport failure — confirm that conflation is intended.
                if time.monotonic() >= deadline:
                    raise RuntimeError(
                        f"guest export transport did not become ready: {exc}"
                    ) from exc
                time.sleep(0.2)
    # No guest exec support: fall back to the host-side workspace mirror.
    instance.metadata["execution_mode"] = "host_compat"
    exported = _prepare_workspace_export_archive(
        workspace_dir=_instance_workspace_host_dir(instance),
        workspace_path=workspace_path,
        archive_path=archive_path,
    )
    return {
        "workspace_path": exported.workspace_path,
        "artifact_type": exported.artifact_type,
        "entry_count": exported.entry_count,
        "bytes_written": exported.bytes_written,
        "execution_mode": "host_compat",
    }
def open_shell(
self,
instance: VmInstance,
@ -1488,10 +1961,15 @@ class VmManager:
host_workspace_dir = self._workspace_host_dir(workspace_id)
commands_dir = self._workspace_commands_dir(workspace_id)
shells_dir = self._workspace_shells_dir(workspace_id)
baseline_archive_path = self._workspace_baseline_archive_path(workspace_id)
workspace_dir.mkdir(parents=True, exist_ok=False)
host_workspace_dir.mkdir(parents=True, exist_ok=True)
commands_dir.mkdir(parents=True, exist_ok=True)
shells_dir.mkdir(parents=True, exist_ok=True)
_persist_workspace_baseline(
prepared_seed,
baseline_archive_path=baseline_archive_path,
)
instance = VmInstance(
vm_id=workspace_id,
environment=environment,
@ -1523,23 +2001,14 @@ class VmManager:
self._start_instance_locked(instance)
self._require_guest_exec_or_opt_in(instance)
workspace_seed = prepared_seed.to_payload()
if prepared_seed.archive_path is not None:
import_summary = self._backend.import_archive(
instance,
archive_path=prepared_seed.archive_path,
destination=WORKSPACE_GUEST_PATH,
)
workspace_seed["entry_count"] = int(import_summary["entry_count"])
workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
workspace_seed["destination"] = str(import_summary["destination"])
elif self._runtime_capabilities.supports_guest_exec:
self._backend.exec(
instance,
f"mkdir -p {shlex.quote(WORKSPACE_GUEST_PATH)}",
10,
)
else:
instance.metadata["execution_mode"] = "host_compat"
import_summary = self._backend.import_archive(
instance,
archive_path=baseline_archive_path,
destination=WORKSPACE_GUEST_PATH,
)
workspace_seed["entry_count"] = int(import_summary["entry_count"])
workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
workspace_seed["destination"] = str(import_summary["destination"])
workspace = WorkspaceRecord.from_instance(instance, workspace_seed=workspace_seed)
self._save_workspace_locked(workspace)
return self._serialize_workspace(workspace)
@ -1612,6 +2081,98 @@ class VmManager:
"workspace_sync": workspace_sync,
}
def export_workspace(
    self,
    workspace_id: str,
    *,
    path: str,
    output_path: str | Path,
) -> dict[str, Any]:
    """Export one `/workspace` path from a live workspace to a host path.

    Validates inputs, resolves the backend export into a temp archive, then
    extracts it to *output_path* (which must not already exist). Raises
    ValueError for an empty output path and RuntimeError when the workspace
    is not live or extraction fails.
    """
    normalized_path, _ = _normalize_workspace_destination(path)
    raw_output_path = str(output_path).strip()
    if raw_output_path == "":
        raise ValueError("output_path must not be empty")
    resolved_output_path = Path(output_path).expanduser().resolve()
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_export",
        )
    # The transfer runs outside the lock so a long export does not block
    # other workspace operations.
    with tempfile.TemporaryDirectory(prefix="pyro-workspace-export-") as temp_dir:
        archive_path = Path(temp_dir) / "workspace-export.tar"
        exported = self._backend.export_archive(
            instance,
            workspace_path=normalized_path,
            archive_path=archive_path,
        )
        extracted = _extract_workspace_export_archive(
            archive_path,
            output_path=resolved_output_path,
            artifact_type=cast(WorkspaceArtifactType, str(exported["artifact_type"])),
        )
    with self._lock:
        # Persist any state the backend touched (e.g. an execution_mode
        # fallback) without clobbering concurrent record updates.
        workspace = self._load_workspace_locked(workspace_id)
        workspace.state = instance.state
        workspace.firecracker_pid = instance.firecracker_pid
        workspace.last_error = instance.last_error
        workspace.metadata = dict(instance.metadata)
        self._save_workspace_locked(workspace)
    return {
        "workspace_id": workspace_id,
        "workspace_path": normalized_path,
        "output_path": str(Path(str(extracted["output_path"]))),
        "artifact_type": extracted["artifact_type"],
        "entry_count": int(extracted["entry_count"]),
        "bytes_written": int(extracted["bytes_written"]),
        "execution_mode": str(
            exported.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
        ),
    }
def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Diff the live `/workspace` tree against the create-time baseline.

    Exports the current tree through the backend, extracts both archives
    into a temp dir, and returns the structured payload produced by
    `_diff_workspace_trees`, augmented with the workspace id.
    """
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_diff",
        )
    baseline_archive_path = self._workspace_baseline_archive_path(workspace_id)
    if not baseline_archive_path.exists():
        # Workspaces created before the baseline layout have no snapshot.
        raise RuntimeError(
            "workspace diff requires a baseline snapshot. Recreate the workspace to use diff."
        )
    with tempfile.TemporaryDirectory(prefix="pyro-workspace-diff-") as temp_dir:
        temp_root = Path(temp_dir)
        current_archive_path = temp_root / "current.tar"
        baseline_root = temp_root / "baseline"
        current_root = temp_root / "current"
        self._backend.export_archive(
            instance,
            workspace_path=WORKSPACE_GUEST_PATH,
            archive_path=current_archive_path,
        )
        # Reuse the hardened seed extractor for both archives.
        _extract_seed_archive_to_host_workspace(
            baseline_archive_path,
            workspace_dir=baseline_root,
            destination=WORKSPACE_GUEST_PATH,
        )
        _extract_seed_archive_to_host_workspace(
            current_archive_path,
            workspace_dir=current_root,
            destination=WORKSPACE_GUEST_PATH,
        )
        diff_payload = _diff_workspace_trees(baseline_root, current_root)
    with self._lock:
        # Persist any state the backend touched during the export.
        workspace = self._load_workspace_locked(workspace_id)
        workspace.state = instance.state
        workspace.firecracker_pid = instance.firecracker_pid
        workspace.last_error = instance.last_error
        workspace.metadata = dict(instance.metadata)
        self._save_workspace_locked(workspace)
    diff_payload["workspace_id"] = workspace_id
    return diff_payload
def exec_workspace(
self,
workspace_id: str,
@ -2136,6 +2697,12 @@ class VmManager:
def _workspace_host_dir(self, workspace_id: str) -> Path:
    """Host-side directory backing the workspace's `/workspace` tree."""
    return self._workspace_dir(workspace_id) / WORKSPACE_DIRNAME
def _workspace_baseline_dir(self, workspace_id: str) -> Path:
    """Directory holding the immutable create-time baseline snapshot."""
    return self._workspace_dir(workspace_id) / WORKSPACE_BASELINE_DIRNAME
def _workspace_baseline_archive_path(self, workspace_id: str) -> Path:
    """Path of the baseline tar captured when the workspace was created."""
    return self._workspace_baseline_dir(workspace_id) / WORKSPACE_BASELINE_ARCHIVE_NAME
def _workspace_commands_dir(self, workspace_id: str) -> Path:
    """Directory for this workspace's persisted command records."""
    return self._workspace_dir(workspace_id) / WORKSPACE_COMMANDS_DIRNAME
@ -2278,17 +2845,29 @@ class VmManager:
return entries
def _workspace_instance_for_live_shell_locked(self, workspace: WorkspaceRecord) -> VmInstance:
    """Resolve a started, shell-capable instance for *workspace* (lock held)."""
    instance = self._workspace_instance_for_live_operation_locked(
        workspace,
        operation_name="shell operations",
    )
    self._require_workspace_shell_support(instance)
    return instance
def _workspace_instance_for_live_operation_locked(
    self,
    workspace: WorkspaceRecord,
    *,
    operation_name: str,
) -> VmInstance:
    """Validate that *workspace* is live and return its VmInstance (lock held).

    Raises RuntimeError, naming *operation_name*, unless the workspace is
    unexpired, still alive, and in the 'started' state.
    """
    self._ensure_workspace_not_expired_locked(workspace, time.time())
    self._refresh_workspace_liveness_locked(workspace)
    if workspace.state != "started":
        raise RuntimeError(
            "workspace "
            f"{workspace.workspace_id} must be in 'started' state before {operation_name}"
        )
    instance = workspace.to_instance(
        workdir=self._workspace_runtime_dir(workspace.workspace_id)
    )
    # NOTE(review): the shell-specific wrapper above also enforces shell
    # support — confirm this generic operation path should require it too.
    self._require_workspace_shell_support(instance)
    return instance
def _workspace_shell_record_from_payload(