Add workspace export and baseline diff

Complete the 2.6.0 workspace milestone by adding explicit host-out export and immutable-baseline diff across the CLI, Python SDK, and MCP server.

Capture a baseline archive at workspace creation, export live /workspace paths through the guest agent, and compute structured whole-workspace diffs on the host without affecting command logs or shell state. The docs, roadmap, bundled guest agent, and workspace example now reflect the new create -> sync -> diff -> export workflow.

Validation: uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and a real guest-backed Firecracker smoke test covering workspace create, sync push, diff, export, and delete.
This commit is contained in:
Thales Maciel 2026-03-12 03:15:45 -03:00
parent 3f8293ad24
commit 84a7e18d4d
26 changed files with 1492 additions and 43 deletions

View file

@ -2,6 +2,7 @@ from __future__ import annotations
import io
import json
import os
import subprocess
import tarfile
import time
@ -454,6 +455,239 @@ def test_workspace_sync_push_rejects_destination_outside_workspace(tmp_path: Pat
manager.push_workspace_sync(workspace_id, source_path=source_dir, dest="../escape")
def test_workspace_diff_and_export_round_trip(tmp_path: Path) -> None:
    """Seed a workspace, sync a modified file, then diff and export it.

    Checks that the diff reports one modified, text-patched file with the
    expected removed/added lines, that exporting ``note.txt`` writes the
    synced content to the requested output path, and that the workspace's
    ``command_count`` and log ``count`` are still zero afterwards.
    """
    seed_root = tmp_path / "seed"
    seed_root.mkdir()
    (seed_root / "note.txt").write_text("hello\n", encoding="utf-8")

    sync_root = tmp_path / "update"
    sync_root.mkdir()
    (sync_root / "note.txt").write_text("hello from sync\n", encoding="utf-8")

    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    created = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
        seed_path=seed_root,
    )
    workspace_id = str(created["workspace_id"])
    manager.push_workspace_sync(workspace_id, source_path=sync_root)

    # Diff against the baseline captured at creation time.
    diff_result = manager.diff_workspace(workspace_id)
    assert diff_result["workspace_id"] == workspace_id
    assert diff_result["changed"] is True
    assert diff_result["summary"]["modified"] == 1
    assert diff_result["summary"]["text_patched"] == 1
    assert "-hello\n" in diff_result["patch"]
    assert "+hello from sync\n" in diff_result["patch"]

    # Export the single synced file to an explicit host path.
    exported_file = tmp_path / "exported-note.txt"
    export_result = manager.export_workspace(
        workspace_id,
        path="note.txt",
        output_path=exported_file,
    )
    assert export_result["workspace_id"] == workspace_id
    assert export_result["artifact_type"] == "file"
    assert exported_file.read_text(encoding="utf-8") == "hello from sync\n"

    # Neither diff nor export should have been recorded as a command.
    status = manager.status_workspace(workspace_id)
    logs = manager.logs_workspace(workspace_id)
    assert status["command_count"] == 0
    assert logs["count"] == 0
def test_workspace_export_directory_uses_exact_output_path(tmp_path: Path) -> None:
    """Export a workspace subdirectory and check it lands at ``output_path``.

    The exported ``src`` directory's contents must appear directly under the
    requested output directory, not nested in an extra ``src`` level.
    """
    seed_root = tmp_path / "seed"
    source_subdir = seed_root / "src"
    source_subdir.mkdir(parents=True)
    (source_subdir / "note.txt").write_text("hello\n", encoding="utf-8")

    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    created = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
        seed_path=seed_root,
    )
    workspace_id = str(created["workspace_id"])

    export_root = tmp_path / "exported-src"
    result = manager.export_workspace(
        workspace_id,
        path="src",
        output_path=export_root,
    )

    assert result["artifact_type"] == "directory"
    exported_note = export_root / "note.txt"
    assert exported_note.read_text(encoding="utf-8") == "hello\n"
    assert not (export_root / "src").exists()
def test_workspace_diff_requires_create_time_baseline(tmp_path: Path) -> None:
    """Diffing must fail with ``RuntimeError`` once the baseline tar is removed."""
    vms_root = tmp_path / "vms"
    manager = VmManager(
        backend_name="mock",
        base_dir=vms_root,
        network_manager=TapNetworkManager(enabled=False),
    )
    created = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
    )
    workspace_id = str(created["workspace_id"])

    # Delete the baseline archive that this test expects to exist after
    # workspace creation.
    baseline_archive = (
        vms_root / "workspaces" / workspace_id / "baseline" / "workspace.tar"
    )
    baseline_archive.unlink()

    with pytest.raises(RuntimeError, match="requires a baseline snapshot"):
        manager.diff_workspace(workspace_id)
def test_workspace_export_helpers_preserve_directory_symlinks(tmp_path: Path) -> None:
    """Round-trip a directory through the export archive helpers.

    The source tree holds a regular file, a relative symlink to it, and an
    empty directory; all three must be present after extraction, and the
    symlink must keep its original target.
    """
    source_root = tmp_path / "workspace"
    source_root.mkdir()
    (source_root / "note.txt").write_text("hello\n", encoding="utf-8")
    os.symlink("note.txt", source_root / "note-link")
    (source_root / "empty-dir").mkdir()

    tar_path = tmp_path / "workspace-export.tar"
    prepared = vm_manager_module._prepare_workspace_export_archive(  # noqa: SLF001
        workspace_dir=source_root,
        workspace_path=".",
        archive_path=tar_path,
    )
    assert prepared.artifact_type == "directory"

    destination = tmp_path / "output"
    result = vm_manager_module._extract_workspace_export_archive(  # noqa: SLF001
        tar_path,
        output_path=destination,
        artifact_type="directory",
    )
    assert result["artifact_type"] == "directory"

    assert (destination / "note.txt").read_text(encoding="utf-8") == "hello\n"
    restored_link = destination / "note-link"
    assert restored_link.is_symlink()
    assert os.readlink(restored_link) == "note.txt"
    assert (destination / "empty-dir").is_dir()
def test_workspace_export_helpers_validate_missing_path_and_existing_output(tmp_path: Path) -> None:
    """Check both failure modes of the export helpers.

    Preparing an archive for a path that is absent from the workspace, and
    extracting onto an output path that already exists, must each raise
    ``RuntimeError`` with the corresponding message.
    """
    source_root = tmp_path / "workspace"
    source_root.mkdir()
    (source_root / "note.txt").write_text("hello\n", encoding="utf-8")

    # Case 1: the requested workspace path is missing.
    with pytest.raises(RuntimeError, match="workspace path does not exist"):
        vm_manager_module._prepare_workspace_export_archive(  # noqa: SLF001
            workspace_dir=source_root,
            workspace_path="missing.txt",
            archive_path=tmp_path / "missing.tar",
        )

    # Case 2: a valid archive, but the destination is already occupied.
    tar_path = tmp_path / "note-export.tar"
    prepared = vm_manager_module._prepare_workspace_export_archive(  # noqa: SLF001
        workspace_dir=source_root,
        workspace_path="note.txt",
        archive_path=tar_path,
    )
    occupied_path = tmp_path / "note.txt"
    occupied_path.write_text("already here\n", encoding="utf-8")
    with pytest.raises(RuntimeError, match="output_path already exists"):
        vm_manager_module._extract_workspace_export_archive(  # noqa: SLF001
            tar_path,
            output_path=occupied_path,
            artifact_type=prepared.artifact_type,
        )
def test_diff_workspace_trees_reports_empty_binary_symlink_and_type_changes(tmp_path: Path) -> None:
    """Diff two trees that differ in eight distinct ways and check the report.

    The fixture covers: a modified text file, a deleted text file, an added
    text file, a modified binary file, a retargeted symlink, a directory
    replaced by a file, a removed empty directory, and an added empty
    directory.
    """
    before = tmp_path / "baseline"
    after = tmp_path / "current"
    before.mkdir()
    after.mkdir()

    # Text files: modified, deleted, added.
    (before / "modified.txt").write_text("before\n", encoding="utf-8")
    (after / "modified.txt").write_text("after\n", encoding="utf-8")
    (before / "deleted.txt").write_text("gone\n", encoding="utf-8")
    (after / "added.txt").write_text("new\n", encoding="utf-8")

    # Binary file with changed content (leading NUL byte).
    (before / "binary.bin").write_bytes(b"\x00before")
    (after / "binary.bin").write_bytes(b"\x00after")

    # Symlink whose target changes.
    os.symlink("link-target-old.txt", before / "link")
    os.symlink("link-target-new.txt", after / "link")

    # Directory in the baseline, regular file in the current tree.
    (before / "swap").mkdir()
    (after / "swap").write_text("type changed\n", encoding="utf-8")

    # Empty directories removed and added.
    (before / "removed-empty").mkdir()
    (after / "added-empty").mkdir()

    report = vm_manager_module._diff_workspace_trees(  # noqa: SLF001
        before,
        after,
    )

    assert report["changed"] is True
    expected_summary = {
        "total": 8,
        "added": 2,
        "modified": 3,
        "deleted": 2,
        "type_changed": 1,
        "text_patched": 3,
        "non_text": 5,
    }
    assert report["summary"] == expected_summary

    expected_patch_headers = (
        "--- a/modified.txt",
        "+++ b/modified.txt",
        "--- /dev/null",
        "+++ b/added.txt",
        "--- a/deleted.txt",
        "+++ /dev/null",
    )
    for header in expected_patch_headers:
        assert header in report["patch"]

    by_path = {entry["path"]: entry for entry in report["entries"]}
    assert by_path["binary.bin"]["text_patch"] is None
    expected_artifact_types = (
        ("link", "symlink"),
        ("swap", "file"),
        ("removed-empty", "directory"),
        ("added-empty", "directory"),
    )
    for entry_path, artifact_type in expected_artifact_types:
        assert by_path[entry_path]["artifact_type"] == artifact_type
def test_diff_workspace_trees_unchanged_returns_empty_summary(tmp_path: Path) -> None:
    """Identical trees must yield an all-zero summary, no entries, and no patch."""
    before = tmp_path / "baseline"
    after = tmp_path / "current"
    before.mkdir()
    after.mkdir()
    (before / "note.txt").write_text("same\n", encoding="utf-8")
    (after / "note.txt").write_text("same\n", encoding="utf-8")

    report = vm_manager_module._diff_workspace_trees(  # noqa: SLF001
        before,
        after,
    )

    zero_summary = {
        "total": 0,
        "added": 0,
        "modified": 0,
        "deleted": 0,
        "type_changed": 0,
        "text_patched": 0,
        "non_text": 0,
    }
    expected_report = {
        "changed": False,
        "summary": zero_summary,
        "entries": [],
        "patch": "",
    }
    assert report == expected_report
def test_workspace_shell_lifecycle_and_rehydration(tmp_path: Path) -> None:
manager = VmManager(
backend_name="mock",