Add workspace snapshots and full reset

Implement the 2.8.0 workspace milestone with named snapshots and full-sandbox reset across the CLI, Python SDK, and MCP server.

Persist the immutable baseline plus named snapshot archives under each workspace, add workspace reset metadata, and make reset recreate the sandbox while clearing command history, shells, and services without changing the workspace identity or diff baseline.

Refresh the 2.8.0 docs, roadmap, and Python example around reset-over-repair, then validate with uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and a real guest-backed create/snapshot/reset/diff smoke test outside the sandbox.
This commit is contained in:
Thales Maciel 2026-03-12 12:41:11 -03:00
parent f504f0a331
commit 18b8fd2a7d
20 changed files with 1429 additions and 29 deletions

View file

@ -545,10 +545,212 @@ def test_workspace_diff_requires_create_time_baseline(tmp_path: Path) -> None:
baseline_path = tmp_path / "vms" / "workspaces" / workspace_id / "baseline" / "workspace.tar"
baseline_path.unlink()
with pytest.raises(RuntimeError, match="requires a baseline snapshot"):
with pytest.raises(RuntimeError, match="require[s]? a baseline snapshot"):
manager.diff_workspace(workspace_id)
def test_workspace_snapshots_and_reset_round_trip(tmp_path: Path) -> None:
    """Round-trip a named snapshot through create, list, reset, and delete.

    The test expects each reset to report zeroed command and service
    counters, to increment ``reset_count``, and to leave ``note.txt`` with
    the content held by the snapshot that was reset to.
    """
    seed_root = tmp_path / "seed"
    seed_root.mkdir()
    (seed_root / "note.txt").write_text("seed\n", encoding="utf-8")

    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    creation = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
        seed_path=seed_root,
    )
    workspace_id = str(creation["workspace_id"])

    # Overwrite the seeded file, then capture that state as "checkpoint".
    manager.exec_workspace(
        workspace_id,
        command="printf 'checkpoint\\n' > note.txt",
        timeout_seconds=30,
    )
    snapshot_payload = manager.create_snapshot(workspace_id, "checkpoint")
    assert snapshot_payload["snapshot"]["snapshot_name"] == "checkpoint"

    listing = manager.list_snapshots(workspace_id)
    assert listing["count"] == 2
    names_before_delete = [entry["snapshot_name"] for entry in listing["snapshots"]]
    assert names_before_delete == ["baseline", "checkpoint"]

    # Drift away from the checkpoint: change the file again and start a service.
    manager.exec_workspace(
        workspace_id,
        command="printf 'after\\n' > note.txt",
        timeout_seconds=30,
    )
    manager.start_service(
        workspace_id,
        "app",
        command="sh -lc 'touch .ready; while true; do sleep 60; done'",
        readiness={"type": "file", "path": ".ready"},
    )

    # First reset: back to the named snapshot.
    snapshot_reset = manager.reset_workspace(workspace_id, snapshot="checkpoint")
    assert snapshot_reset["workspace_reset"]["snapshot_name"] == "checkpoint"
    assert snapshot_reset["reset_count"] == 1
    assert snapshot_reset["last_command"] is None
    assert snapshot_reset["command_count"] == 0
    assert snapshot_reset["service_count"] == 0
    assert snapshot_reset["running_service_count"] == 0

    checkpoint_cat = manager.exec_workspace(
        workspace_id,
        command="cat note.txt",
        timeout_seconds=30,
    )
    assert checkpoint_cat["stdout"] == "checkpoint\n"
    # Only the `cat` issued after the reset is expected in the log.
    snapshot_reset_logs = manager.logs_workspace(workspace_id)
    assert snapshot_reset_logs["count"] == 1

    # Second reset: no snapshot argument, expected to resolve to "baseline".
    baseline_reset = manager.reset_workspace(workspace_id)
    assert baseline_reset["workspace_reset"]["snapshot_name"] == "baseline"
    assert baseline_reset["reset_count"] == 2
    assert baseline_reset["command_count"] == 0
    assert baseline_reset["service_count"] == 0
    assert manager.logs_workspace(workspace_id)["count"] == 0

    baseline_cat = manager.exec_workspace(
        workspace_id,
        command="cat note.txt",
        timeout_seconds=30,
    )
    assert baseline_cat["stdout"] == "seed\n"
    diff_result = manager.diff_workspace(workspace_id)
    assert diff_result["changed"] is False

    # Deleting the named snapshot should leave only the baseline listed.
    deletion = manager.delete_snapshot(workspace_id, "checkpoint")
    assert deletion["deleted"] is True
    remaining = manager.list_snapshots(workspace_id)
    names_after_delete = [entry["snapshot_name"] for entry in remaining["snapshots"]]
    assert names_after_delete == ["baseline"]
def test_workspace_snapshot_and_reset_require_baseline(tmp_path: Path) -> None:
    """Snapshot and reset operations must fail once the baseline archive is gone.

    The baseline tarball is removed from disk after workspace creation, and
    each of list/create/delete snapshot and reset is expected to raise a
    ``RuntimeError`` mentioning the missing baseline snapshot.
    """
    vms_root = tmp_path / "vms"
    manager = VmManager(
        backend_name="mock",
        base_dir=vms_root,
        network_manager=TapNetworkManager(enabled=False),
    )
    creation = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
    )
    workspace_id = str(creation["workspace_id"])

    baseline_archive = vms_root / "workspaces" / workspace_id / "baseline" / "workspace.tar"
    baseline_archive.unlink()

    expected_message = "require[s]? a baseline snapshot"
    # Deferred calls, checked in the same order as they are listed here.
    baseline_dependent_calls = (
        lambda: manager.list_snapshots(workspace_id),
        lambda: manager.create_snapshot(workspace_id, "checkpoint"),
        lambda: manager.delete_snapshot(workspace_id, "checkpoint"),
        lambda: manager.reset_workspace(workspace_id),
    )
    for invoke in baseline_dependent_calls:
        with pytest.raises(RuntimeError, match=expected_message):
            invoke()
def test_workspace_delete_baseline_snapshot_is_rejected(tmp_path: Path) -> None:
    """Deleting the snapshot named "baseline" must raise ``ValueError``."""
    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    creation = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
    )
    workspace_id = str(creation["workspace_id"])

    rejection = pytest.raises(ValueError, match="cannot delete the baseline snapshot")
    with rejection:
        manager.delete_snapshot(workspace_id, "baseline")
def test_workspace_reset_recreates_stopped_workspace(tmp_path: Path) -> None:
    """Resetting a workspace persisted as "stopped" should bring it back "started".

    The workspace record is rewritten under the manager lock to look stopped
    (no firecracker pid); the reset is then expected to report the "started"
    state, target the baseline, and expose the seeded ``note.txt`` again.
    """
    seed_root = tmp_path / "seed"
    seed_root.mkdir()
    (seed_root / "note.txt").write_text("seed\n", encoding="utf-8")

    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    creation = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
        seed_path=seed_root,
    )
    workspace_id = str(creation["workspace_id"])

    # Force the persisted record into a stopped state before resetting.
    with manager._lock:  # noqa: SLF001
        record = manager._load_workspace_locked(workspace_id)  # noqa: SLF001
        record.state = "stopped"
        record.firecracker_pid = None
        manager._save_workspace_locked(record)  # noqa: SLF001

    reset_result = manager.reset_workspace(workspace_id)
    assert reset_result["state"] == "started"
    assert reset_result["workspace_reset"]["snapshot_name"] == "baseline"

    cat_result = manager.exec_workspace(
        workspace_id,
        command="cat note.txt",
        timeout_seconds=30,
    )
    assert cat_result["stdout"] == "seed\n"
def test_workspace_reset_failure_leaves_workspace_stopped(tmp_path: Path) -> None:
    """A reset whose archive import raises should leave the workspace stopped.

    The backend's ``import_archive`` is replaced with a stub that raises
    ``RuntimeError("boom")``. After the failed reset the persisted record is
    expected to be "stopped" with no firecracker pid and an unchanged
    ``reset_count``, and both snapshots are expected to still be listed.
    """
    seed_root = tmp_path / "seed"
    seed_root.mkdir()
    (seed_root / "note.txt").write_text("seed\n", encoding="utf-8")

    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    creation = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
        seed_path=seed_root,
    )
    workspace_id = str(creation["workspace_id"])
    manager.create_snapshot(workspace_id, "checkpoint")

    def _always_raise(*args: Any, **kwargs: Any) -> dict[str, Any]:
        # Arguments are irrelevant; the stub only exists to fail the import step.
        del args, kwargs
        raise RuntimeError("boom")

    manager._backend.import_archive = _always_raise  # type: ignore[method-assign]  # noqa: SLF001

    with pytest.raises(RuntimeError, match="boom"):
        manager.reset_workspace(workspace_id, snapshot="checkpoint")

    # Inspect the persisted record rather than any in-memory payload.
    with manager._lock:  # noqa: SLF001
        record = manager._load_workspace_locked(workspace_id)  # noqa: SLF001
    assert record.state == "stopped"
    assert record.firecracker_pid is None
    assert record.reset_count == 0

    listing = manager.list_snapshots(workspace_id)
    surviving_names = [entry["snapshot_name"] for entry in listing["snapshots"]]
    assert surviving_names == ["baseline", "checkpoint"]
def test_workspace_export_helpers_preserve_directory_symlinks(tmp_path: Path) -> None:
workspace_dir = tmp_path / "workspace"
workspace_dir.mkdir()