Add stopped-workspace disk export and inspection
Finish the 3.1.0 secondary disk-tools milestone so stable workspaces can be stopped, inspected offline, exported as raw ext4 images, and started again without changing the primary workspace-first interaction model. Add workspace stop/start plus workspace disk export/list/read across the CLI, SDK, and MCP, backed by a new offline debugfs inspection helper and guest-only validation. Scrub runtime-only guest state before disk inspection/export, and fix the real guest reliability gaps by flushing the filesystem on stop and removing stale Firecracker socket files before restart. Update the docs, examples, changelog, and roadmap to mark 3.1.0 done, and cover the new lifecycle/disk paths with API, CLI, manager, contract, and package-surface tests. Validation: uv lock; UV_CACHE_DIR=.uv-cache make check; UV_CACHE_DIR=.uv-cache make dist-check; real guest-backed smoke for create, shell/service activity, stop, workspace disk list/read/export, start, exec, and delete.
This commit is contained in:
parent
f2d20ef30a
commit
287f6d100f
26 changed files with 2585 additions and 34 deletions
|
|
@ -18,6 +18,55 @@ from pyro_mcp.vm_manager import VmManager
|
|||
from pyro_mcp.vm_network import NetworkConfig, TapNetworkManager
|
||||
|
||||
|
||||
def _run_debugfs_write(rootfs_image: Path, command: str) -> None:
    """Apply a single ``debugfs`` request to *rootfs_image* in write mode.

    Args:
        rootfs_image: Filesystem image handed to ``debugfs`` as its device.
        command: Request string passed through the ``-R`` option.

    Raises:
        RuntimeError: ``debugfs`` exited non-zero. The message is its stderr,
            else its stdout, else the request text itself.
    """
    argv = ["debugfs", "-w", "-R", command, str(rootfs_image)]
    completed = subprocess.run(  # noqa: S603
        argv,
        text=True,
        capture_output=True,
        check=False,
    )
    if completed.returncode == 0:
        return
    # Prefer whatever debugfs printed; fall back to the request text when it printed nothing.
    raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or command)
|
||||
|
||||
|
||||
def _create_stopped_workspace_rootfs(tmp_path: Path) -> Path:
    """Build a 16 MiB ext4 image populated like a stopped workspace disk.

    The image is formatted with ``mkfs.ext4`` and then filled through
    ``debugfs`` with a ``/workspace`` tree (two files and a symlink) plus
    ``/run/pyro-secrets`` and ``/run/pyro-services`` entries.

    Args:
        tmp_path: Directory that receives the image and the host-side files
            copied into it.

    Returns:
        Path of the created image, ``tmp_path / "workspace-rootfs.ext4"``.

    Raises:
        RuntimeError: ``mkfs.ext4`` or any ``debugfs`` request failed.
    """
    image = tmp_path / "workspace-rootfs.ext4"
    with image.open("wb") as image_file:
        image_file.truncate(16 * 1024 * 1024)

    mkfs = subprocess.run(  # noqa: S603
        ["mkfs.ext4", "-F", str(image)],
        text=True,
        capture_output=True,
        check=False,
    )
    if mkfs.returncode != 0:
        raise RuntimeError(mkfs.stderr.strip() or mkfs.stdout.strip() or "mkfs.ext4 failed")

    # Parents are listed before children so each mkdir finds its parent directory.
    guest_directories = (
        "/workspace",
        "/workspace/src",
        "/run",
        "/run/pyro-secrets",
        "/run/pyro-services",
    )
    for guest_directory in guest_directories:
        _run_debugfs_write(image, f"mkdir {guest_directory}")

    # Host-side files that are copied into the image below.
    host_contents = {
        "note.txt": "hello from disk\n",
        "child.txt": "nested child\n",
        "secret.txt": "super-secret\n",
        "service.log": "service runtime\n",
    }
    host_files: dict[str, Path] = {}
    for file_name, text in host_contents.items():
        host_file = tmp_path / file_name
        host_file.write_text(text, encoding="utf-8")
        host_files[file_name] = host_file

    requests = (
        f"write {host_files['note.txt']} /workspace/note.txt",
        f"write {host_files['child.txt']} /workspace/src/child.txt",
        "symlink /workspace/link note.txt",
        f"write {host_files['secret.txt']} /run/pyro-secrets/TOKEN",
        f"write {host_files['service.log']} /run/pyro-services/app.log",
    )
    for request in requests:
        _run_debugfs_write(image, request)
    return image
|
||||
|
||||
|
||||
def test_vm_manager_lifecycle_and_auto_cleanup(tmp_path: Path) -> None:
|
||||
manager = VmManager(
|
||||
backend_name="mock",
|
||||
|
|
@ -1129,6 +1178,80 @@ def test_vm_manager_firecracker_backend_path(
|
|||
assert manager._backend_name == "firecracker" # noqa: SLF001
|
||||
|
||||
|
||||
def test_firecracker_backend_start_removes_stale_socket_files(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Starting the Firecracker backend removes pre-existing socket files in the workdir."""
    firecracker_bin = tmp_path / "firecracker"
    jailer_bin = tmp_path / "jailer"

    # Allocate the backend without running __init__; only the attributes
    # assigned below are populated.
    backend = cast(Any, object.__new__(vm_manager_module.FirecrackerBackend))
    backend._environment_store = object()  # noqa: SLF001
    backend._firecracker_bin = firecracker_bin  # noqa: SLF001
    backend._jailer_bin = jailer_bin  # noqa: SLF001
    backend._runtime_capabilities = RuntimeCapabilities(  # noqa: SLF001
        supports_vm_boot=True,
        supports_guest_exec=True,
        supports_guest_network=False,
        reason=None,
    )
    backend._network_manager = TapNetworkManager(enabled=False)  # noqa: SLF001
    backend._guest_exec_client = None  # noqa: SLF001
    backend._processes = {}  # noqa: SLF001

    # Placeholder files stand in for the binaries and boot artifacts.
    firecracker_bin.write_text("fc", encoding="utf-8")
    jailer_bin.write_text("jailer", encoding="utf-8")
    kernel_image = tmp_path / "vmlinux"
    kernel_image.write_text("kernel", encoding="utf-8")
    rootfs_image = tmp_path / "rootfs.ext4"
    rootfs_image.write_bytes(b"rootfs")

    workdir = tmp_path / "runtime"
    workdir.mkdir()
    # Regular files at the socket paths play the role of leftovers from an earlier run.
    stale_sockets = {
        workdir / "firecracker.sock": "stale firecracker socket",
        workdir / "vsock.sock": "stale vsock socket",
    }
    for socket_path, marker in stale_sockets.items():
        socket_path.write_text(marker, encoding="utf-8")

    class FakePopen:
        """Process stand-in that exposes a fixed pid and never reports an exit."""

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            del args, kwargs
            self.pid = 4242

        def poll(self) -> None:
            return None

    def fake_run(*args: Any, **kwargs: Any) -> subprocess.CompletedProcess[str]:
        """Report success for any command with a fixed version banner on stdout."""
        del kwargs
        return subprocess.CompletedProcess(
            args=args[0],
            returncode=0,
            stdout="Firecracker v1.0.0\n",
            stderr="",
        )

    patched_subprocess = cast(Any, vm_manager_module).subprocess
    monkeypatch.setattr(patched_subprocess, "run", fake_run)
    monkeypatch.setattr(patched_subprocess, "Popen", FakePopen)

    instance = vm_manager_module.VmInstance(
        vm_id="abcd1234",
        environment="debian:12",
        vcpu_count=1,
        mem_mib=512,
        ttl_seconds=600,
        created_at=time.time(),
        expires_at=time.time() + 600,
        workdir=workdir,
        metadata={
            "kernel_image": str(kernel_image),
            "rootfs_image": str(rootfs_image),
        },
    )

    backend.start(instance)

    assert instance.firecracker_pid == 4242
    for socket_path in stale_sockets:
        assert not socket_path.exists()
|
||||
|
||||
|
||||
def test_vm_manager_fails_closed_without_host_compat_opt_in(tmp_path: Path) -> None:
|
||||
manager = VmManager(
|
||||
backend_name="mock",
|
||||
|
|
@ -2691,3 +2814,181 @@ def test_workspace_secrets_require_guest_exec_on_firecracker_runtime(
|
|||
allow_host_compat=True,
|
||||
secrets=[{"name": "TOKEN", "value": "expected"}],
|
||||
)
|
||||
|
||||
|
||||
def test_workspace_stop_and_start_preserve_logs_and_clear_live_state(tmp_path: Path) -> None:
    """Stop keeps the command log but drops shells and services; start allows exec again."""
    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    seed_dir = tmp_path / "seed"
    seed_dir.mkdir()
    seed_note = seed_dir / "note.txt"
    seed_note.write_text("hello from seed\n", encoding="utf-8")

    workspace = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
        seed_path=seed_dir,
    )
    workspace_id = str(workspace["workspace_id"])

    # Produce one logged command, one open shell, and one running service before stopping.
    manager.exec_workspace(workspace_id, command="cat note.txt", timeout_seconds=30)
    shell_id = str(manager.open_shell(workspace_id)["shell_id"])
    service = manager.start_service(
        workspace_id,
        "app",
        command='sh -lc \'touch .ready && trap "exit 0" TERM; while true; do sleep 60; done\'',
        readiness={"type": "file", "path": ".ready"},
    )
    assert service["state"] == "running"

    after_stop = manager.stop_workspace(workspace_id)
    assert after_stop["state"] == "stopped"
    assert after_stop["command_count"] == 1
    assert after_stop["service_count"] == 0
    assert after_stop["running_service_count"] == 0
    logs = manager.logs_workspace(workspace_id)
    assert logs["count"] == 1
    # The shell opened before the stop can no longer be read.
    with pytest.raises(RuntimeError, match="must be in 'started' state"):
        manager.read_shell(workspace_id, shell_id, cursor=0, max_chars=1024)

    after_start = manager.start_workspace(workspace_id)
    assert after_start["state"] == "started"
    assert after_start["command_count"] == 1
    assert after_start["service_count"] == 0
    second_run = manager.exec_workspace(
        workspace_id,
        command="cat note.txt",
        timeout_seconds=30,
    )
    assert second_run["stdout"] == "hello from seed\n"
|
||||
|
||||
|
||||
def test_workspace_stop_flushes_guest_filesystem_before_stopping(
    tmp_path: Path,
) -> None:
    """Stopping a guest-backed workspace execs ``sync`` before the backend stop call."""
    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    workspace = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
    )
    workspace_id = str(workspace["workspace_id"])

    # Rewrite the persisted record so it describes a started guest_vsock
    # workspace backed by a real ext4 image.
    record_path = tmp_path / "vms" / "workspaces" / workspace_id / "workspace.json"
    record = json.loads(record_path.read_text(encoding="utf-8"))
    record.update(state="started", firecracker_pid=os.getpid())
    record["metadata"].update(
        {
            "execution_mode": "guest_vsock",
            "rootfs_image": str(_create_stopped_workspace_rootfs(tmp_path)),
        }
    )
    record_path.write_text(json.dumps(record, indent=2, sort_keys=True), encoding="utf-8")

    events: list[tuple[str, str]] = []

    class RecordingBackend:
        """Backend stand-in that records the order of exec and stop calls."""

        def exec(
            self,
            instance: Any,
            command: str,
            timeout_seconds: int,
            *,
            workdir: Path | None = None,
            env: dict[str, str] | None = None,
        ) -> vm_manager_module.VmExecResult:
            del instance, timeout_seconds, workdir, env
            events.append(("exec", command))
            return vm_manager_module.VmExecResult(
                stdout="",
                stderr="",
                exit_code=0,
                duration_ms=1,
            )

        def stop(self, instance: Any) -> None:
            del instance
            events.append(("stop", "instance"))

    # Swap in the recorder and present the manager as a guest-exec-capable Firecracker runtime.
    manager._backend = RecordingBackend()  # type: ignore[assignment]  # noqa: SLF001
    manager._backend_name = "firecracker"  # noqa: SLF001
    manager._runtime_capabilities = RuntimeCapabilities(  # noqa: SLF001
        supports_vm_boot=True,
        supports_guest_exec=True,
        supports_guest_network=False,
        reason=None,
    )

    result = manager.stop_workspace(workspace_id)

    assert events == [("exec", "sync"), ("stop", "instance")]
    assert result["state"] == "stopped"
|
||||
|
||||
|
||||
def test_workspace_disk_operations_scrub_runtime_only_paths_and_export(
    tmp_path: Path,
) -> None:
    """Disk list/read/export work on a stopped guest workspace and hide pyro runtime dirs."""
    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    image = _create_stopped_workspace_rootfs(tmp_path)
    workspace_id = "workspace-disk-123"
    # Persist a stopped guest_vsock record directly instead of going through create_workspace.
    record = vm_manager_module.WorkspaceRecord(
        workspace_id=workspace_id,
        environment="debian:12-base",
        vcpu_count=1,
        mem_mib=512,
        ttl_seconds=600,
        created_at=time.time(),
        expires_at=time.time() + 600,
        state="stopped",
        network_policy="off",
        allow_host_compat=False,
        metadata={
            "execution_mode": "guest_vsock",
            "rootfs_image": str(image),
            "workspace_path": "/workspace",
        },
    )
    manager._save_workspace_locked(record)  # noqa: SLF001

    workspace_listing = manager.list_workspace_disk(
        workspace_id,
        path="/workspace",
        recursive=True,
    )
    assert workspace_listing["path"] == "/workspace"
    workspace_paths = {entry["path"] for entry in workspace_listing["entries"]}
    expected_paths = {"/workspace/note.txt", "/workspace/src/child.txt", "/workspace/link"}
    assert expected_paths <= workspace_paths

    note = manager.read_workspace_disk(workspace_id, path="note.txt", max_bytes=4096)
    assert note["content"] == "hello from disk\n"
    assert note["truncated"] is False

    # The image was built with both /run/pyro-* directories; neither may be listed.
    run_listing = manager.list_workspace_disk(workspace_id, path="/run", recursive=True)
    run_paths = {entry["path"] for entry in run_listing["entries"]}
    assert run_paths.isdisjoint({"/run/pyro-secrets", "/run/pyro-services"})

    export_target = tmp_path / "workspace-copy.ext4"
    export_result = manager.export_workspace_disk(workspace_id, output_path=export_target)
    assert export_result["disk_format"] == "ext4"
    assert export_target.exists()
    assert export_target.stat().st_size == int(export_result["bytes_written"])
|
||||
|
||||
|
||||
def test_workspace_disk_operations_reject_host_compat_workspaces(tmp_path: Path) -> None:
    """Export, list, and read each raise RuntimeError for a stopped host_compat workspace."""
    manager = VmManager(
        backend_name="mock",
        base_dir=tmp_path / "vms",
        network_manager=TapNetworkManager(enabled=False),
    )
    workspace = manager.create_workspace(
        environment="debian:12-base",
        allow_host_compat=True,
    )
    workspace_id = str(workspace["workspace_id"])
    manager.stop_workspace(workspace_id)

    export_target = tmp_path / "workspace.ext4"
    # Checked in the same order as before: export, then list, then read.
    disk_operations = (
        lambda: manager.export_workspace_disk(workspace_id, output_path=export_target),
        lambda: manager.list_workspace_disk(workspace_id),
        lambda: manager.read_workspace_disk(workspace_id, path="note.txt"),
    )
    for disk_operation in disk_operations:
        with pytest.raises(RuntimeError, match="host_compat workspaces"):
            disk_operation()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue