Add stopped-workspace disk export and inspection

Finish the 3.1.0 secondary disk-tools milestone so stable workspaces can be
stopped, inspected offline, exported as raw ext4 images, and started again
without changing the primary workspace-first interaction model.

Add workspace stop/start plus workspace disk export/list/read across the CLI,
SDK, and MCP, backed by a new offline debugfs inspection helper and guest-only
validation. Scrub runtime-only guest state before disk inspection/export, and
fix the real guest reliability gaps by flushing the filesystem on stop and
removing stale Firecracker socket files before restart.

Update the docs, examples, changelog, and roadmap to mark 3.1.0 done, and
cover the new lifecycle/disk paths with API, CLI, manager, contract, and
package-surface tests.

Validation: uv lock; UV_CACHE_DIR=.uv-cache make check; UV_CACHE_DIR=.uv-cache
make dist-check; real guest-backed smoke for create, shell/service activity,
stop, workspace disk list/read/export, start, exec, and delete.
This commit is contained in:
Thales Maciel 2026-03-12 20:57:16 -03:00
parent f2d20ef30a
commit 287f6d100f
26 changed files with 2585 additions and 34 deletions

View file

@ -119,6 +119,12 @@ class Pyro:
def status_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Return workspace state and latest command metadata."""
    return self._manager.status_workspace(workspace_id)
def stop_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Stop one persistent workspace without resetting `/workspace`."""
    return self._manager.stop_workspace(workspace_id)
def start_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Start one stopped persistent workspace without resetting `/workspace`."""
    return self._manager.start_workspace(workspace_id)
def push_workspace_sync(
self,
workspace_id: str,
@ -151,6 +157,43 @@ class Pyro:
def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Compare `/workspace` to the immutable create-time baseline."""
    return self._manager.diff_workspace(workspace_id)
def export_workspace_disk(
    self,
    workspace_id: str,
    *,
    output_path: str | Path,
) -> dict[str, Any]:
    """Export the raw stopped-workspace rootfs image to `output_path`.

    Delegates to the manager, which requires a stopped, guest-backed
    workspace.
    """
    return self._manager.export_workspace_disk(
        workspace_id,
        output_path=output_path,
    )
def list_workspace_disk(
    self,
    workspace_id: str,
    *,
    path: str = "/workspace",
    recursive: bool = False,
) -> dict[str, Any]:
    """List entries from a stopped workspace rootfs path without booting it.

    `path` defaults to `/workspace`; set `recursive` to descend into
    nested directories.
    """
    return self._manager.list_workspace_disk(
        workspace_id,
        path=path,
        recursive=recursive,
    )
def read_workspace_disk(
    self,
    workspace_id: str,
    path: str,
    *,
    max_bytes: int = 65536,
) -> dict[str, Any]:
    """Read one file from a stopped workspace rootfs without booting it.

    At most `max_bytes` decoded bytes of content are returned.
    """
    return self._manager.read_workspace_disk(
        workspace_id,
        path=path,
        max_bytes=max_bytes,
    )
def create_snapshot(self, workspace_id: str, snapshot_name: str) -> dict[str, Any]:
    """Create one named workspace snapshot from the current `/workspace` tree."""
    return self._manager.create_snapshot(workspace_id, snapshot_name)
@ -457,6 +500,16 @@ class Pyro:
"""Inspect workspace state and latest command metadata."""
return self.status_workspace(workspace_id)
# MCP tool: thin async wrapper over the synchronous SDK stop method.
@server.tool()
async def workspace_stop(workspace_id: str) -> dict[str, Any]:
    """Stop one persistent workspace without resetting `/workspace`."""
    return self.stop_workspace(workspace_id)
# MCP tool: thin async wrapper over the synchronous SDK start method.
@server.tool()
async def workspace_start(workspace_id: str) -> dict[str, Any]:
    """Start one stopped persistent workspace without resetting `/workspace`."""
    return self.start_workspace(workspace_id)
@server.tool()
async def workspace_logs(workspace_id: str) -> dict[str, Any]:
"""Return persisted command history for one workspace."""
@ -476,6 +529,40 @@ class Pyro:
"""Compare `/workspace` to the immutable create-time baseline."""
return self.diff_workspace(workspace_id)
# MCP tool: the workspace must already be stopped (enforced by the manager).
@server.tool()
async def workspace_disk_export(
    workspace_id: str,
    output_path: str,
) -> dict[str, Any]:
    """Export the raw stopped workspace rootfs image to one host path."""
    return self.export_workspace_disk(workspace_id, output_path=output_path)
# MCP tool: offline listing; relative paths resolve inside /workspace.
@server.tool()
async def workspace_disk_list(
    workspace_id: str,
    path: str = "/workspace",
    recursive: bool = False,
) -> dict[str, Any]:
    """Inspect one stopped workspace rootfs path without booting the guest."""
    return self.list_workspace_disk(
        workspace_id,
        path=path,
        recursive=recursive,
    )
# MCP tool: offline file read, capped at max_bytes decoded bytes.
@server.tool()
async def workspace_disk_read(
    workspace_id: str,
    path: str,
    max_bytes: int = 65536,
) -> dict[str, Any]:
    """Read one regular file from a stopped workspace rootfs without booting the guest."""
    return self.read_workspace_disk(
        workspace_id,
        path,
        max_bytes=max_bytes,
    )
@server.tool()
async def snapshot_create(workspace_id: str, snapshot_name: str) -> dict[str, Any]:
"""Create one named workspace snapshot from the current `/workspace` tree."""

View file

@ -21,6 +21,7 @@ from pyro_mcp.vm_manager import (
DEFAULT_SERVICE_READY_INTERVAL_MS,
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS,
DEFAULT_VCPU_COUNT,
DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES,
WORKSPACE_GUEST_PATH,
WORKSPACE_SHELL_SIGNAL_NAMES,
)
@ -253,6 +254,52 @@ def _print_workspace_export_human(payload: dict[str, Any]) -> None:
)
def _print_workspace_disk_export_human(payload: dict[str, Any]) -> None:
    """Print a one-line human-readable summary for a workspace disk export."""
    fields = (
        f"workspace_id={str(payload.get('workspace_id', 'unknown'))}",
        f"output_path={str(payload.get('output_path', 'unknown'))}",
        f"disk_format={str(payload.get('disk_format', 'unknown'))}",
        f"bytes_written={int(payload.get('bytes_written', 0))}",
    )
    print("[workspace-disk-export] " + " ".join(fields))
def _print_workspace_disk_list_human(payload: dict[str, Any]) -> None:
    """Print a human-readable listing of stopped-workspace disk entries."""
    recursive_label = "yes" if bool(payload.get("recursive")) else "no"
    print(
        f"Workspace disk path: {str(payload.get('path', WORKSPACE_GUEST_PATH))} "
        f"(recursive={recursive_label})"
    )
    entries = payload.get("entries")
    if not isinstance(entries, list) or not entries:
        print("No workspace disk entries found.")
        return
    for entry in entries:
        # Tolerate malformed entries rather than crash the listing.
        if not isinstance(entry, dict):
            continue
        suffix = ""
        link_target = entry.get("link_target")
        if isinstance(link_target, str) and link_target != "":
            suffix = f" -> {link_target}"
        print(
            f"{str(entry.get('path', 'unknown'))} "
            f"[{str(entry.get('artifact_type', 'unknown'))}] "
            f"size={int(entry.get('size_bytes', 0))}" + suffix
        )
def _print_workspace_disk_read_human(payload: dict[str, Any]) -> None:
    """Write file content to stdout and a one-line summary to stderr."""
    _write_stream(str(payload.get("content", "")), stream=sys.stdout)
    truncated_label = "yes" if bool(payload.get("truncated", False)) else "no"
    summary = " ".join(
        (
            "[workspace-disk-read]",
            f"workspace_id={str(payload.get('workspace_id', 'unknown'))}",
            f"path={str(payload.get('path', 'unknown'))}",
            f"size_bytes={int(payload.get('size_bytes', 0))}",
            f"truncated={truncated_label}",
        )
    )
    # Summary goes to stderr so stdout stays pipeable raw content.
    print(summary, file=sys.stderr, flush=True)
def _print_workspace_diff_human(payload: dict[str, Any]) -> None:
if not bool(payload.get("changed")):
print("No workspace changes.")
@ -687,6 +734,10 @@ def _build_parser() -> argparse.ArgumentParser:
pyro workspace create debian:12 --seed-path ./repo
pyro workspace sync push WORKSPACE_ID ./repo --dest src
pyro workspace exec WORKSPACE_ID -- sh -lc 'printf "hello\\n" > note.txt'
pyro workspace stop WORKSPACE_ID
pyro workspace disk list WORKSPACE_ID
pyro workspace disk export WORKSPACE_ID --output ./workspace.ext4
pyro workspace start WORKSPACE_ID
pyro workspace snapshot create WORKSPACE_ID checkpoint
pyro workspace reset WORKSPACE_ID --snapshot checkpoint
pyro workspace diff WORKSPACE_ID
@ -1039,6 +1090,141 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_stop_parser = workspace_subparsers.add_parser(
"stop",
help="Stop one workspace without resetting it.",
description=(
"Stop the backing sandbox, close shells, stop services, and preserve the "
"workspace filesystem, history, and snapshots."
),
epilog="Example:\n pyro workspace stop WORKSPACE_ID",
formatter_class=_HelpFormatter,
)
workspace_stop_parser.add_argument(
"workspace_id",
metavar="WORKSPACE_ID",
help="Persistent workspace identifier.",
)
workspace_stop_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_start_parser = workspace_subparsers.add_parser(
"start",
help="Start one stopped workspace without resetting it.",
description=(
"Start a previously stopped workspace from its preserved rootfs and "
"workspace state."
),
epilog="Example:\n pyro workspace start WORKSPACE_ID",
formatter_class=_HelpFormatter,
)
workspace_start_parser.add_argument(
"workspace_id",
metavar="WORKSPACE_ID",
help="Persistent workspace identifier.",
)
workspace_start_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_disk_parser = workspace_subparsers.add_parser(
"disk",
help="Inspect or export a stopped workspace disk.",
description=(
"Use secondary stopped-workspace disk tools for raw ext4 export and offline "
"inspection without booting the guest."
),
epilog=dedent(
"""
Examples:
pyro workspace stop WORKSPACE_ID
pyro workspace disk list WORKSPACE_ID
pyro workspace disk read WORKSPACE_ID note.txt
pyro workspace disk export WORKSPACE_ID --output ./workspace.ext4
Disk tools are secondary to `workspace export` and require a stopped, guest-backed
workspace.
"""
),
formatter_class=_HelpFormatter,
)
workspace_disk_subparsers = workspace_disk_parser.add_subparsers(
dest="workspace_disk_command",
required=True,
metavar="DISK",
)
workspace_disk_export_parser = workspace_disk_subparsers.add_parser(
"export",
help="Export the raw stopped workspace rootfs image.",
description="Copy the raw stopped workspace rootfs ext4 image to an explicit host path.",
epilog="Example:\n pyro workspace disk export WORKSPACE_ID --output ./workspace.ext4",
formatter_class=_HelpFormatter,
)
workspace_disk_export_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_disk_export_parser.add_argument(
"--output",
required=True,
help="Exact host path to create for the exported raw ext4 image.",
)
workspace_disk_export_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_disk_list_parser = workspace_disk_subparsers.add_parser(
"list",
help="List files from a stopped workspace rootfs path.",
description=(
"Inspect one stopped workspace rootfs path without booting the guest. Relative "
"paths resolve inside `/workspace`; absolute paths inspect any guest path."
),
epilog="Example:\n pyro workspace disk list WORKSPACE_ID src --recursive",
formatter_class=_HelpFormatter,
)
workspace_disk_list_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_disk_list_parser.add_argument(
"path",
nargs="?",
default=WORKSPACE_GUEST_PATH,
metavar="PATH",
help="Guest path to inspect. Defaults to `/workspace`.",
)
workspace_disk_list_parser.add_argument(
"--recursive",
action="store_true",
help="Recurse into nested directories.",
)
workspace_disk_list_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_disk_read_parser = workspace_disk_subparsers.add_parser(
"read",
help="Read one regular file from a stopped workspace rootfs.",
description=(
"Read one regular file from a stopped workspace rootfs without booting the guest. "
"Relative paths resolve inside `/workspace`; absolute paths inspect any guest path."
),
epilog="Example:\n pyro workspace disk read WORKSPACE_ID note.txt --max-bytes 4096",
formatter_class=_HelpFormatter,
)
workspace_disk_read_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_disk_read_parser.add_argument("path", metavar="PATH")
workspace_disk_read_parser.add_argument(
"--max-bytes",
type=int,
default=DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES,
help="Maximum number of decoded UTF-8 bytes to return.",
)
workspace_disk_read_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_shell_parser = workspace_subparsers.add_parser(
"shell",
help="Open and manage persistent interactive shells.",
@ -1885,6 +2071,88 @@ def main() -> None:
else:
_print_workspace_reset_human(payload)
return
if args.workspace_command == "stop":
try:
payload = pyro.stop_workspace(args.workspace_id)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_summary_human(payload, action="Stopped workspace")
return
if args.workspace_command == "start":
try:
payload = pyro.start_workspace(args.workspace_id)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_summary_human(payload, action="Started workspace")
return
if args.workspace_command == "disk":
if args.workspace_disk_command == "export":
try:
payload = pyro.export_workspace_disk(
args.workspace_id,
output_path=args.output,
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_disk_export_human(payload)
return
if args.workspace_disk_command == "list":
try:
payload = pyro.list_workspace_disk(
args.workspace_id,
path=args.path,
recursive=bool(args.recursive),
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_disk_list_human(payload)
return
if args.workspace_disk_command == "read":
try:
payload = pyro.read_workspace_disk(
args.workspace_id,
args.path,
max_bytes=args.max_bytes,
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_disk_read_human(payload)
return
if args.workspace_command == "shell":
if args.workspace_shell_command == "open":
secret_env = _parse_workspace_secret_env_options(getattr(args, "secret_env", []))

View file

@ -8,6 +8,7 @@ PUBLIC_CLI_ENV_SUBCOMMANDS = ("inspect", "list", "pull", "prune")
PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = (
"create",
"delete",
"disk",
"diff",
"exec",
"export",
@ -16,9 +17,12 @@ PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = (
"service",
"shell",
"snapshot",
"start",
"status",
"stop",
"sync",
)
# Public subcommand surfaces per `pyro workspace <group>` parser.
PUBLIC_CLI_WORKSPACE_DISK_SUBCOMMANDS = ("export", "list", "read")
PUBLIC_CLI_WORKSPACE_SERVICE_SUBCOMMANDS = ("list", "logs", "start", "status", "stop")
PUBLIC_CLI_WORKSPACE_SHELL_SUBCOMMANDS = ("close", "open", "read", "signal", "write")
PUBLIC_CLI_WORKSPACE_SNAPSHOT_SUBCOMMANDS = ("create", "delete", "list")
@ -34,6 +38,9 @@ PUBLIC_CLI_WORKSPACE_CREATE_FLAGS = (
"--secret-file",
"--json",
)
# Accepted flags per public `pyro workspace ...` subcommand.
PUBLIC_CLI_WORKSPACE_DISK_EXPORT_FLAGS = ("--output", "--json")
PUBLIC_CLI_WORKSPACE_DISK_LIST_FLAGS = ("--recursive", "--json")
PUBLIC_CLI_WORKSPACE_DISK_READ_FLAGS = ("--max-bytes", "--json")
PUBLIC_CLI_WORKSPACE_EXEC_FLAGS = ("--timeout-seconds", "--secret-env", "--json")
PUBLIC_CLI_WORKSPACE_DIFF_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS = ("--output", "--json")
@ -68,6 +75,9 @@ PUBLIC_CLI_WORKSPACE_SHELL_CLOSE_FLAGS = ("--json",)
# Lifecycle/snapshot/sync subcommands accept only `--json` (plus positionals),
# except sync push which also takes `--dest`.
PUBLIC_CLI_WORKSPACE_SNAPSHOT_CREATE_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SNAPSHOT_DELETE_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SNAPSHOT_LIST_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_START_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_STATUS_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_STOP_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SYNC_PUSH_FLAGS = ("--dest", "--json")
PUBLIC_CLI_RUN_FLAGS = (
"--vcpu-count",
@ -92,10 +102,12 @@ PUBLIC_SDK_METHODS = (
"exec_vm",
"exec_workspace",
"export_workspace",
"export_workspace_disk",
"inspect_environment",
"list_environments",
"list_services",
"list_snapshots",
"list_workspace_disk",
"logs_service",
"logs_workspace",
"network_info_vm",
@ -104,17 +116,20 @@ PUBLIC_SDK_METHODS = (
"pull_environment",
"push_workspace_sync",
"read_shell",
"read_workspace_disk",
"reap_expired",
"reset_workspace",
"run_in_vm",
"signal_shell",
"start_service",
"start_vm",
"start_workspace",
"status_service",
"status_vm",
"status_workspace",
"stop_service",
"stop_vm",
"stop_workspace",
"write_shell",
)
@ -144,11 +159,16 @@ PUBLIC_MCP_TOOLS = (
"vm_stop",
"workspace_create",
"workspace_delete",
"workspace_disk_export",
"workspace_disk_list",
"workspace_disk_read",
"workspace_diff",
"workspace_exec",
"workspace_export",
"workspace_logs",
"workspace_reset",
"workspace_start",
"workspace_status",
"workspace_stop",
"workspace_sync_push",
)

View file

@ -19,7 +19,7 @@ from typing import Any
from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths
DEFAULT_ENVIRONMENT_VERSION = "1.0.0"
DEFAULT_CATALOG_VERSION = "3.0.0"
DEFAULT_CATALOG_VERSION = "3.1.0"
OCI_MANIFEST_ACCEPT = ", ".join(
(
"application/vnd.oci.image.index.v1+json",

View file

@ -34,6 +34,12 @@ from pyro_mcp.vm_environments import EnvironmentStore, default_cache_dir, get_en
from pyro_mcp.vm_firecracker import build_launch_plan
from pyro_mcp.vm_guest import VsockExecClient
from pyro_mcp.vm_network import NetworkConfig, TapNetworkManager
from pyro_mcp.workspace_disk import (
export_workspace_disk_image,
list_workspace_disk,
read_workspace_disk_file,
scrub_workspace_runtime_paths,
)
from pyro_mcp.workspace_ports import DEFAULT_PUBLISHED_PORT_HOST
from pyro_mcp.workspace_shells import (
create_local_shell,
@ -72,6 +78,7 @@ WORKSPACE_SECRET_MAX_BYTES = 64 * 1024
DEFAULT_SHELL_COLS = 120
DEFAULT_SHELL_ROWS = 30
DEFAULT_SHELL_MAX_CHARS = 65536
DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES = 65536
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS = 30
DEFAULT_SERVICE_READY_INTERVAL_MS = 500
DEFAULT_SERVICE_LOG_TAIL_LINES = 200
@ -789,6 +796,28 @@ def _workspace_host_destination(workspace_dir: Path, destination: str) -> Path:
return workspace_dir.joinpath(*suffix.parts)
def _normalize_workspace_disk_path(path: str) -> str:
    """Normalize one guest path for workspace disk inspection.

    Absolute paths are collapsed lexically: empty, `/`, and `.` components
    are dropped, and `..` pops one component (never rising above `/`).
    Relative paths are resolved inside the workspace via
    `_normalize_workspace_destination`.

    Raises ValueError for empty or whitespace-only input.
    """
    candidate = path.strip()
    if not candidate:
        raise ValueError("workspace disk path must not be empty")
    if not candidate.startswith("/"):
        # Relative paths reuse the shared workspace-destination rules.
        normalized, _ = _normalize_workspace_destination(candidate)
        return normalized
    stack: list[str] = []
    for component in PurePosixPath(candidate).parts:
        if component in {"", "/", "."}:
            continue
        if component == "..":
            # `..` at the root is silently ignored rather than rejected.
            if stack:
                stack.pop()
            continue
        stack.append(component)
    if not stack:
        return "/"
    return "/" + "/".join(stack)
def _normalize_archive_member_name(name: str) -> PurePosixPath:
candidate = name.strip()
if candidate == "":
@ -2480,6 +2509,11 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
def start(self, instance: VmInstance) -> None:
launch_plan = build_launch_plan(instance)
for stale_socket_path in (
launch_plan.api_socket_path,
instance.workdir / "vsock.sock",
):
stale_socket_path.unlink(missing_ok=True)
instance.metadata["firecracker_config_path"] = str(launch_plan.config_path)
instance.metadata["guest_network_path"] = str(launch_plan.guest_network_path)
instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path)
@ -4309,6 +4343,159 @@ class VmManager:
"entries": redacted_entries,
}
def stop_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Stop one workspace's backing VM while preserving its on-disk state.

    Stops services, closes shells, flushes the guest filesystem, then stops
    the backend if the workspace is currently started. The record always
    transitions to "stopped" and is persisted — even on failure, in which
    case the error is recorded in `last_error` and re-raised.
    """
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        self._ensure_workspace_not_expired_locked(workspace, time.time())
        self._refresh_workspace_liveness_locked(workspace)
        instance = workspace.to_instance(
            workdir=self._workspace_runtime_dir(workspace.workspace_id)
        )
        try:
            self._stop_workspace_services_locked(workspace, instance)
            self._close_workspace_shells_locked(workspace, instance)
            # Flush guest writes so the rootfs image is consistent offline.
            self._flush_workspace_filesystem_locked(workspace, instance)
            if workspace.state == "started":
                self._backend.stop(instance)
            workspace.state = "stopped"
            workspace.firecracker_pid = None
            workspace.last_error = None
            workspace.metadata = dict(instance.metadata)
            # Drop runtime-only guest paths from the now-quiescent image.
            self._scrub_workspace_runtime_state_locked(workspace)
        except Exception as exc:
            # Still mark stopped so the workspace never appears running
            # with a dead backend; preserve the failure for inspection.
            workspace.state = "stopped"
            workspace.firecracker_pid = None
            workspace.last_error = str(exc)
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
            raise
        self._save_workspace_locked(workspace)
        return self._serialize_workspace(workspace)
def start_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Start a previously stopped workspace from its preserved state.

    Idempotent: an already-started workspace only has its service counts
    refreshed. On failure the backend is best-effort stopped and the record
    is persisted as "stopped" with `last_error` set before re-raising.
    """
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        self._ensure_workspace_not_expired_locked(workspace, time.time())
        self._refresh_workspace_liveness_locked(workspace)
        if workspace.state == "started":
            # Already running: nothing to boot.
            self._refresh_workspace_service_counts_locked(workspace)
            self._save_workspace_locked(workspace)
            return self._serialize_workspace(workspace)
        instance = workspace.to_instance(
            workdir=self._workspace_runtime_dir(workspace.workspace_id)
        )
        # Clear any stale service/shell records left from the stopped VM.
        self._stop_workspace_services_locked(workspace, instance)
        self._close_workspace_shells_locked(workspace, instance)
    # Capability checks run outside the lock; the lock is re-acquired for
    # the actual boot and record mutation below.
    try:
        self._require_workspace_network_policy_support(
            network_policy=workspace.network_policy
        )
        if self._runtime_capabilities.supports_guest_exec:
            self._ensure_workspace_guest_bootstrap_support(instance)
        with self._lock:
            self._start_instance_locked(instance)
            # Reload: the record may have changed while unlocked.
            workspace = self._load_workspace_locked(workspace_id)
            if workspace.secrets:
                self._install_workspace_secrets_locked(workspace, instance)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = None
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
            return self._serialize_workspace(workspace)
    except Exception as exc:
        # Best-effort teardown; the original failure is what propagates.
        try:
            if instance.state == "started":
                self._backend.stop(instance)
        except Exception:
            pass
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = "stopped"
            workspace.firecracker_pid = None
            workspace.last_error = str(exc)
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        raise
def export_workspace_disk(
    self,
    workspace_id: str,
    *,
    output_path: str | Path,
) -> dict[str, Any]:
    """Export the stopped workspace's raw rootfs ext4 image to a host path.

    Requires a stopped, guest-backed workspace. Runtime-only guest state is
    scrubbed from the image before the copy so secrets never leave the
    runtime directory.

    Raises ValueError for an empty `output_path`.
    """
    raw_output_path = str(output_path).strip()
    if raw_output_path == "":
        raise ValueError("output_path must not be empty")
    # NOTE(review): the unstripped output_path is resolved here, so
    # surrounding whitespace in a non-empty path is preserved — confirm
    # whether raw_output_path was intended instead.
    resolved_output_path = Path(output_path).expanduser().resolve()
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        rootfs_path = self._workspace_stopped_disk_rootfs_locked(
            workspace,
            operation_name="workspace_disk_export",
        )
        self._scrub_workspace_runtime_state_locked(workspace, rootfs_path=rootfs_path)
        self._save_workspace_locked(workspace)
        # Copy while holding the lock so the workspace cannot restart
        # mid-export.
        exported = export_workspace_disk_image(rootfs_path, output_path=resolved_output_path)
    return {
        "workspace_id": workspace_id,
        "output_path": str(Path(str(exported["output_path"]))),
        "disk_format": str(exported["disk_format"]),
        "bytes_written": int(exported["bytes_written"]),
    }
def list_workspace_disk(
    self,
    workspace_id: str,
    *,
    path: str = WORKSPACE_GUEST_PATH,
    recursive: bool = False,
) -> dict[str, Any]:
    """List entries from one stopped workspace rootfs path offline.

    `path` is normalized first (relative paths resolve inside the
    workspace). Requires a stopped, guest-backed workspace.
    """
    normalized_path = _normalize_workspace_disk_path(path)
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        rootfs_path = self._workspace_stopped_disk_rootfs_locked(
            workspace,
            operation_name="workspace_disk_list",
        )
        # Scrub runtime-only guest state so it is never visible offline.
        self._scrub_workspace_runtime_state_locked(workspace, rootfs_path=rootfs_path)
        self._save_workspace_locked(workspace)
        entries = list_workspace_disk(
            rootfs_path,
            guest_path=normalized_path,
            recursive=recursive,
        )
    return {
        "workspace_id": workspace_id,
        "path": normalized_path,
        "recursive": recursive,
        "entries": entries,
    }
def read_workspace_disk(
    self,
    workspace_id: str,
    *,
    path: str,
    max_bytes: int = DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES,
) -> dict[str, Any]:
    """Read one regular file from a stopped workspace rootfs offline.

    `path` is normalized first (relative paths resolve inside the
    workspace). Requires a stopped, guest-backed workspace; at most
    `max_bytes` decoded bytes of content are returned.
    """
    normalized_path = _normalize_workspace_disk_path(path)
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        rootfs_path = self._workspace_stopped_disk_rootfs_locked(
            workspace,
            operation_name="workspace_disk_read",
        )
        # Scrub runtime-only guest state so it is never readable offline.
        self._scrub_workspace_runtime_state_locked(workspace, rootfs_path=rootfs_path)
        self._save_workspace_locked(workspace)
        payload = read_workspace_disk_file(
            rootfs_path,
            guest_path=normalized_path,
            max_bytes=max_bytes,
        )
    payload["workspace_id"] = workspace_id
    return payload
def delete_workspace(
self,
workspace_id: str,
@ -4748,6 +4935,67 @@ class VmManager:
def _workspace_service_record_path(self, workspace_id: str, service_name: str) -> Path:
    """Return the JSON record path for one workspace service."""
    return self._workspace_services_dir(workspace_id) / f"{service_name}.json"
def _workspace_rootfs_image_path_locked(
    self,
    workspace: WorkspaceRecord,
) -> Path:
    """Return the workspace's persisted rootfs image path.

    Raises RuntimeError when no rootfs image is recorded in the workspace
    metadata, or when the recorded file no longer exists on disk.
    """
    raw_rootfs_image = workspace.metadata.get("rootfs_image")
    if raw_rootfs_image in (None, ""):
        raise RuntimeError(
            f"workspace {workspace.workspace_id!r} does not have a persisted rootfs image"
        )
    rootfs_path = Path(raw_rootfs_image)
    if rootfs_path.exists():
        return rootfs_path
    raise RuntimeError(
        f"workspace {workspace.workspace_id!r} rootfs image is unavailable at "
        f"{rootfs_path}"
    )
def _workspace_stopped_disk_rootfs_locked(
    self,
    workspace: WorkspaceRecord,
    *,
    operation_name: str,
) -> Path:
    """Validate that offline disk tooling may run; return the rootfs path.

    Requires a non-expired, currently stopped, guest-backed workspace.
    `operation_name` is used only in error messages. Caller must hold the
    manager lock.
    """
    self._ensure_workspace_not_expired_locked(workspace, time.time())
    self._refresh_workspace_liveness_locked(workspace)
    if workspace.state != "stopped":
        raise RuntimeError(
            f"workspace {workspace.workspace_id!r} must be stopped before {operation_name}"
        )
    # host_compat workspaces have no backing ext4 image to inspect.
    if workspace.metadata.get("execution_mode") == "host_compat":
        raise RuntimeError(
            f"{operation_name} is unavailable for host_compat workspaces"
        )
    return self._workspace_rootfs_image_path_locked(workspace)
def _scrub_workspace_runtime_state_locked(
    self,
    workspace: WorkspaceRecord,
    *,
    rootfs_path: Path | None = None,
) -> None:
    """Remove runtime-only guest paths from the workspace rootfs image.

    No-op for host_compat workspaces (no backing image). When `rootfs_path`
    is not supplied, it is resolved from the workspace metadata.
    """
    execution_mode = workspace.metadata.get("execution_mode")
    if execution_mode == "host_compat":
        return
    scrub_workspace_runtime_paths(
        rootfs_path or self._workspace_rootfs_image_path_locked(workspace)
    )
def _flush_workspace_filesystem_locked(
    self,
    workspace: WorkspaceRecord,
    instance: VmInstance,
) -> None:
    """Flush guest filesystem buffers before stopping the workspace VM.

    Runs `sync` in the guest with a 10-second timeout. No-op unless the
    workspace is started on a non-mock backend with guest-exec support.
    """
    if workspace.state != "started":
        return
    if self._backend_name == "mock":
        return
    if not self._runtime_capabilities.supports_guest_exec:
        return
    self._backend.exec(instance, "sync", 10)
def _count_workspaces_locked(self) -> int:
    """Count persisted workspace records without loading them."""
    return sum(1 for _ in self._workspaces_dir.glob("*/workspace.json"))

View file

@ -0,0 +1,264 @@
"""Stopped-workspace disk export and offline inspection helpers."""
from __future__ import annotations
import re
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from typing import Literal
WorkspaceDiskArtifactType = Literal["file", "directory", "symlink"]

# Guest paths holding runtime-only state (secrets, shell state, service
# state) that is scrubbed from a stopped rootfs before inspection/export.
WORKSPACE_DISK_RUNTIME_ONLY_PATHS = (
    "/run/pyro-secrets",
    "/run/pyro-shells",
    "/run/pyro-services",
)
# Parses one `debugfs ls -p` row of the form /inode/mode/uid/gid/name/size/.
_DEBUGFS_LS_RE = re.compile(
    r"^/(?P<inode>\d+)/(?P<mode>\d+)/(?P<uid>\d+)/(?P<gid>\d+)/(?P<name>.*)/(?P<size>\d*)/$"
)
# Field extractors for `debugfs stat` output.
_DEBUGFS_SIZE_RE = re.compile(r"Size:\s+(?P<size>\d+)")
_DEBUGFS_TYPE_RE = re.compile(r"Type:\s+(?P<type>\w+)")
_DEBUGFS_LINK_RE = re.compile(r'Fast link dest:\s+"(?P<target>.*)"')
@dataclass(frozen=True)
class WorkspaceDiskEntry:
    """One inspectable path from a stopped workspace rootfs image."""

    # Absolute guest path of the entry.
    path: str
    # One of "file", "directory", or "symlink".
    artifact_type: WorkspaceDiskArtifactType
    size_bytes: int
    # Symlink destination; None for non-symlinks or unresolved targets.
    link_target: str | None = None

    def to_payload(self) -> dict[str, str | int | None]:
        """Serialize this entry into a JSON-friendly dict."""
        payload: dict[str, str | int | None] = {
            "path": self.path,
            "artifact_type": self.artifact_type,
            "size_bytes": self.size_bytes,
            "link_target": self.link_target,
        }
        return payload
@dataclass(frozen=True)
class _DebugfsStat:
    # Result of one `debugfs stat` lookup for a guest path.
    path: str
    artifact_type: WorkspaceDiskArtifactType
    size_bytes: int
    link_target: str | None = None
@dataclass(frozen=True)
class _DebugfsDirEntry:
    # One row parsed from `debugfs ls -p`; artifact_type is None for inode
    # kinds we do not surface (devices, fifos, sockets, ...).
    name: str
    path: str
    artifact_type: WorkspaceDiskArtifactType | None
    size_bytes: int
def export_workspace_disk_image(rootfs_image: Path, *, output_path: Path) -> dict[str, str | int]:
    """Copy one stopped workspace rootfs image to the requested host path.

    Refuses to overwrite: raises RuntimeError when `output_path` already
    exists (including a dangling symlink). Returns the output path, disk
    format, and bytes written.
    """
    # An existing output (even a broken symlink) is always an error.
    if output_path.exists() or output_path.is_symlink():
        raise RuntimeError(f"output_path already exists: {output_path}")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # copy2 also preserves the source image's timestamps/permissions.
    shutil.copy2(rootfs_image, output_path)
    return {
        "output_path": str(output_path),
        "disk_format": "ext4",
        "bytes_written": output_path.stat().st_size,
    }
def list_workspace_disk(
    rootfs_image: Path,
    *,
    guest_path: str,
    recursive: bool,
) -> list[dict[str, str | int | None]]:
    """Return inspectable entries from one stopped workspace rootfs path.

    A non-directory target yields a single entry for itself; a directory
    yields its children (recursively when `recursive` is true), sorted by
    path. Raises RuntimeError when `guest_path` does not exist.
    """
    target = _debugfs_stat(rootfs_image, guest_path)
    if target is None:
        raise RuntimeError(f"workspace disk path does not exist: {guest_path}")
    if target.artifact_type != "directory":
        single = WorkspaceDiskEntry(
            path=target.path,
            artifact_type=target.artifact_type,
            size_bytes=target.size_bytes,
            link_target=target.link_target,
        )
        return [single.to_payload()]
    collected: list[WorkspaceDiskEntry] = []

    def walk(current_path: str) -> None:
        for child in _debugfs_ls_entries(rootfs_image, current_path):
            if child.artifact_type is None:
                # Skip inode kinds we do not surface (devices, fifos, ...).
                continue
            link_target = None
            if child.artifact_type == "symlink":
                # `ls -p` rows carry no symlink target; stat each symlink.
                child_stat = _debugfs_stat(rootfs_image, child.path)
                link_target = None if child_stat is None else child_stat.link_target
            collected.append(
                WorkspaceDiskEntry(
                    path=child.path,
                    artifact_type=child.artifact_type,
                    size_bytes=child.size_bytes,
                    link_target=link_target,
                )
            )
            if recursive and child.artifact_type == "directory":
                walk(child.path)

    walk(guest_path)
    collected.sort(key=lambda item: item.path)
    return [entry.to_payload() for entry in collected]
def read_workspace_disk_file(
    rootfs_image: Path,
    *,
    guest_path: str,
    max_bytes: int,
) -> dict[str, str | int | bool]:
    """Read one regular file from a stopped workspace rootfs image.

    Dumps the file via debugfs into a temporary directory, decodes up to
    `max_bytes` bytes as UTF-8 (undecodable bytes replaced), and reports
    whether the content was truncated.

    Raises ValueError for non-positive `max_bytes`, and RuntimeError for a
    missing path or a non-regular-file target.
    """
    # Validate arguments before spawning any debugfs subprocess (the
    # original only checked max_bytes after running `stat`).
    if max_bytes <= 0:
        raise ValueError("max_bytes must be positive")
    target = _debugfs_stat(rootfs_image, guest_path)
    if target is None:
        raise RuntimeError(f"workspace disk path does not exist: {guest_path}")
    if target.artifact_type != "file":
        raise RuntimeError("workspace disk read only supports regular files")
    with tempfile.TemporaryDirectory(prefix="pyro-workspace-disk-read-") as temp_dir:
        dumped_path = Path(temp_dir) / "workspace-disk-read.bin"
        # NOTE(review): guest_path is interpolated into the debugfs command
        # line; paths containing spaces would be mis-parsed by debugfs —
        # confirm callers normalize/reject such paths.
        _run_debugfs(rootfs_image, f"dump {guest_path} {dumped_path}")
        if not dumped_path.exists():
            raise RuntimeError(f"failed to dump workspace disk file: {guest_path}")
        raw_bytes = dumped_path.read_bytes()
    return {
        "path": guest_path,
        "size_bytes": len(raw_bytes),
        "max_bytes": max_bytes,
        "content": raw_bytes[:max_bytes].decode("utf-8", errors="replace"),
        "truncated": len(raw_bytes) > max_bytes,
    }
def scrub_workspace_runtime_paths(rootfs_image: Path) -> None:
    """Remove runtime-only guest paths from a stopped workspace rootfs image."""
    # Secrets and shell/service state under /run must not leak into offline
    # inspection or exported disk images.
    for guest_path in WORKSPACE_DISK_RUNTIME_ONLY_PATHS:
        _debugfs_remove_tree(rootfs_image, guest_path)
def _run_debugfs(rootfs_image: Path, command: str, *, writable: bool = False) -> str:
    """Run one debugfs request against `rootfs_image` and return its output.

    Pass `writable=True` for mutating commands (`rm`, `rmdir`). Raises
    RuntimeError when debugfs is not installed or the command fails.
    """
    debugfs_path = shutil.which("debugfs")
    if debugfs_path is None:
        raise RuntimeError("debugfs is required for workspace disk operations")
    argv = [debugfs_path]
    if writable:
        argv.append("-w")
    argv.extend(["-R", command, str(rootfs_image)])
    proc = subprocess.run(  # noqa: S603
        argv,
        text=True,
        capture_output=True,
        check=False,
    )
    # debugfs emits diagnostics on stderr even for successful commands, so
    # keep both streams for callers that match error markers.
    combined = "\n".join(part for part in (proc.stdout, proc.stderr) if part != "")
    output = _strip_debugfs_banner(combined)
    if proc.returncode != 0:
        message = output.strip() or f"debugfs command failed: {command}"
        raise RuntimeError(message)
    return output.strip()
def _strip_debugfs_banner(output: str) -> str:
    """Drop leading `debugfs <version>` banner lines from command output."""
    lines = output.splitlines()
    start = 0
    while start < len(lines) and lines[start].startswith("debugfs "):
        start += 1
    return "\n".join(lines[start:])
def _debugfs_missing(output: str) -> bool:
    """Return True when debugfs output indicates a nonexistent path."""
    markers = (
        "File not found by ext2_lookup",
        "File not found by ext2fs_lookup",
    )
    return any(marker in output for marker in markers)
def _artifact_type_from_mode(mode: str) -> WorkspaceDiskArtifactType | None:
    """Map a debugfs octal mode prefix to an artifact type (None if unsupported)."""
    prefixes = (
        ("04", "directory"),
        ("10", "file"),
        ("12", "symlink"),
    )
    for prefix, artifact_type in prefixes:
        if mode.startswith(prefix):
            return artifact_type
    return None
def _debugfs_stat(rootfs_image: Path, guest_path: str) -> _DebugfsStat | None:
    """Stat one guest path via debugfs; return None when it does not exist.

    Raises RuntimeError for unparsable output or unsupported inode types
    (only directories, regular files, and symlinks are supported).
    """
    output = _run_debugfs(rootfs_image, f"stat {guest_path}")
    if _debugfs_missing(output):
        return None
    type_match = _DEBUGFS_TYPE_RE.search(output)
    size_match = _DEBUGFS_SIZE_RE.search(output)
    if type_match is None or size_match is None:
        raise RuntimeError(f"failed to inspect workspace disk path: {guest_path}")
    raw_type = type_match.group("type")
    artifact_type: WorkspaceDiskArtifactType
    if raw_type == "directory":
        artifact_type = "directory"
    elif raw_type == "regular":
        artifact_type = "file"
    elif raw_type == "symlink":
        artifact_type = "symlink"
    else:
        raise RuntimeError(f"unsupported workspace disk path type: {guest_path}")
    link_target = None
    if artifact_type == "symlink":
        # Only inline ("fast") symlinks expose their target in stat output;
        # link_target stays None when the pattern does not match.
        link_match = _DEBUGFS_LINK_RE.search(output)
        if link_match is not None:
            link_target = link_match.group("target")
    return _DebugfsStat(
        path=guest_path,
        artifact_type=artifact_type,
        size_bytes=int(size_match.group("size")),
        link_target=link_target,
    )
def _debugfs_ls_entries(rootfs_image: Path, guest_path: str) -> list[_DebugfsDirEntry]:
    """List direct children of one directory via `debugfs ls -p`.

    Raises RuntimeError when the directory does not exist. Rows that do not
    match the expected `ls -p` format, plus the `.`/`..` entries, are
    skipped.
    """
    output = _run_debugfs(rootfs_image, f"ls -p {guest_path}")
    if _debugfs_missing(output):
        raise RuntimeError(f"workspace disk path does not exist: {guest_path}")
    entries: list[_DebugfsDirEntry] = []
    base = PurePosixPath(guest_path)
    for raw_line in output.splitlines():
        line = raw_line.strip()
        if line == "":
            continue
        match = _DEBUGFS_LS_RE.match(line)
        if match is None:
            continue
        name = match.group("name")
        if name in {".", ".."}:
            continue
        # PurePosixPath("/") / name already renders "/name", so the root
        # special case is redundant but harmless.
        child_path = str(base / name) if str(base) != "/" else f"/{name}"
        entries.append(
            _DebugfsDirEntry(
                name=name,
                path=child_path,
                # Mode prefix decides file/directory/symlink; None otherwise.
                artifact_type=_artifact_type_from_mode(match.group("mode")),
                # `ls -p` may leave the size column empty for some rows.
                size_bytes=int(match.group("size") or "0"),
            )
        )
    return entries
def _debugfs_remove_tree(rootfs_image: Path, guest_path: str) -> None:
    """Recursively remove one guest path from the image via debugfs.

    Missing paths are ignored. Directories are emptied depth-first and then
    removed with `rmdir`; files and symlinks are unlinked with `rm`.
    """
    stat_result = _debugfs_stat(rootfs_image, guest_path)
    if stat_result is None:
        return
    if stat_result.artifact_type == "directory":
        for child in _debugfs_ls_entries(rootfs_image, guest_path):
            _debugfs_remove_tree(rootfs_image, child.path)
        _run_debugfs(rootfs_image, f"rmdir {guest_path}", writable=True)
        return
    _run_debugfs(rootfs_image, f"rm {guest_path}", writable=True)