Pivot persistent APIs to workspaces

Replace the public persistent-sandbox contract with workspace-first naming across CLI, SDK, MCP, payloads, and on-disk state.

Rename the task surface to workspace equivalents, switch create-time seeding to `seed_path`, and store records under `workspaces/<workspace_id>/workspace.json` without carrying legacy task aliases or migrating old local task state.

Keep `pyro run` and `vm_*` unchanged. Validation covered `uv lock`; the focused public-contract, API, CLI, and manager tests; `UV_CACHE_DIR=.uv-cache make check`; and `UV_CACHE_DIR=.uv-cache make dist-check`.
This commit is contained in:
Thales Maciel 2026-03-12 01:21:49 -03:00
parent f57454bcb4
commit 48b82d8386
13 changed files with 743 additions and 618 deletions

View file

@ -1,4 +1,4 @@
"""Lifecycle manager for ephemeral VM environments and persistent tasks."""
"""Lifecycle manager for ephemeral VM environments and persistent workspaces."""
from __future__ import annotations
@ -36,13 +36,13 @@ DEFAULT_TIMEOUT_SECONDS = 30
DEFAULT_TTL_SECONDS = 600
DEFAULT_ALLOW_HOST_COMPAT = False
TASK_LAYOUT_VERSION = 2
TASK_WORKSPACE_DIRNAME = "workspace"
TASK_COMMANDS_DIRNAME = "commands"
TASK_RUNTIME_DIRNAME = "runtime"
TASK_WORKSPACE_GUEST_PATH = "/workspace"
TASK_GUEST_AGENT_PATH = "/opt/pyro/bin/pyro_guest_agent.py"
TASK_ARCHIVE_UPLOAD_TIMEOUT_SECONDS = 60
WORKSPACE_LAYOUT_VERSION = 2
WORKSPACE_DIRNAME = "workspace"
WORKSPACE_COMMANDS_DIRNAME = "commands"
WORKSPACE_RUNTIME_DIRNAME = "runtime"
WORKSPACE_GUEST_PATH = "/workspace"
WORKSPACE_GUEST_AGENT_PATH = "/opt/pyro/bin/pyro_guest_agent.py"
WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS = 60
WorkspaceSeedMode = Literal["empty", "directory", "tar_archive"]
@ -69,10 +69,10 @@ class VmInstance:
@dataclass
class TaskRecord:
"""Persistent task metadata stored on disk."""
class WorkspaceRecord:
"""Persistent workspace metadata stored on disk."""
task_id: str
workspace_id: str
environment: str
vcpu_count: int
mem_mib: int
@ -98,9 +98,9 @@ class TaskRecord:
command_count: int = 0,
last_command: dict[str, Any] | None = None,
workspace_seed: dict[str, Any] | None = None,
) -> TaskRecord:
) -> WorkspaceRecord:
return cls(
task_id=instance.vm_id,
workspace_id=instance.vm_id,
environment=instance.environment,
vcpu_count=instance.vcpu_count,
mem_mib=instance.mem_mib,
@ -121,7 +121,7 @@ class TaskRecord:
def to_instance(self, *, workdir: Path) -> VmInstance:
return VmInstance(
vm_id=self.task_id,
vm_id=self.workspace_id,
environment=self.environment,
vcpu_count=self.vcpu_count,
mem_mib=self.mem_mib,
@ -140,8 +140,8 @@ class TaskRecord:
def to_payload(self) -> dict[str, Any]:
return {
"layout_version": TASK_LAYOUT_VERSION,
"task_id": self.task_id,
"layout_version": WORKSPACE_LAYOUT_VERSION,
"workspace_id": self.workspace_id,
"environment": self.environment,
"vcpu_count": self.vcpu_count,
"mem_mib": self.mem_mib,
@ -161,9 +161,9 @@ class TaskRecord:
}
@classmethod
def from_payload(cls, payload: dict[str, Any]) -> TaskRecord:
def from_payload(cls, payload: dict[str, Any]) -> WorkspaceRecord:
return cls(
task_id=str(payload["task_id"]),
workspace_id=str(payload["workspace_id"]),
environment=str(payload["environment"]),
vcpu_count=int(payload["vcpu_count"]),
mem_mib=int(payload["mem_mib"]),
@ -179,7 +179,7 @@ class TaskRecord:
network=_deserialize_network(payload.get("network")),
command_count=int(payload.get("command_count", 0)),
last_command=_optional_dict(payload.get("last_command")),
workspace_seed=_task_workspace_seed_dict(payload.get("workspace_seed")),
workspace_seed=_workspace_seed_dict(payload.get("workspace_seed")),
)
@ -194,10 +194,15 @@ class PreparedWorkspaceSeed:
bytes_written: int = 0
cleanup_dir: Path | None = None
def to_payload(self, *, destination: str = TASK_WORKSPACE_GUEST_PATH) -> dict[str, Any]:
def to_payload(
self,
*,
destination: str = WORKSPACE_GUEST_PATH,
path_key: str = "seed_path",
) -> dict[str, Any]:
return {
"mode": self.mode,
"source_path": self.source_path,
path_key: self.source_path,
"destination": destination,
"entry_count": self.entry_count,
"bytes_written": self.bytes_written,
@ -255,21 +260,21 @@ def _string_dict(value: object) -> dict[str, str]:
def _empty_workspace_seed_payload() -> dict[str, Any]:
return {
"mode": "empty",
"source_path": None,
"destination": TASK_WORKSPACE_GUEST_PATH,
"seed_path": None,
"destination": WORKSPACE_GUEST_PATH,
"entry_count": 0,
"bytes_written": 0,
}
def _task_workspace_seed_dict(value: object) -> dict[str, Any]:
def _workspace_seed_dict(value: object) -> dict[str, Any]:
if not isinstance(value, dict):
return _empty_workspace_seed_payload()
payload = _empty_workspace_seed_payload()
payload.update(
{
"mode": str(value.get("mode", payload["mode"])),
"source_path": _optional_str(value.get("source_path")),
"seed_path": _optional_str(value.get("seed_path")),
"destination": str(value.get("destination", payload["destination"])),
"entry_count": int(value.get("entry_count", payload["entry_count"])),
"bytes_written": int(value.get("bytes_written", payload["bytes_written"])),
@ -374,7 +379,7 @@ def _normalize_workspace_destination(destination: str) -> tuple[str, PurePosixPa
destination_path = PurePosixPath(candidate)
if any(part == ".." for part in destination_path.parts):
raise ValueError("workspace destination must stay inside /workspace")
workspace_root = PurePosixPath(TASK_WORKSPACE_GUEST_PATH)
workspace_root = PurePosixPath(WORKSPACE_GUEST_PATH)
if not destination_path.is_absolute():
destination_path = workspace_root / destination_path
parts = [part for part in destination_path.parts if part not in {"", "."}]
@ -510,7 +515,7 @@ def _extract_seed_archive_to_host_workspace(
def _instance_workspace_host_dir(instance: VmInstance) -> Path:
raw_value = instance.metadata.get("workspace_host_dir")
if raw_value is None or raw_value == "":
raise RuntimeError("task workspace host directory is unavailable")
raise RuntimeError("workspace host directory is unavailable")
return Path(raw_value)
@ -518,13 +523,13 @@ def _patch_rootfs_guest_agent(rootfs_image: Path, guest_agent_path: Path) -> Non
debugfs_path = shutil.which("debugfs")
if debugfs_path is None:
raise RuntimeError(
"debugfs is required to seed task workspaces on guest-backed runtimes"
"debugfs is required to seed workspaces on guest-backed runtimes"
)
with tempfile.TemporaryDirectory(prefix="pyro-guest-agent-") as temp_dir:
staged_agent_path = Path(temp_dir) / "pyro_guest_agent.py"
shutil.copy2(guest_agent_path, staged_agent_path)
subprocess.run( # noqa: S603
[debugfs_path, "-w", "-R", f"rm {TASK_GUEST_AGENT_PATH}", str(rootfs_image)],
[debugfs_path, "-w", "-R", f"rm {WORKSPACE_GUEST_AGENT_PATH}", str(rootfs_image)],
text=True,
capture_output=True,
check=False,
@ -534,7 +539,7 @@ def _patch_rootfs_guest_agent(rootfs_image: Path, guest_agent_path: Path) -> Non
debugfs_path,
"-w",
"-R",
f"write {staged_agent_path} {TASK_GUEST_AGENT_PATH}",
f"write {staged_agent_path} {WORKSPACE_GUEST_AGENT_PATH}",
str(rootfs_image),
],
text=True,
@ -543,7 +548,7 @@ def _patch_rootfs_guest_agent(rootfs_image: Path, guest_agent_path: Path) -> Non
)
if proc.returncode != 0:
raise RuntimeError(
"failed to patch guest agent into task rootfs: "
"failed to patch guest agent into workspace rootfs: "
f"{proc.stderr.strip() or proc.stdout.strip()}"
)
@ -862,7 +867,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
port,
archive_path,
destination=destination,
timeout_seconds=TASK_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
uds_path=uds_path,
)
return {
@ -885,7 +890,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
class VmManager:
"""In-process lifecycle manager for ephemeral VM environments and tasks."""
"""In-process lifecycle manager for ephemeral VM environments and workspaces."""
MIN_VCPUS = 1
MAX_VCPUS = 8
@ -911,7 +916,7 @@ class VmManager:
) -> None:
self._backend_name = backend_name or "firecracker"
self._base_dir = base_dir or Path("/tmp/pyro-mcp")
self._tasks_dir = self._base_dir / "tasks"
self._workspaces_dir = self._base_dir / "workspaces"
resolved_cache_dir = cache_dir or default_cache_dir()
self._runtime_paths = runtime_paths
if self._backend_name == "firecracker":
@ -944,7 +949,7 @@ class VmManager:
self._lock = threading.Lock()
self._instances: dict[str, VmInstance] = {}
self._base_dir.mkdir(parents=True, exist_ok=True)
self._tasks_dir.mkdir(parents=True, exist_ok=True)
self._workspaces_dir.mkdir(parents=True, exist_ok=True)
self._backend = self._build_backend()
def _build_backend(self) -> VmBackend:
@ -989,8 +994,8 @@ class VmManager:
now = time.time()
with self._lock:
self._reap_expired_locked(now)
self._reap_expired_tasks_locked(now)
active_count = len(self._instances) + self._count_tasks_locked()
self._reap_expired_workspaces_locked(now)
active_count = len(self._instances) + self._count_workspaces_locked()
if active_count >= self._max_active_vms:
raise RuntimeError(
f"max active VMs reached ({self._max_active_vms}); delete old VMs first"
@ -1126,7 +1131,7 @@ class VmManager:
del self._instances[vm_id]
return {"deleted_vm_ids": expired_vm_ids, "count": len(expired_vm_ids)}
def create_task(
def create_workspace(
self,
*,
environment: str,
@ -1135,22 +1140,22 @@ class VmManager:
ttl_seconds: int = DEFAULT_TTL_SECONDS,
network: bool = False,
allow_host_compat: bool = DEFAULT_ALLOW_HOST_COMPAT,
source_path: str | Path | None = None,
seed_path: str | Path | None = None,
) -> dict[str, Any]:
self._validate_limits(vcpu_count=vcpu_count, mem_mib=mem_mib, ttl_seconds=ttl_seconds)
get_environment(environment, runtime_paths=self._runtime_paths)
prepared_seed = self._prepare_workspace_seed(source_path)
prepared_seed = self._prepare_workspace_seed(seed_path)
now = time.time()
task_id = uuid.uuid4().hex[:12]
task_dir = self._task_dir(task_id)
runtime_dir = self._task_runtime_dir(task_id)
workspace_dir = self._task_workspace_dir(task_id)
commands_dir = self._task_commands_dir(task_id)
task_dir.mkdir(parents=True, exist_ok=False)
workspace_dir.mkdir(parents=True, exist_ok=True)
workspace_id = uuid.uuid4().hex[:12]
workspace_dir = self._workspace_dir(workspace_id)
runtime_dir = self._workspace_runtime_dir(workspace_id)
host_workspace_dir = self._workspace_host_dir(workspace_id)
commands_dir = self._workspace_commands_dir(workspace_id)
workspace_dir.mkdir(parents=True, exist_ok=False)
host_workspace_dir.mkdir(parents=True, exist_ok=True)
commands_dir.mkdir(parents=True, exist_ok=True)
instance = VmInstance(
vm_id=task_id,
vm_id=workspace_id,
environment=environment,
vcpu_count=vcpu_count,
mem_mib=mem_mib,
@ -1162,13 +1167,13 @@ class VmManager:
allow_host_compat=allow_host_compat,
)
instance.metadata["allow_host_compat"] = str(allow_host_compat).lower()
instance.metadata["workspace_path"] = TASK_WORKSPACE_GUEST_PATH
instance.metadata["workspace_host_dir"] = str(workspace_dir)
instance.metadata["workspace_path"] = WORKSPACE_GUEST_PATH
instance.metadata["workspace_host_dir"] = str(host_workspace_dir)
try:
with self._lock:
self._reap_expired_locked(now)
self._reap_expired_tasks_locked(now)
active_count = len(self._instances) + self._count_tasks_locked()
self._reap_expired_workspaces_locked(now)
active_count = len(self._instances) + self._count_workspaces_locked()
if active_count >= self._max_active_vms:
raise RuntimeError(
f"max active VMs reached ({self._max_active_vms}); delete old VMs first"
@ -1178,7 +1183,7 @@ class VmManager:
prepared_seed.archive_path is not None
and self._runtime_capabilities.supports_guest_exec
):
self._ensure_task_guest_seed_support(instance)
self._ensure_workspace_guest_seed_support(instance)
with self._lock:
self._start_instance_locked(instance)
self._require_guest_exec_or_opt_in(instance)
@ -1187,7 +1192,7 @@ class VmManager:
import_summary = self._backend.import_archive(
instance,
archive_path=prepared_seed.archive_path,
destination=TASK_WORKSPACE_GUEST_PATH,
destination=WORKSPACE_GUEST_PATH,
)
workspace_seed["entry_count"] = int(import_summary["entry_count"])
workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
@ -1195,14 +1200,14 @@ class VmManager:
elif self._runtime_capabilities.supports_guest_exec:
self._backend.exec(
instance,
f"mkdir -p {shlex.quote(TASK_WORKSPACE_GUEST_PATH)}",
f"mkdir -p {shlex.quote(WORKSPACE_GUEST_PATH)}",
10,
)
else:
instance.metadata["execution_mode"] = "host_compat"
task = TaskRecord.from_instance(instance, workspace_seed=workspace_seed)
self._save_task_locked(task)
return self._serialize_task(task)
workspace = WorkspaceRecord.from_instance(instance, workspace_seed=workspace_seed)
self._save_workspace_locked(workspace)
return self._serialize_workspace(workspace)
except Exception:
if runtime_dir.exists():
try:
@ -1215,17 +1220,17 @@ class VmManager:
self._backend.delete(instance)
except Exception:
pass
shutil.rmtree(task_dir, ignore_errors=True)
shutil.rmtree(workspace_dir, ignore_errors=True)
raise
finally:
prepared_seed.cleanup()
def push_task_sync(
def push_workspace_sync(
self,
task_id: str,
workspace_id: str,
*,
source_path: str | Path,
dest: str = TASK_WORKSPACE_GUEST_PATH,
dest: str = WORKSPACE_GUEST_PATH,
) -> dict[str, Any]:
prepared_seed = self._prepare_workspace_seed(source_path)
if prepared_seed.archive_path is None:
@ -1233,14 +1238,17 @@ class VmManager:
raise ValueError("source_path is required")
normalized_destination, _ = _normalize_workspace_destination(dest)
with self._lock:
task = self._load_task_locked(task_id)
self._ensure_task_not_expired_locked(task, time.time())
self._refresh_task_liveness_locked(task)
if task.state != "started":
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
if workspace.state != "started":
raise RuntimeError(
f"task {task_id} must be in 'started' state before task_sync_push"
f"workspace {workspace_id} must be in 'started' state "
"before workspace_sync_push"
)
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
try:
import_summary = self._backend.import_archive(
instance,
@ -1249,58 +1257,71 @@ class VmManager:
)
finally:
prepared_seed.cleanup()
workspace_sync = prepared_seed.to_payload(destination=normalized_destination)
workspace_sync = prepared_seed.to_payload(
destination=normalized_destination,
path_key="source_path",
)
workspace_sync["entry_count"] = int(import_summary["entry_count"])
workspace_sync["bytes_written"] = int(import_summary["bytes_written"])
workspace_sync["destination"] = str(import_summary["destination"])
with self._lock:
task = self._load_task_locked(task_id)
task.state = instance.state
task.firecracker_pid = instance.firecracker_pid
task.last_error = instance.last_error
task.metadata = dict(instance.metadata)
self._save_task_locked(task)
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"task_id": task_id,
"workspace_id": workspace_id,
"execution_mode": instance.metadata.get("execution_mode", "pending"),
"workspace_sync": workspace_sync,
}
def exec_task(self, task_id: str, *, command: str, timeout_seconds: int = 30) -> dict[str, Any]:
def exec_workspace(
self,
workspace_id: str,
*,
command: str,
timeout_seconds: int = 30,
) -> dict[str, Any]:
if timeout_seconds <= 0:
raise ValueError("timeout_seconds must be positive")
with self._lock:
task = self._load_task_locked(task_id)
self._ensure_task_not_expired_locked(task, time.time())
self._refresh_task_liveness_locked(task)
if task.state != "started":
raise RuntimeError(f"task {task_id} must be in 'started' state before task_exec")
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
if workspace.state != "started":
raise RuntimeError(
f"workspace {workspace_id} must be in 'started' state before workspace_exec"
)
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
exec_result, execution_mode = self._exec_instance(
instance,
command=command,
timeout_seconds=timeout_seconds,
host_workdir=self._task_workspace_dir(task.task_id),
guest_cwd=TASK_WORKSPACE_GUEST_PATH,
host_workdir=self._workspace_host_dir(workspace.workspace_id),
guest_cwd=WORKSPACE_GUEST_PATH,
)
with self._lock:
task = self._load_task_locked(task_id)
task.state = instance.state
task.firecracker_pid = instance.firecracker_pid
task.last_error = instance.last_error
task.metadata = dict(instance.metadata)
entry = self._record_task_command_locked(
task,
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
entry = self._record_workspace_command_locked(
workspace,
command=command,
exec_result=exec_result,
execution_mode=execution_mode,
cwd=TASK_WORKSPACE_GUEST_PATH,
cwd=WORKSPACE_GUEST_PATH,
)
self._save_task_locked(task)
self._save_workspace_locked(workspace)
return {
"task_id": task_id,
"environment": task.environment,
"environment_version": task.metadata.get("environment_version"),
"workspace_id": workspace_id,
"environment": workspace.environment,
"environment_version": workspace.metadata.get("environment_version"),
"command": command,
"stdout": exec_result.stdout,
"stderr": exec_result.stderr,
@ -1308,36 +1329,47 @@ class VmManager:
"duration_ms": exec_result.duration_ms,
"execution_mode": execution_mode,
"sequence": entry["sequence"],
"cwd": TASK_WORKSPACE_GUEST_PATH,
"cwd": WORKSPACE_GUEST_PATH,
}
def status_task(self, task_id: str) -> dict[str, Any]:
def status_workspace(self, workspace_id: str) -> dict[str, Any]:
with self._lock:
task = self._load_task_locked(task_id)
self._ensure_task_not_expired_locked(task, time.time())
self._refresh_task_liveness_locked(task)
self._save_task_locked(task)
return self._serialize_task(task)
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
self._save_workspace_locked(workspace)
return self._serialize_workspace(workspace)
def logs_task(self, task_id: str) -> dict[str, Any]:
def logs_workspace(self, workspace_id: str) -> dict[str, Any]:
with self._lock:
task = self._load_task_locked(task_id)
self._ensure_task_not_expired_locked(task, time.time())
self._refresh_task_liveness_locked(task)
self._save_task_locked(task)
entries = self._read_task_logs_locked(task.task_id)
return {"task_id": task.task_id, "count": len(entries), "entries": entries}
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
self._save_workspace_locked(workspace)
entries = self._read_workspace_logs_locked(workspace.workspace_id)
return {
"workspace_id": workspace.workspace_id,
"count": len(entries),
"entries": entries,
}
def delete_task(self, task_id: str, *, reason: str = "explicit_delete") -> dict[str, Any]:
def delete_workspace(
self,
workspace_id: str,
*,
reason: str = "explicit_delete",
) -> dict[str, Any]:
with self._lock:
task = self._load_task_locked(task_id)
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
if task.state == "started":
workspace = self._load_workspace_locked(workspace_id)
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
if workspace.state == "started":
self._backend.stop(instance)
task.state = "stopped"
workspace.state = "stopped"
self._backend.delete(instance)
shutil.rmtree(self._task_dir(task_id), ignore_errors=True)
return {"task_id": task_id, "deleted": True, "reason": reason}
shutil.rmtree(self._workspace_dir(workspace_id), ignore_errors=True)
return {"workspace_id": workspace_id, "deleted": True, "reason": reason}
def _validate_limits(self, *, vcpu_count: int, mem_mib: int, ttl_seconds: int) -> None:
if not self.MIN_VCPUS <= vcpu_count <= self.MAX_VCPUS:
@ -1368,27 +1400,27 @@ class VmManager:
"metadata": instance.metadata,
}
def _serialize_task(self, task: TaskRecord) -> dict[str, Any]:
def _serialize_workspace(self, workspace: WorkspaceRecord) -> dict[str, Any]:
return {
"task_id": task.task_id,
"environment": task.environment,
"environment_version": task.metadata.get("environment_version"),
"vcpu_count": task.vcpu_count,
"mem_mib": task.mem_mib,
"ttl_seconds": task.ttl_seconds,
"created_at": task.created_at,
"expires_at": task.expires_at,
"state": task.state,
"network_enabled": task.network is not None,
"allow_host_compat": task.allow_host_compat,
"guest_ip": task.network.guest_ip if task.network is not None else None,
"tap_name": task.network.tap_name if task.network is not None else None,
"execution_mode": task.metadata.get("execution_mode", "pending"),
"workspace_path": TASK_WORKSPACE_GUEST_PATH,
"workspace_seed": _task_workspace_seed_dict(task.workspace_seed),
"command_count": task.command_count,
"last_command": task.last_command,
"metadata": task.metadata,
"workspace_id": workspace.workspace_id,
"environment": workspace.environment,
"environment_version": workspace.metadata.get("environment_version"),
"vcpu_count": workspace.vcpu_count,
"mem_mib": workspace.mem_mib,
"ttl_seconds": workspace.ttl_seconds,
"created_at": workspace.created_at,
"expires_at": workspace.expires_at,
"state": workspace.state,
"network_enabled": workspace.network is not None,
"allow_host_compat": workspace.allow_host_compat,
"guest_ip": workspace.network.guest_ip if workspace.network is not None else None,
"tap_name": workspace.network.tap_name if workspace.network is not None else None,
"execution_mode": workspace.metadata.get("execution_mode", "pending"),
"workspace_path": WORKSPACE_GUEST_PATH,
"workspace_seed": _workspace_seed_dict(workspace.workspace_seed),
"command_count": workspace.command_count,
"last_command": workspace.last_command,
"metadata": workspace.metadata,
}
def _require_guest_boot_or_opt_in(self, instance: VmInstance) -> None:
@ -1481,14 +1513,14 @@ class VmManager:
execution_mode = instance.metadata.get("execution_mode", "unknown")
return exec_result, execution_mode
def _prepare_workspace_seed(self, source_path: str | Path | None) -> PreparedWorkspaceSeed:
if source_path is None:
def _prepare_workspace_seed(self, seed_path: str | Path | None) -> PreparedWorkspaceSeed:
if seed_path is None:
return PreparedWorkspaceSeed(mode="empty", source_path=None)
resolved_source_path = Path(source_path).expanduser().resolve()
resolved_source_path = Path(seed_path).expanduser().resolve()
if not resolved_source_path.exists():
raise ValueError(f"source_path {resolved_source_path} does not exist")
raise ValueError(f"seed_path {resolved_source_path} does not exist")
if resolved_source_path.is_dir():
cleanup_dir = Path(tempfile.mkdtemp(prefix="pyro-task-seed-"))
cleanup_dir = Path(tempfile.mkdtemp(prefix="pyro-workspace-seed-"))
archive_path = cleanup_dir / "workspace-seed.tar"
try:
_write_directory_seed_archive(resolved_source_path, archive_path)
@ -1509,7 +1541,7 @@ class VmManager:
or not _is_supported_seed_archive(resolved_source_path)
):
raise ValueError(
"source_path must be a directory or a .tar/.tar.gz/.tgz archive"
"seed_path must be a directory or a .tar/.tar.gz/.tgz archive"
)
entry_count, bytes_written = _inspect_seed_archive(resolved_source_path)
return PreparedWorkspaceSeed(
@ -1520,94 +1552,102 @@ class VmManager:
bytes_written=bytes_written,
)
def _ensure_task_guest_seed_support(self, instance: VmInstance) -> None:
def _ensure_workspace_guest_seed_support(self, instance: VmInstance) -> None:
if self._runtime_paths is None or self._runtime_paths.guest_agent_path is None:
raise RuntimeError("runtime bundle does not provide a guest agent for task seeding")
raise RuntimeError(
"runtime bundle does not provide a guest agent for workspace seeding"
)
rootfs_image = instance.metadata.get("rootfs_image")
if rootfs_image is None or rootfs_image == "":
raise RuntimeError("task rootfs image is unavailable for guest workspace seeding")
raise RuntimeError("workspace rootfs image is unavailable for guest seeding")
_patch_rootfs_guest_agent(Path(rootfs_image), self._runtime_paths.guest_agent_path)
def _task_dir(self, task_id: str) -> Path:
return self._tasks_dir / task_id
def _workspace_dir(self, workspace_id: str) -> Path:
return self._workspaces_dir / workspace_id
def _task_runtime_dir(self, task_id: str) -> Path:
return self._task_dir(task_id) / TASK_RUNTIME_DIRNAME
def _workspace_runtime_dir(self, workspace_id: str) -> Path:
return self._workspace_dir(workspace_id) / WORKSPACE_RUNTIME_DIRNAME
def _task_workspace_dir(self, task_id: str) -> Path:
return self._task_dir(task_id) / TASK_WORKSPACE_DIRNAME
def _workspace_host_dir(self, workspace_id: str) -> Path:
return self._workspace_dir(workspace_id) / WORKSPACE_DIRNAME
def _task_commands_dir(self, task_id: str) -> Path:
return self._task_dir(task_id) / TASK_COMMANDS_DIRNAME
def _workspace_commands_dir(self, workspace_id: str) -> Path:
return self._workspace_dir(workspace_id) / WORKSPACE_COMMANDS_DIRNAME
def _task_metadata_path(self, task_id: str) -> Path:
return self._task_dir(task_id) / "task.json"
def _workspace_metadata_path(self, workspace_id: str) -> Path:
return self._workspace_dir(workspace_id) / "workspace.json"
def _count_tasks_locked(self) -> int:
return sum(1 for _ in self._tasks_dir.glob("*/task.json"))
def _count_workspaces_locked(self) -> int:
return sum(1 for _ in self._workspaces_dir.glob("*/workspace.json"))
def _load_task_locked(self, task_id: str) -> TaskRecord:
metadata_path = self._task_metadata_path(task_id)
def _load_workspace_locked(self, workspace_id: str) -> WorkspaceRecord:
metadata_path = self._workspace_metadata_path(workspace_id)
if not metadata_path.exists():
raise ValueError(f"task {task_id!r} does not exist")
raise ValueError(f"workspace {workspace_id!r} does not exist")
payload = json.loads(metadata_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"task record at {metadata_path} is invalid")
return TaskRecord.from_payload(payload)
raise RuntimeError(f"workspace record at {metadata_path} is invalid")
return WorkspaceRecord.from_payload(payload)
def _save_task_locked(self, task: TaskRecord) -> None:
metadata_path = self._task_metadata_path(task.task_id)
def _save_workspace_locked(self, workspace: WorkspaceRecord) -> None:
metadata_path = self._workspace_metadata_path(workspace.workspace_id)
metadata_path.parent.mkdir(parents=True, exist_ok=True)
metadata_path.write_text(
json.dumps(task.to_payload(), indent=2, sort_keys=True),
json.dumps(workspace.to_payload(), indent=2, sort_keys=True),
encoding="utf-8",
)
def _reap_expired_tasks_locked(self, now: float) -> None:
for metadata_path in list(self._tasks_dir.glob("*/task.json")):
def _reap_expired_workspaces_locked(self, now: float) -> None:
for metadata_path in list(self._workspaces_dir.glob("*/workspace.json")):
payload = json.loads(metadata_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
shutil.rmtree(metadata_path.parent, ignore_errors=True)
continue
task = TaskRecord.from_payload(payload)
if task.expires_at > now:
workspace = WorkspaceRecord.from_payload(payload)
if workspace.expires_at > now:
continue
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
if task.state == "started":
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
if workspace.state == "started":
self._backend.stop(instance)
task.state = "stopped"
workspace.state = "stopped"
self._backend.delete(instance)
shutil.rmtree(self._task_dir(task.task_id), ignore_errors=True)
shutil.rmtree(self._workspace_dir(workspace.workspace_id), ignore_errors=True)
def _ensure_task_not_expired_locked(self, task: TaskRecord, now: float) -> None:
if task.expires_at <= now:
task_id = task.task_id
self._reap_expired_tasks_locked(now)
raise RuntimeError(f"task {task_id!r} expired and was automatically deleted")
def _ensure_workspace_not_expired_locked(
self,
workspace: WorkspaceRecord,
now: float,
) -> None:
if workspace.expires_at <= now:
workspace_id = workspace.workspace_id
self._reap_expired_workspaces_locked(now)
raise RuntimeError(f"workspace {workspace_id!r} expired and was automatically deleted")
def _refresh_task_liveness_locked(self, task: TaskRecord) -> None:
if task.state != "started":
def _refresh_workspace_liveness_locked(self, workspace: WorkspaceRecord) -> None:
if workspace.state != "started":
return
execution_mode = task.metadata.get("execution_mode")
execution_mode = workspace.metadata.get("execution_mode")
if execution_mode == "host_compat":
return
if _pid_is_running(task.firecracker_pid):
if _pid_is_running(workspace.firecracker_pid):
return
task.state = "stopped"
task.firecracker_pid = None
task.last_error = "backing guest process is no longer running"
workspace.state = "stopped"
workspace.firecracker_pid = None
workspace.last_error = "backing guest process is no longer running"
def _record_task_command_locked(
def _record_workspace_command_locked(
self,
task: TaskRecord,
workspace: WorkspaceRecord,
*,
command: str,
exec_result: VmExecResult,
execution_mode: str,
cwd: str,
) -> dict[str, Any]:
sequence = task.command_count + 1
commands_dir = self._task_commands_dir(task.task_id)
sequence = workspace.command_count + 1
commands_dir = self._workspace_commands_dir(workspace.workspace_id)
commands_dir.mkdir(parents=True, exist_ok=True)
base_name = f"{sequence:06d}"
stdout_path = commands_dir / f"{base_name}.stdout"
@ -1627,8 +1667,8 @@ class VmManager:
"recorded_at": time.time(),
}
record_path.write_text(json.dumps(entry, indent=2, sort_keys=True), encoding="utf-8")
task.command_count = sequence
task.last_command = {
workspace.command_count = sequence
workspace.last_command = {
"sequence": sequence,
"command": command,
"cwd": cwd,
@ -1638,9 +1678,9 @@ class VmManager:
}
return entry
def _read_task_logs_locked(self, task_id: str) -> list[dict[str, Any]]:
def _read_workspace_logs_locked(self, workspace_id: str) -> list[dict[str, Any]]:
entries: list[dict[str, Any]] = []
commands_dir = self._task_commands_dir(task_id)
commands_dir = self._workspace_commands_dir(workspace_id)
if not commands_dir.exists():
return entries
for record_path in sorted(commands_dir.glob("*.json")):