Pivot persistent APIs to workspaces
Replace the public persistent-sandbox contract with workspace-first naming across CLI, SDK, MCP, payloads, and on-disk state. Rename the task surface to workspace equivalents, switch create-time seeding to `seed_path`, and store records under `workspaces/<workspace_id>/workspace.json` without carrying legacy task aliases or migrating old local task state. Keep `pyro run` and `vm_*` unchanged. Validation covered `uv lock`, focused public-contract/API/CLI/manager tests, `UV_CACHE_DIR=.uv-cache make check`, and `UV_CACHE_DIR=.uv-cache make dist-check`.
This commit is contained in:
parent
f57454bcb4
commit
48b82d8386
13 changed files with 743 additions and 618 deletions
|
|
@ -1,4 +1,4 @@
|
|||
"""Lifecycle manager for ephemeral VM environments and persistent tasks."""
|
||||
"""Lifecycle manager for ephemeral VM environments and persistent workspaces."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -36,13 +36,13 @@ DEFAULT_TIMEOUT_SECONDS = 30
|
|||
DEFAULT_TTL_SECONDS = 600
|
||||
DEFAULT_ALLOW_HOST_COMPAT = False
|
||||
|
||||
TASK_LAYOUT_VERSION = 2
|
||||
TASK_WORKSPACE_DIRNAME = "workspace"
|
||||
TASK_COMMANDS_DIRNAME = "commands"
|
||||
TASK_RUNTIME_DIRNAME = "runtime"
|
||||
TASK_WORKSPACE_GUEST_PATH = "/workspace"
|
||||
TASK_GUEST_AGENT_PATH = "/opt/pyro/bin/pyro_guest_agent.py"
|
||||
TASK_ARCHIVE_UPLOAD_TIMEOUT_SECONDS = 60
|
||||
WORKSPACE_LAYOUT_VERSION = 2
|
||||
WORKSPACE_DIRNAME = "workspace"
|
||||
WORKSPACE_COMMANDS_DIRNAME = "commands"
|
||||
WORKSPACE_RUNTIME_DIRNAME = "runtime"
|
||||
WORKSPACE_GUEST_PATH = "/workspace"
|
||||
WORKSPACE_GUEST_AGENT_PATH = "/opt/pyro/bin/pyro_guest_agent.py"
|
||||
WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS = 60
|
||||
|
||||
WorkspaceSeedMode = Literal["empty", "directory", "tar_archive"]
|
||||
|
||||
|
|
@ -69,10 +69,10 @@ class VmInstance:
|
|||
|
||||
|
||||
@dataclass
|
||||
class TaskRecord:
|
||||
"""Persistent task metadata stored on disk."""
|
||||
class WorkspaceRecord:
|
||||
"""Persistent workspace metadata stored on disk."""
|
||||
|
||||
task_id: str
|
||||
workspace_id: str
|
||||
environment: str
|
||||
vcpu_count: int
|
||||
mem_mib: int
|
||||
|
|
@ -98,9 +98,9 @@ class TaskRecord:
|
|||
command_count: int = 0,
|
||||
last_command: dict[str, Any] | None = None,
|
||||
workspace_seed: dict[str, Any] | None = None,
|
||||
) -> TaskRecord:
|
||||
) -> WorkspaceRecord:
|
||||
return cls(
|
||||
task_id=instance.vm_id,
|
||||
workspace_id=instance.vm_id,
|
||||
environment=instance.environment,
|
||||
vcpu_count=instance.vcpu_count,
|
||||
mem_mib=instance.mem_mib,
|
||||
|
|
@ -121,7 +121,7 @@ class TaskRecord:
|
|||
|
||||
def to_instance(self, *, workdir: Path) -> VmInstance:
|
||||
return VmInstance(
|
||||
vm_id=self.task_id,
|
||||
vm_id=self.workspace_id,
|
||||
environment=self.environment,
|
||||
vcpu_count=self.vcpu_count,
|
||||
mem_mib=self.mem_mib,
|
||||
|
|
@ -140,8 +140,8 @@ class TaskRecord:
|
|||
|
||||
def to_payload(self) -> dict[str, Any]:
|
||||
return {
|
||||
"layout_version": TASK_LAYOUT_VERSION,
|
||||
"task_id": self.task_id,
|
||||
"layout_version": WORKSPACE_LAYOUT_VERSION,
|
||||
"workspace_id": self.workspace_id,
|
||||
"environment": self.environment,
|
||||
"vcpu_count": self.vcpu_count,
|
||||
"mem_mib": self.mem_mib,
|
||||
|
|
@ -161,9 +161,9 @@ class TaskRecord:
|
|||
}
|
||||
|
||||
@classmethod
|
||||
def from_payload(cls, payload: dict[str, Any]) -> TaskRecord:
|
||||
def from_payload(cls, payload: dict[str, Any]) -> WorkspaceRecord:
|
||||
return cls(
|
||||
task_id=str(payload["task_id"]),
|
||||
workspace_id=str(payload["workspace_id"]),
|
||||
environment=str(payload["environment"]),
|
||||
vcpu_count=int(payload["vcpu_count"]),
|
||||
mem_mib=int(payload["mem_mib"]),
|
||||
|
|
@ -179,7 +179,7 @@ class TaskRecord:
|
|||
network=_deserialize_network(payload.get("network")),
|
||||
command_count=int(payload.get("command_count", 0)),
|
||||
last_command=_optional_dict(payload.get("last_command")),
|
||||
workspace_seed=_task_workspace_seed_dict(payload.get("workspace_seed")),
|
||||
workspace_seed=_workspace_seed_dict(payload.get("workspace_seed")),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -194,10 +194,15 @@ class PreparedWorkspaceSeed:
|
|||
bytes_written: int = 0
|
||||
cleanup_dir: Path | None = None
|
||||
|
||||
def to_payload(self, *, destination: str = TASK_WORKSPACE_GUEST_PATH) -> dict[str, Any]:
|
||||
def to_payload(
|
||||
self,
|
||||
*,
|
||||
destination: str = WORKSPACE_GUEST_PATH,
|
||||
path_key: str = "seed_path",
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"mode": self.mode,
|
||||
"source_path": self.source_path,
|
||||
path_key: self.source_path,
|
||||
"destination": destination,
|
||||
"entry_count": self.entry_count,
|
||||
"bytes_written": self.bytes_written,
|
||||
|
|
@ -255,21 +260,21 @@ def _string_dict(value: object) -> dict[str, str]:
|
|||
def _empty_workspace_seed_payload() -> dict[str, Any]:
|
||||
return {
|
||||
"mode": "empty",
|
||||
"source_path": None,
|
||||
"destination": TASK_WORKSPACE_GUEST_PATH,
|
||||
"seed_path": None,
|
||||
"destination": WORKSPACE_GUEST_PATH,
|
||||
"entry_count": 0,
|
||||
"bytes_written": 0,
|
||||
}
|
||||
|
||||
|
||||
def _task_workspace_seed_dict(value: object) -> dict[str, Any]:
|
||||
def _workspace_seed_dict(value: object) -> dict[str, Any]:
|
||||
if not isinstance(value, dict):
|
||||
return _empty_workspace_seed_payload()
|
||||
payload = _empty_workspace_seed_payload()
|
||||
payload.update(
|
||||
{
|
||||
"mode": str(value.get("mode", payload["mode"])),
|
||||
"source_path": _optional_str(value.get("source_path")),
|
||||
"seed_path": _optional_str(value.get("seed_path")),
|
||||
"destination": str(value.get("destination", payload["destination"])),
|
||||
"entry_count": int(value.get("entry_count", payload["entry_count"])),
|
||||
"bytes_written": int(value.get("bytes_written", payload["bytes_written"])),
|
||||
|
|
@ -374,7 +379,7 @@ def _normalize_workspace_destination(destination: str) -> tuple[str, PurePosixPa
|
|||
destination_path = PurePosixPath(candidate)
|
||||
if any(part == ".." for part in destination_path.parts):
|
||||
raise ValueError("workspace destination must stay inside /workspace")
|
||||
workspace_root = PurePosixPath(TASK_WORKSPACE_GUEST_PATH)
|
||||
workspace_root = PurePosixPath(WORKSPACE_GUEST_PATH)
|
||||
if not destination_path.is_absolute():
|
||||
destination_path = workspace_root / destination_path
|
||||
parts = [part for part in destination_path.parts if part not in {"", "."}]
|
||||
|
|
@ -510,7 +515,7 @@ def _extract_seed_archive_to_host_workspace(
|
|||
def _instance_workspace_host_dir(instance: VmInstance) -> Path:
|
||||
raw_value = instance.metadata.get("workspace_host_dir")
|
||||
if raw_value is None or raw_value == "":
|
||||
raise RuntimeError("task workspace host directory is unavailable")
|
||||
raise RuntimeError("workspace host directory is unavailable")
|
||||
return Path(raw_value)
|
||||
|
||||
|
||||
|
|
@ -518,13 +523,13 @@ def _patch_rootfs_guest_agent(rootfs_image: Path, guest_agent_path: Path) -> Non
|
|||
debugfs_path = shutil.which("debugfs")
|
||||
if debugfs_path is None:
|
||||
raise RuntimeError(
|
||||
"debugfs is required to seed task workspaces on guest-backed runtimes"
|
||||
"debugfs is required to seed workspaces on guest-backed runtimes"
|
||||
)
|
||||
with tempfile.TemporaryDirectory(prefix="pyro-guest-agent-") as temp_dir:
|
||||
staged_agent_path = Path(temp_dir) / "pyro_guest_agent.py"
|
||||
shutil.copy2(guest_agent_path, staged_agent_path)
|
||||
subprocess.run( # noqa: S603
|
||||
[debugfs_path, "-w", "-R", f"rm {TASK_GUEST_AGENT_PATH}", str(rootfs_image)],
|
||||
[debugfs_path, "-w", "-R", f"rm {WORKSPACE_GUEST_AGENT_PATH}", str(rootfs_image)],
|
||||
text=True,
|
||||
capture_output=True,
|
||||
check=False,
|
||||
|
|
@ -534,7 +539,7 @@ def _patch_rootfs_guest_agent(rootfs_image: Path, guest_agent_path: Path) -> Non
|
|||
debugfs_path,
|
||||
"-w",
|
||||
"-R",
|
||||
f"write {staged_agent_path} {TASK_GUEST_AGENT_PATH}",
|
||||
f"write {staged_agent_path} {WORKSPACE_GUEST_AGENT_PATH}",
|
||||
str(rootfs_image),
|
||||
],
|
||||
text=True,
|
||||
|
|
@ -543,7 +548,7 @@ def _patch_rootfs_guest_agent(rootfs_image: Path, guest_agent_path: Path) -> Non
|
|||
)
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(
|
||||
"failed to patch guest agent into task rootfs: "
|
||||
"failed to patch guest agent into workspace rootfs: "
|
||||
f"{proc.stderr.strip() or proc.stdout.strip()}"
|
||||
)
|
||||
|
||||
|
|
@ -862,7 +867,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
port,
|
||||
archive_path,
|
||||
destination=destination,
|
||||
timeout_seconds=TASK_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
|
||||
timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
|
||||
uds_path=uds_path,
|
||||
)
|
||||
return {
|
||||
|
|
@ -885,7 +890,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
|
||||
|
||||
class VmManager:
|
||||
"""In-process lifecycle manager for ephemeral VM environments and tasks."""
|
||||
"""In-process lifecycle manager for ephemeral VM environments and workspaces."""
|
||||
|
||||
MIN_VCPUS = 1
|
||||
MAX_VCPUS = 8
|
||||
|
|
@ -911,7 +916,7 @@ class VmManager:
|
|||
) -> None:
|
||||
self._backend_name = backend_name or "firecracker"
|
||||
self._base_dir = base_dir or Path("/tmp/pyro-mcp")
|
||||
self._tasks_dir = self._base_dir / "tasks"
|
||||
self._workspaces_dir = self._base_dir / "workspaces"
|
||||
resolved_cache_dir = cache_dir or default_cache_dir()
|
||||
self._runtime_paths = runtime_paths
|
||||
if self._backend_name == "firecracker":
|
||||
|
|
@ -944,7 +949,7 @@ class VmManager:
|
|||
self._lock = threading.Lock()
|
||||
self._instances: dict[str, VmInstance] = {}
|
||||
self._base_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._tasks_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._workspaces_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._backend = self._build_backend()
|
||||
|
||||
def _build_backend(self) -> VmBackend:
|
||||
|
|
@ -989,8 +994,8 @@ class VmManager:
|
|||
now = time.time()
|
||||
with self._lock:
|
||||
self._reap_expired_locked(now)
|
||||
self._reap_expired_tasks_locked(now)
|
||||
active_count = len(self._instances) + self._count_tasks_locked()
|
||||
self._reap_expired_workspaces_locked(now)
|
||||
active_count = len(self._instances) + self._count_workspaces_locked()
|
||||
if active_count >= self._max_active_vms:
|
||||
raise RuntimeError(
|
||||
f"max active VMs reached ({self._max_active_vms}); delete old VMs first"
|
||||
|
|
@ -1126,7 +1131,7 @@ class VmManager:
|
|||
del self._instances[vm_id]
|
||||
return {"deleted_vm_ids": expired_vm_ids, "count": len(expired_vm_ids)}
|
||||
|
||||
def create_task(
|
||||
def create_workspace(
|
||||
self,
|
||||
*,
|
||||
environment: str,
|
||||
|
|
@ -1135,22 +1140,22 @@ class VmManager:
|
|||
ttl_seconds: int = DEFAULT_TTL_SECONDS,
|
||||
network: bool = False,
|
||||
allow_host_compat: bool = DEFAULT_ALLOW_HOST_COMPAT,
|
||||
source_path: str | Path | None = None,
|
||||
seed_path: str | Path | None = None,
|
||||
) -> dict[str, Any]:
|
||||
self._validate_limits(vcpu_count=vcpu_count, mem_mib=mem_mib, ttl_seconds=ttl_seconds)
|
||||
get_environment(environment, runtime_paths=self._runtime_paths)
|
||||
prepared_seed = self._prepare_workspace_seed(source_path)
|
||||
prepared_seed = self._prepare_workspace_seed(seed_path)
|
||||
now = time.time()
|
||||
task_id = uuid.uuid4().hex[:12]
|
||||
task_dir = self._task_dir(task_id)
|
||||
runtime_dir = self._task_runtime_dir(task_id)
|
||||
workspace_dir = self._task_workspace_dir(task_id)
|
||||
commands_dir = self._task_commands_dir(task_id)
|
||||
task_dir.mkdir(parents=True, exist_ok=False)
|
||||
workspace_dir.mkdir(parents=True, exist_ok=True)
|
||||
workspace_id = uuid.uuid4().hex[:12]
|
||||
workspace_dir = self._workspace_dir(workspace_id)
|
||||
runtime_dir = self._workspace_runtime_dir(workspace_id)
|
||||
host_workspace_dir = self._workspace_host_dir(workspace_id)
|
||||
commands_dir = self._workspace_commands_dir(workspace_id)
|
||||
workspace_dir.mkdir(parents=True, exist_ok=False)
|
||||
host_workspace_dir.mkdir(parents=True, exist_ok=True)
|
||||
commands_dir.mkdir(parents=True, exist_ok=True)
|
||||
instance = VmInstance(
|
||||
vm_id=task_id,
|
||||
vm_id=workspace_id,
|
||||
environment=environment,
|
||||
vcpu_count=vcpu_count,
|
||||
mem_mib=mem_mib,
|
||||
|
|
@ -1162,13 +1167,13 @@ class VmManager:
|
|||
allow_host_compat=allow_host_compat,
|
||||
)
|
||||
instance.metadata["allow_host_compat"] = str(allow_host_compat).lower()
|
||||
instance.metadata["workspace_path"] = TASK_WORKSPACE_GUEST_PATH
|
||||
instance.metadata["workspace_host_dir"] = str(workspace_dir)
|
||||
instance.metadata["workspace_path"] = WORKSPACE_GUEST_PATH
|
||||
instance.metadata["workspace_host_dir"] = str(host_workspace_dir)
|
||||
try:
|
||||
with self._lock:
|
||||
self._reap_expired_locked(now)
|
||||
self._reap_expired_tasks_locked(now)
|
||||
active_count = len(self._instances) + self._count_tasks_locked()
|
||||
self._reap_expired_workspaces_locked(now)
|
||||
active_count = len(self._instances) + self._count_workspaces_locked()
|
||||
if active_count >= self._max_active_vms:
|
||||
raise RuntimeError(
|
||||
f"max active VMs reached ({self._max_active_vms}); delete old VMs first"
|
||||
|
|
@ -1178,7 +1183,7 @@ class VmManager:
|
|||
prepared_seed.archive_path is not None
|
||||
and self._runtime_capabilities.supports_guest_exec
|
||||
):
|
||||
self._ensure_task_guest_seed_support(instance)
|
||||
self._ensure_workspace_guest_seed_support(instance)
|
||||
with self._lock:
|
||||
self._start_instance_locked(instance)
|
||||
self._require_guest_exec_or_opt_in(instance)
|
||||
|
|
@ -1187,7 +1192,7 @@ class VmManager:
|
|||
import_summary = self._backend.import_archive(
|
||||
instance,
|
||||
archive_path=prepared_seed.archive_path,
|
||||
destination=TASK_WORKSPACE_GUEST_PATH,
|
||||
destination=WORKSPACE_GUEST_PATH,
|
||||
)
|
||||
workspace_seed["entry_count"] = int(import_summary["entry_count"])
|
||||
workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
|
||||
|
|
@ -1195,14 +1200,14 @@ class VmManager:
|
|||
elif self._runtime_capabilities.supports_guest_exec:
|
||||
self._backend.exec(
|
||||
instance,
|
||||
f"mkdir -p {shlex.quote(TASK_WORKSPACE_GUEST_PATH)}",
|
||||
f"mkdir -p {shlex.quote(WORKSPACE_GUEST_PATH)}",
|
||||
10,
|
||||
)
|
||||
else:
|
||||
instance.metadata["execution_mode"] = "host_compat"
|
||||
task = TaskRecord.from_instance(instance, workspace_seed=workspace_seed)
|
||||
self._save_task_locked(task)
|
||||
return self._serialize_task(task)
|
||||
workspace = WorkspaceRecord.from_instance(instance, workspace_seed=workspace_seed)
|
||||
self._save_workspace_locked(workspace)
|
||||
return self._serialize_workspace(workspace)
|
||||
except Exception:
|
||||
if runtime_dir.exists():
|
||||
try:
|
||||
|
|
@ -1215,17 +1220,17 @@ class VmManager:
|
|||
self._backend.delete(instance)
|
||||
except Exception:
|
||||
pass
|
||||
shutil.rmtree(task_dir, ignore_errors=True)
|
||||
shutil.rmtree(workspace_dir, ignore_errors=True)
|
||||
raise
|
||||
finally:
|
||||
prepared_seed.cleanup()
|
||||
|
||||
def push_task_sync(
|
||||
def push_workspace_sync(
|
||||
self,
|
||||
task_id: str,
|
||||
workspace_id: str,
|
||||
*,
|
||||
source_path: str | Path,
|
||||
dest: str = TASK_WORKSPACE_GUEST_PATH,
|
||||
dest: str = WORKSPACE_GUEST_PATH,
|
||||
) -> dict[str, Any]:
|
||||
prepared_seed = self._prepare_workspace_seed(source_path)
|
||||
if prepared_seed.archive_path is None:
|
||||
|
|
@ -1233,14 +1238,17 @@ class VmManager:
|
|||
raise ValueError("source_path is required")
|
||||
normalized_destination, _ = _normalize_workspace_destination(dest)
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
self._ensure_task_not_expired_locked(task, time.time())
|
||||
self._refresh_task_liveness_locked(task)
|
||||
if task.state != "started":
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
self._ensure_workspace_not_expired_locked(workspace, time.time())
|
||||
self._refresh_workspace_liveness_locked(workspace)
|
||||
if workspace.state != "started":
|
||||
raise RuntimeError(
|
||||
f"task {task_id} must be in 'started' state before task_sync_push"
|
||||
f"workspace {workspace_id} must be in 'started' state "
|
||||
"before workspace_sync_push"
|
||||
)
|
||||
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
|
||||
instance = workspace.to_instance(
|
||||
workdir=self._workspace_runtime_dir(workspace.workspace_id)
|
||||
)
|
||||
try:
|
||||
import_summary = self._backend.import_archive(
|
||||
instance,
|
||||
|
|
@ -1249,58 +1257,71 @@ class VmManager:
|
|||
)
|
||||
finally:
|
||||
prepared_seed.cleanup()
|
||||
workspace_sync = prepared_seed.to_payload(destination=normalized_destination)
|
||||
workspace_sync = prepared_seed.to_payload(
|
||||
destination=normalized_destination,
|
||||
path_key="source_path",
|
||||
)
|
||||
workspace_sync["entry_count"] = int(import_summary["entry_count"])
|
||||
workspace_sync["bytes_written"] = int(import_summary["bytes_written"])
|
||||
workspace_sync["destination"] = str(import_summary["destination"])
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
task.state = instance.state
|
||||
task.firecracker_pid = instance.firecracker_pid
|
||||
task.last_error = instance.last_error
|
||||
task.metadata = dict(instance.metadata)
|
||||
self._save_task_locked(task)
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
workspace.state = instance.state
|
||||
workspace.firecracker_pid = instance.firecracker_pid
|
||||
workspace.last_error = instance.last_error
|
||||
workspace.metadata = dict(instance.metadata)
|
||||
self._save_workspace_locked(workspace)
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"workspace_id": workspace_id,
|
||||
"execution_mode": instance.metadata.get("execution_mode", "pending"),
|
||||
"workspace_sync": workspace_sync,
|
||||
}
|
||||
|
||||
def exec_task(self, task_id: str, *, command: str, timeout_seconds: int = 30) -> dict[str, Any]:
|
||||
def exec_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
*,
|
||||
command: str,
|
||||
timeout_seconds: int = 30,
|
||||
) -> dict[str, Any]:
|
||||
if timeout_seconds <= 0:
|
||||
raise ValueError("timeout_seconds must be positive")
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
self._ensure_task_not_expired_locked(task, time.time())
|
||||
self._refresh_task_liveness_locked(task)
|
||||
if task.state != "started":
|
||||
raise RuntimeError(f"task {task_id} must be in 'started' state before task_exec")
|
||||
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
self._ensure_workspace_not_expired_locked(workspace, time.time())
|
||||
self._refresh_workspace_liveness_locked(workspace)
|
||||
if workspace.state != "started":
|
||||
raise RuntimeError(
|
||||
f"workspace {workspace_id} must be in 'started' state before workspace_exec"
|
||||
)
|
||||
instance = workspace.to_instance(
|
||||
workdir=self._workspace_runtime_dir(workspace.workspace_id)
|
||||
)
|
||||
exec_result, execution_mode = self._exec_instance(
|
||||
instance,
|
||||
command=command,
|
||||
timeout_seconds=timeout_seconds,
|
||||
host_workdir=self._task_workspace_dir(task.task_id),
|
||||
guest_cwd=TASK_WORKSPACE_GUEST_PATH,
|
||||
host_workdir=self._workspace_host_dir(workspace.workspace_id),
|
||||
guest_cwd=WORKSPACE_GUEST_PATH,
|
||||
)
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
task.state = instance.state
|
||||
task.firecracker_pid = instance.firecracker_pid
|
||||
task.last_error = instance.last_error
|
||||
task.metadata = dict(instance.metadata)
|
||||
entry = self._record_task_command_locked(
|
||||
task,
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
workspace.state = instance.state
|
||||
workspace.firecracker_pid = instance.firecracker_pid
|
||||
workspace.last_error = instance.last_error
|
||||
workspace.metadata = dict(instance.metadata)
|
||||
entry = self._record_workspace_command_locked(
|
||||
workspace,
|
||||
command=command,
|
||||
exec_result=exec_result,
|
||||
execution_mode=execution_mode,
|
||||
cwd=TASK_WORKSPACE_GUEST_PATH,
|
||||
cwd=WORKSPACE_GUEST_PATH,
|
||||
)
|
||||
self._save_task_locked(task)
|
||||
self._save_workspace_locked(workspace)
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"environment": task.environment,
|
||||
"environment_version": task.metadata.get("environment_version"),
|
||||
"workspace_id": workspace_id,
|
||||
"environment": workspace.environment,
|
||||
"environment_version": workspace.metadata.get("environment_version"),
|
||||
"command": command,
|
||||
"stdout": exec_result.stdout,
|
||||
"stderr": exec_result.stderr,
|
||||
|
|
@ -1308,36 +1329,47 @@ class VmManager:
|
|||
"duration_ms": exec_result.duration_ms,
|
||||
"execution_mode": execution_mode,
|
||||
"sequence": entry["sequence"],
|
||||
"cwd": TASK_WORKSPACE_GUEST_PATH,
|
||||
"cwd": WORKSPACE_GUEST_PATH,
|
||||
}
|
||||
|
||||
def status_task(self, task_id: str) -> dict[str, Any]:
|
||||
def status_workspace(self, workspace_id: str) -> dict[str, Any]:
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
self._ensure_task_not_expired_locked(task, time.time())
|
||||
self._refresh_task_liveness_locked(task)
|
||||
self._save_task_locked(task)
|
||||
return self._serialize_task(task)
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
self._ensure_workspace_not_expired_locked(workspace, time.time())
|
||||
self._refresh_workspace_liveness_locked(workspace)
|
||||
self._save_workspace_locked(workspace)
|
||||
return self._serialize_workspace(workspace)
|
||||
|
||||
def logs_task(self, task_id: str) -> dict[str, Any]:
|
||||
def logs_workspace(self, workspace_id: str) -> dict[str, Any]:
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
self._ensure_task_not_expired_locked(task, time.time())
|
||||
self._refresh_task_liveness_locked(task)
|
||||
self._save_task_locked(task)
|
||||
entries = self._read_task_logs_locked(task.task_id)
|
||||
return {"task_id": task.task_id, "count": len(entries), "entries": entries}
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
self._ensure_workspace_not_expired_locked(workspace, time.time())
|
||||
self._refresh_workspace_liveness_locked(workspace)
|
||||
self._save_workspace_locked(workspace)
|
||||
entries = self._read_workspace_logs_locked(workspace.workspace_id)
|
||||
return {
|
||||
"workspace_id": workspace.workspace_id,
|
||||
"count": len(entries),
|
||||
"entries": entries,
|
||||
}
|
||||
|
||||
def delete_task(self, task_id: str, *, reason: str = "explicit_delete") -> dict[str, Any]:
|
||||
def delete_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
*,
|
||||
reason: str = "explicit_delete",
|
||||
) -> dict[str, Any]:
|
||||
with self._lock:
|
||||
task = self._load_task_locked(task_id)
|
||||
instance = task.to_instance(workdir=self._task_runtime_dir(task.task_id))
|
||||
if task.state == "started":
|
||||
workspace = self._load_workspace_locked(workspace_id)
|
||||
instance = workspace.to_instance(
|
||||
workdir=self._workspace_runtime_dir(workspace.workspace_id)
|
||||
)
|
||||
if workspace.state == "started":
|
||||
self._backend.stop(instance)
|
||||
task.state = "stopped"
|
||||
workspace.state = "stopped"
|
||||
self._backend.delete(instance)
|
||||
shutil.rmtree(self._task_dir(task_id), ignore_errors=True)
|
||||
return {"task_id": task_id, "deleted": True, "reason": reason}
|
||||
shutil.rmtree(self._workspace_dir(workspace_id), ignore_errors=True)
|
||||
return {"workspace_id": workspace_id, "deleted": True, "reason": reason}
|
||||
|
||||
def _validate_limits(self, *, vcpu_count: int, mem_mib: int, ttl_seconds: int) -> None:
|
||||
if not self.MIN_VCPUS <= vcpu_count <= self.MAX_VCPUS:
|
||||
|
|
@ -1368,27 +1400,27 @@ class VmManager:
|
|||
"metadata": instance.metadata,
|
||||
}
|
||||
|
||||
def _serialize_task(self, task: TaskRecord) -> dict[str, Any]:
|
||||
def _serialize_workspace(self, workspace: WorkspaceRecord) -> dict[str, Any]:
|
||||
return {
|
||||
"task_id": task.task_id,
|
||||
"environment": task.environment,
|
||||
"environment_version": task.metadata.get("environment_version"),
|
||||
"vcpu_count": task.vcpu_count,
|
||||
"mem_mib": task.mem_mib,
|
||||
"ttl_seconds": task.ttl_seconds,
|
||||
"created_at": task.created_at,
|
||||
"expires_at": task.expires_at,
|
||||
"state": task.state,
|
||||
"network_enabled": task.network is not None,
|
||||
"allow_host_compat": task.allow_host_compat,
|
||||
"guest_ip": task.network.guest_ip if task.network is not None else None,
|
||||
"tap_name": task.network.tap_name if task.network is not None else None,
|
||||
"execution_mode": task.metadata.get("execution_mode", "pending"),
|
||||
"workspace_path": TASK_WORKSPACE_GUEST_PATH,
|
||||
"workspace_seed": _task_workspace_seed_dict(task.workspace_seed),
|
||||
"command_count": task.command_count,
|
||||
"last_command": task.last_command,
|
||||
"metadata": task.metadata,
|
||||
"workspace_id": workspace.workspace_id,
|
||||
"environment": workspace.environment,
|
||||
"environment_version": workspace.metadata.get("environment_version"),
|
||||
"vcpu_count": workspace.vcpu_count,
|
||||
"mem_mib": workspace.mem_mib,
|
||||
"ttl_seconds": workspace.ttl_seconds,
|
||||
"created_at": workspace.created_at,
|
||||
"expires_at": workspace.expires_at,
|
||||
"state": workspace.state,
|
||||
"network_enabled": workspace.network is not None,
|
||||
"allow_host_compat": workspace.allow_host_compat,
|
||||
"guest_ip": workspace.network.guest_ip if workspace.network is not None else None,
|
||||
"tap_name": workspace.network.tap_name if workspace.network is not None else None,
|
||||
"execution_mode": workspace.metadata.get("execution_mode", "pending"),
|
||||
"workspace_path": WORKSPACE_GUEST_PATH,
|
||||
"workspace_seed": _workspace_seed_dict(workspace.workspace_seed),
|
||||
"command_count": workspace.command_count,
|
||||
"last_command": workspace.last_command,
|
||||
"metadata": workspace.metadata,
|
||||
}
|
||||
|
||||
def _require_guest_boot_or_opt_in(self, instance: VmInstance) -> None:
|
||||
|
|
@ -1481,14 +1513,14 @@ class VmManager:
|
|||
execution_mode = instance.metadata.get("execution_mode", "unknown")
|
||||
return exec_result, execution_mode
|
||||
|
||||
def _prepare_workspace_seed(self, source_path: str | Path | None) -> PreparedWorkspaceSeed:
|
||||
if source_path is None:
|
||||
def _prepare_workspace_seed(self, seed_path: str | Path | None) -> PreparedWorkspaceSeed:
|
||||
if seed_path is None:
|
||||
return PreparedWorkspaceSeed(mode="empty", source_path=None)
|
||||
resolved_source_path = Path(source_path).expanduser().resolve()
|
||||
resolved_source_path = Path(seed_path).expanduser().resolve()
|
||||
if not resolved_source_path.exists():
|
||||
raise ValueError(f"source_path {resolved_source_path} does not exist")
|
||||
raise ValueError(f"seed_path {resolved_source_path} does not exist")
|
||||
if resolved_source_path.is_dir():
|
||||
cleanup_dir = Path(tempfile.mkdtemp(prefix="pyro-task-seed-"))
|
||||
cleanup_dir = Path(tempfile.mkdtemp(prefix="pyro-workspace-seed-"))
|
||||
archive_path = cleanup_dir / "workspace-seed.tar"
|
||||
try:
|
||||
_write_directory_seed_archive(resolved_source_path, archive_path)
|
||||
|
|
@ -1509,7 +1541,7 @@ class VmManager:
|
|||
or not _is_supported_seed_archive(resolved_source_path)
|
||||
):
|
||||
raise ValueError(
|
||||
"source_path must be a directory or a .tar/.tar.gz/.tgz archive"
|
||||
"seed_path must be a directory or a .tar/.tar.gz/.tgz archive"
|
||||
)
|
||||
entry_count, bytes_written = _inspect_seed_archive(resolved_source_path)
|
||||
return PreparedWorkspaceSeed(
|
||||
|
|
@ -1520,94 +1552,102 @@ class VmManager:
|
|||
bytes_written=bytes_written,
|
||||
)
|
||||
|
||||
def _ensure_task_guest_seed_support(self, instance: VmInstance) -> None:
|
||||
def _ensure_workspace_guest_seed_support(self, instance: VmInstance) -> None:
|
||||
if self._runtime_paths is None or self._runtime_paths.guest_agent_path is None:
|
||||
raise RuntimeError("runtime bundle does not provide a guest agent for task seeding")
|
||||
raise RuntimeError(
|
||||
"runtime bundle does not provide a guest agent for workspace seeding"
|
||||
)
|
||||
rootfs_image = instance.metadata.get("rootfs_image")
|
||||
if rootfs_image is None or rootfs_image == "":
|
||||
raise RuntimeError("task rootfs image is unavailable for guest workspace seeding")
|
||||
raise RuntimeError("workspace rootfs image is unavailable for guest seeding")
|
||||
_patch_rootfs_guest_agent(Path(rootfs_image), self._runtime_paths.guest_agent_path)
|
||||
|
||||
def _task_dir(self, task_id: str) -> Path:
|
||||
return self._tasks_dir / task_id
|
||||
def _workspace_dir(self, workspace_id: str) -> Path:
|
||||
return self._workspaces_dir / workspace_id
|
||||
|
||||
def _task_runtime_dir(self, task_id: str) -> Path:
|
||||
return self._task_dir(task_id) / TASK_RUNTIME_DIRNAME
|
||||
def _workspace_runtime_dir(self, workspace_id: str) -> Path:
|
||||
return self._workspace_dir(workspace_id) / WORKSPACE_RUNTIME_DIRNAME
|
||||
|
||||
def _task_workspace_dir(self, task_id: str) -> Path:
|
||||
return self._task_dir(task_id) / TASK_WORKSPACE_DIRNAME
|
||||
def _workspace_host_dir(self, workspace_id: str) -> Path:
|
||||
return self._workspace_dir(workspace_id) / WORKSPACE_DIRNAME
|
||||
|
||||
def _task_commands_dir(self, task_id: str) -> Path:
|
||||
return self._task_dir(task_id) / TASK_COMMANDS_DIRNAME
|
||||
def _workspace_commands_dir(self, workspace_id: str) -> Path:
|
||||
return self._workspace_dir(workspace_id) / WORKSPACE_COMMANDS_DIRNAME
|
||||
|
||||
def _task_metadata_path(self, task_id: str) -> Path:
|
||||
return self._task_dir(task_id) / "task.json"
|
||||
def _workspace_metadata_path(self, workspace_id: str) -> Path:
|
||||
return self._workspace_dir(workspace_id) / "workspace.json"
|
||||
|
||||
def _count_tasks_locked(self) -> int:
|
||||
return sum(1 for _ in self._tasks_dir.glob("*/task.json"))
|
||||
def _count_workspaces_locked(self) -> int:
|
||||
return sum(1 for _ in self._workspaces_dir.glob("*/workspace.json"))
|
||||
|
||||
def _load_task_locked(self, task_id: str) -> TaskRecord:
|
||||
metadata_path = self._task_metadata_path(task_id)
|
||||
def _load_workspace_locked(self, workspace_id: str) -> WorkspaceRecord:
|
||||
metadata_path = self._workspace_metadata_path(workspace_id)
|
||||
if not metadata_path.exists():
|
||||
raise ValueError(f"task {task_id!r} does not exist")
|
||||
raise ValueError(f"workspace {workspace_id!r} does not exist")
|
||||
payload = json.loads(metadata_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(payload, dict):
|
||||
raise RuntimeError(f"task record at {metadata_path} is invalid")
|
||||
return TaskRecord.from_payload(payload)
|
||||
raise RuntimeError(f"workspace record at {metadata_path} is invalid")
|
||||
return WorkspaceRecord.from_payload(payload)
|
||||
|
||||
def _save_task_locked(self, task: TaskRecord) -> None:
|
||||
metadata_path = self._task_metadata_path(task.task_id)
|
||||
def _save_workspace_locked(self, workspace: WorkspaceRecord) -> None:
|
||||
metadata_path = self._workspace_metadata_path(workspace.workspace_id)
|
||||
metadata_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
metadata_path.write_text(
|
||||
json.dumps(task.to_payload(), indent=2, sort_keys=True),
|
||||
json.dumps(workspace.to_payload(), indent=2, sort_keys=True),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def _reap_expired_workspaces_locked(self, now: float) -> None:
    """Delete every workspace whose ``expires_at`` deadline has passed.

    Caller must hold the manager lock. For each persisted record:

    * unreadable/invalid records (non-dict JSON) are removed outright;
    * still-live workspaces (``expires_at > now``) are left alone;
    * expired workspaces are stopped (if currently started), deleted from
      the backend, and their on-disk directory is removed.
    """
    for metadata_path in list(self._workspaces_dir.glob("*/workspace.json")):
        payload = json.loads(metadata_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            # Corrupt record: drop the whole workspace directory.
            shutil.rmtree(metadata_path.parent, ignore_errors=True)
            continue
        workspace = WorkspaceRecord.from_payload(payload)
        if workspace.expires_at > now:
            continue
        instance = workspace.to_instance(
            workdir=self._workspace_runtime_dir(workspace.workspace_id)
        )
        if workspace.state == "started":
            self._backend.stop(instance)
            workspace.state = "stopped"
        self._backend.delete(instance)
        shutil.rmtree(self._workspace_dir(workspace.workspace_id), ignore_errors=True)
||||
def _ensure_workspace_not_expired_locked(
    self,
    workspace: WorkspaceRecord,
    now: float,
) -> None:
    """Fail fast if ``workspace`` has passed its TTL deadline.

    Caller must hold the manager lock. On expiry the reaper is invoked (which
    deletes the workspace's backend instance and on-disk state), so the id is
    captured first for the error message.

    Raises:
        RuntimeError: if the workspace expired and was reaped.
    """
    if workspace.expires_at <= now:
        workspace_id = workspace.workspace_id
        self._reap_expired_workspaces_locked(now)
        raise RuntimeError(f"workspace {workspace_id!r} expired and was automatically deleted")
||||
def _refresh_workspace_liveness_locked(self, workspace: WorkspaceRecord) -> None:
    """Reconcile a started workspace's state with its backing guest process.

    Caller must hold the manager lock. No-op unless the record says the
    workspace is started. ``host_compat`` execution mode has no firecracker
    PID to probe, so it is skipped. If the recorded PID is gone, the record
    is flipped to ``stopped`` in memory with an explanatory ``last_error``
    (persisting the change is the caller's responsibility).
    """
    if workspace.state != "started":
        return
    execution_mode = workspace.metadata.get("execution_mode")
    if execution_mode == "host_compat":
        return
    if _pid_is_running(workspace.firecracker_pid):
        return
    workspace.state = "stopped"
    workspace.firecracker_pid = None
    workspace.last_error = "backing guest process is no longer running"
||||
def _record_task_command_locked(
|
||||
def _record_workspace_command_locked(
|
||||
self,
|
||||
task: TaskRecord,
|
||||
workspace: WorkspaceRecord,
|
||||
*,
|
||||
command: str,
|
||||
exec_result: VmExecResult,
|
||||
execution_mode: str,
|
||||
cwd: str,
|
||||
) -> dict[str, Any]:
|
||||
sequence = task.command_count + 1
|
||||
commands_dir = self._task_commands_dir(task.task_id)
|
||||
sequence = workspace.command_count + 1
|
||||
commands_dir = self._workspace_commands_dir(workspace.workspace_id)
|
||||
commands_dir.mkdir(parents=True, exist_ok=True)
|
||||
base_name = f"{sequence:06d}"
|
||||
stdout_path = commands_dir / f"{base_name}.stdout"
|
||||
|
|
@ -1627,8 +1667,8 @@ class VmManager:
|
|||
"recorded_at": time.time(),
|
||||
}
|
||||
record_path.write_text(json.dumps(entry, indent=2, sort_keys=True), encoding="utf-8")
|
||||
task.command_count = sequence
|
||||
task.last_command = {
|
||||
workspace.command_count = sequence
|
||||
workspace.last_command = {
|
||||
"sequence": sequence,
|
||||
"command": command,
|
||||
"cwd": cwd,
|
||||
|
|
@ -1638,9 +1678,9 @@ class VmManager:
|
|||
}
|
||||
return entry
|
||||
|
||||
def _read_task_logs_locked(self, task_id: str) -> list[dict[str, Any]]:
|
||||
def _read_workspace_logs_locked(self, workspace_id: str) -> list[dict[str, Any]]:
|
||||
entries: list[dict[str, Any]] = []
|
||||
commands_dir = self._task_commands_dir(task_id)
|
||||
commands_dir = self._workspace_commands_dir(workspace_id)
|
||||
if not commands_dir.exists():
|
||||
return entries
|
||||
for record_path in sorted(commands_dir.glob("*.json")):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue