pyro-mcp/src/pyro_mcp/vm_manager.py
Thales Maciel 446f7fce04 Add workspace naming and discovery
Make concurrent workspaces easier to rediscover and resume without relying on opaque IDs alone.

Add optional workspace names, key/value labels, workspace list, and workspace update across the CLI, Python SDK, and MCP surface, and persist last_activity_at so list ordering reflects real mutating activity.

Update the stable contract, install/first-run docs, roadmap, and Python workspace example to teach the new discovery flow, and validate it with focused manager/CLI/API/server coverage plus uv lock, make check, make dist-check, and a real multi-workspace smoke for create, list, update, exec, reorder, and delete.
2026-03-12 23:16:10 -03:00

6317 lines
245 KiB
Python

"""Lifecycle manager for ephemeral VM environments and persistent workspaces."""
from __future__ import annotations
import difflib
import io
import json
import os
import re
import shlex
import shutil
import signal
import socket
import subprocess
import sys
import tarfile
import tempfile
import threading
import time
import urllib.error
import urllib.request
import uuid
from dataclasses import dataclass, field
from pathlib import Path, PurePosixPath
from typing import Any, Literal, cast
from pyro_mcp.runtime import (
RuntimeCapabilities,
RuntimePaths,
resolve_runtime_paths,
runtime_capabilities,
)
from pyro_mcp.vm_environments import EnvironmentStore, default_cache_dir, get_environment
from pyro_mcp.vm_firecracker import build_launch_plan
from pyro_mcp.vm_guest import VsockExecClient
from pyro_mcp.vm_network import NetworkConfig, TapNetworkManager
from pyro_mcp.workspace_disk import (
export_workspace_disk_image,
list_workspace_disk,
read_workspace_disk_file,
scrub_workspace_runtime_paths,
)
from pyro_mcp.workspace_files import (
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES as DEFAULT_WORKSPACE_FILE_READ_LIMIT,
)
from pyro_mcp.workspace_files import (
WORKSPACE_FILE_MAX_BYTES as WORKSPACE_FILE_MAX_LIMIT,
)
from pyro_mcp.workspace_files import (
WORKSPACE_PATCH_MAX_BYTES as WORKSPACE_PATCH_MAX_LIMIT,
)
from pyro_mcp.workspace_files import (
WorkspaceTextPatch,
apply_unified_text_patch,
delete_workspace_path,
list_workspace_files,
normalize_workspace_path,
parse_unified_text_patch,
read_workspace_file,
write_workspace_file,
)
from pyro_mcp.workspace_ports import DEFAULT_PUBLISHED_PORT_HOST
from pyro_mcp.workspace_shells import (
create_local_shell,
get_local_shell,
remove_local_shell,
shell_signal_names,
)
# Lifecycle states for VMs, shells, and services, plus network policy levels.
VmState = Literal["created", "started", "stopped"]
WorkspaceShellState = Literal["running", "stopped"]
WorkspaceServiceState = Literal["running", "exited", "stopped", "failed"]
WorkspaceNetworkPolicy = Literal["off", "egress", "egress+published-ports"]

# Default VM sizing and lifetime limits.
DEFAULT_VCPU_COUNT = 1
DEFAULT_MEM_MIB = 1024
DEFAULT_TIMEOUT_SECONDS = 30
DEFAULT_TTL_SECONDS = 600
DEFAULT_ALLOW_HOST_COMPAT = False

# On-disk workspace layout: version stamp plus the directory/file names that
# make up a persisted workspace tree on the host.
WORKSPACE_LAYOUT_VERSION = 8
WORKSPACE_BASELINE_DIRNAME = "baseline"
WORKSPACE_BASELINE_ARCHIVE_NAME = "workspace.tar"
WORKSPACE_SNAPSHOTS_DIRNAME = "snapshots"
WORKSPACE_DIRNAME = "workspace"
WORKSPACE_COMMANDS_DIRNAME = "commands"
WORKSPACE_SHELLS_DIRNAME = "shells"
WORKSPACE_SERVICES_DIRNAME = "services"
WORKSPACE_SECRETS_DIRNAME = "secrets"
WORKSPACE_RUNTIME_DIRNAME = "runtime"

# Guest-side paths where the workspace, agent binaries, and secrets appear.
WORKSPACE_GUEST_PATH = "/workspace"
WORKSPACE_GUEST_AGENT_PATH = "/opt/pyro/bin/pyro_guest_agent.py"
WORKSPACE_GUEST_INIT_PATH = "/opt/pyro/bin/pyro-init"
WORKSPACE_GUEST_SECRETS_PATH = "/run/pyro-secrets"

# Upload/size limits and interactive-shell defaults.
WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS = 60
WORKSPACE_SECRET_MAX_BYTES = 64 * 1024
DEFAULT_SHELL_COLS = 120
DEFAULT_SHELL_ROWS = 30
DEFAULT_SHELL_MAX_CHARS = 65536
DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES = 65536
# Re-exported file/patch limits so this module exposes one set of constants.
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES = DEFAULT_WORKSPACE_FILE_READ_LIMIT
WORKSPACE_FILE_MAX_BYTES = WORKSPACE_FILE_MAX_LIMIT
WORKSPACE_PATCH_MAX_BYTES = WORKSPACE_PATCH_MAX_LIMIT

# Service readiness polling defaults and log tail size.
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS = 30
DEFAULT_SERVICE_READY_INTERVAL_MS = 500
DEFAULT_SERVICE_LOG_TAIL_LINES = 200
DEFAULT_WORKSPACE_NETWORK_POLICY: WorkspaceNetworkPolicy = "off"
WORKSPACE_SHELL_SIGNAL_NAMES = shell_signal_names()

# Name validation: 1-64 chars, leading alphanumeric; secret names follow
# environment-variable naming (leading letter/underscore) instead.
WORKSPACE_SERVICE_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$")
WORKSPACE_SNAPSHOT_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$")
WORKSPACE_SECRET_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]{0,63}$")
WORKSPACE_LABEL_KEY_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$")

# Narrow string unions used across record payloads.
WorkspaceSeedMode = Literal["empty", "directory", "tar_archive"]
WorkspaceArtifactType = Literal["file", "directory", "symlink"]
WorkspaceServiceReadinessType = Literal["file", "tcp", "http", "command"]
WorkspaceSnapshotKind = Literal["baseline", "named"]
WorkspaceSecretSourceKind = Literal["literal", "file"]
@dataclass
class VmInstance:
    """In-memory VM lifecycle record."""

    vm_id: str  # unique identifier; reused as the workspace id for workspaces
    environment: str  # environment/image name this instance runs
    vcpu_count: int
    mem_mib: int
    ttl_seconds: int
    created_at: float  # epoch seconds
    expires_at: float  # epoch seconds
    workdir: Path  # host directory holding this instance's runtime files
    state: VmState = "created"
    network_requested: bool = False
    allow_host_compat: bool = DEFAULT_ALLOW_HOST_COMPAT
    firecracker_pid: int | None = None  # host PID of the VMM process, when started
    last_error: str | None = None
    metadata: dict[str, str] = field(default_factory=dict)
    network: NetworkConfig | None = None  # populated when a tap network is attached
@dataclass
class WorkspaceRecord:
    """Persistent workspace metadata stored on disk.

    This is the serialized twin of ``VmInstance`` plus workspace-only state
    (discovery name/labels, activity tracking, seed/secret/reset metadata).
    ``to_payload``/``from_payload`` define the on-disk JSON contract; keep
    them symmetric when adding fields.
    """

    workspace_id: str
    environment: str
    vcpu_count: int
    mem_mib: int
    ttl_seconds: int
    created_at: float
    expires_at: float
    state: VmState
    network_policy: WorkspaceNetworkPolicy
    allow_host_compat: bool
    firecracker_pid: int | None = None
    last_error: str | None = None
    metadata: dict[str, str] = field(default_factory=dict)
    network: NetworkConfig | None = None
    # Optional human-friendly discovery fields (workspace naming feature).
    name: str | None = None
    labels: dict[str, str] = field(default_factory=dict)
    # Epoch seconds of the last mutating activity; drives list ordering.
    last_activity_at: float = 0.0
    command_count: int = 0
    last_command: dict[str, Any] | None = None
    workspace_seed: dict[str, Any] = field(default_factory=dict)
    secrets: list[WorkspaceSecretRecord] = field(default_factory=list)
    reset_count: int = 0
    last_reset_at: float | None = None

    @classmethod
    def from_instance(
        cls,
        instance: VmInstance,
        *,
        network_policy: WorkspaceNetworkPolicy = DEFAULT_WORKSPACE_NETWORK_POLICY,
        command_count: int = 0,
        last_command: dict[str, Any] | None = None,
        workspace_seed: dict[str, Any] | None = None,
        secrets: list[WorkspaceSecretRecord] | None = None,
        name: str | None = None,
        labels: dict[str, str] | None = None,
    ) -> WorkspaceRecord:
        """Build a persistent record from an in-memory instance.

        Mutable inputs (metadata, labels, secrets) are copied so the record
        does not alias caller state. ``last_activity_at`` starts at creation
        time; reset bookkeeping starts zeroed.
        """
        return cls(
            workspace_id=instance.vm_id,
            environment=instance.environment,
            vcpu_count=instance.vcpu_count,
            mem_mib=instance.mem_mib,
            ttl_seconds=instance.ttl_seconds,
            created_at=instance.created_at,
            expires_at=instance.expires_at,
            state=instance.state,
            network_policy=network_policy,
            allow_host_compat=instance.allow_host_compat,
            firecracker_pid=instance.firecracker_pid,
            last_error=instance.last_error,
            metadata=dict(instance.metadata),
            network=instance.network,
            name=name,
            labels=dict(labels or {}),
            last_activity_at=instance.created_at,
            command_count=command_count,
            last_command=last_command,
            workspace_seed=dict(workspace_seed or _empty_workspace_seed_payload()),
            secrets=list(secrets or []),
            reset_count=0,
            last_reset_at=None,
        )

    def to_instance(self, *, workdir: Path) -> VmInstance:
        """Materialize an in-memory instance view of this record.

        ``network_requested`` is derived from the policy: anything other than
        "off" means the VM wants networking.
        """
        return VmInstance(
            vm_id=self.workspace_id,
            environment=self.environment,
            vcpu_count=self.vcpu_count,
            mem_mib=self.mem_mib,
            ttl_seconds=self.ttl_seconds,
            created_at=self.created_at,
            expires_at=self.expires_at,
            workdir=workdir,
            state=self.state,
            network_requested=self.network_policy != "off",
            allow_host_compat=self.allow_host_compat,
            firecracker_pid=self.firecracker_pid,
            last_error=self.last_error,
            metadata=dict(self.metadata),
            network=self.network,
        )

    def to_payload(self) -> dict[str, Any]:
        """Serialize for the workspace metadata file, stamping the layout version."""
        return {
            "layout_version": WORKSPACE_LAYOUT_VERSION,
            "workspace_id": self.workspace_id,
            "environment": self.environment,
            "vcpu_count": self.vcpu_count,
            "mem_mib": self.mem_mib,
            "ttl_seconds": self.ttl_seconds,
            "created_at": self.created_at,
            "expires_at": self.expires_at,
            "state": self.state,
            "network_policy": self.network_policy,
            "allow_host_compat": self.allow_host_compat,
            "firecracker_pid": self.firecracker_pid,
            "last_error": self.last_error,
            "metadata": self.metadata,
            "network": _serialize_network(self.network),
            "name": self.name,
            "labels": self.labels,
            "last_activity_at": self.last_activity_at,
            "command_count": self.command_count,
            "last_command": self.last_command,
            "workspace_seed": self.workspace_seed,
            "secrets": [secret.to_payload() for secret in self.secrets],
            "reset_count": self.reset_count,
            "last_reset_at": self.last_reset_at,
        }

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> WorkspaceRecord:
        """Rebuild a record from a persisted payload, tolerating older layouts.

        Missing keys fall back to safe defaults (e.g. legacy payloads without
        ``last_activity_at`` reuse ``created_at``; legacy ``network_requested``
        booleans are mapped to a policy by the helper).
        """
        return cls(
            workspace_id=str(payload["workspace_id"]),
            environment=str(payload["environment"]),
            vcpu_count=int(payload["vcpu_count"]),
            mem_mib=int(payload["mem_mib"]),
            ttl_seconds=int(payload["ttl_seconds"]),
            created_at=float(payload["created_at"]),
            expires_at=float(payload["expires_at"]),
            state=cast(VmState, str(payload.get("state", "stopped"))),
            network_policy=_workspace_network_policy_from_payload(payload),
            allow_host_compat=bool(payload.get("allow_host_compat", DEFAULT_ALLOW_HOST_COMPAT)),
            firecracker_pid=_optional_int(payload.get("firecracker_pid")),
            last_error=_optional_str(payload.get("last_error")),
            metadata=_string_dict(payload.get("metadata")),
            network=_deserialize_network(payload.get("network")),
            name=_normalize_workspace_name(_optional_str(payload.get("name")), allow_none=True),
            labels=_normalize_workspace_labels(payload.get("labels")),
            # Legacy payloads lack last_activity_at; fall back to created_at.
            last_activity_at=float(
                payload.get("last_activity_at", float(payload["created_at"]))
            ),
            command_count=int(payload.get("command_count", 0)),
            last_command=_optional_dict(payload.get("last_command")),
            workspace_seed=_workspace_seed_dict(payload.get("workspace_seed")),
            secrets=_workspace_secret_records(payload.get("secrets")),
            reset_count=int(payload.get("reset_count", 0)),
            last_reset_at=(
                None
                if payload.get("last_reset_at") is None
                else float(payload.get("last_reset_at", 0.0))
            ),
        )
@dataclass(frozen=True)
class WorkspaceSecretRecord:
    """Persistent secret metadata stored on disk per workspace."""

    name: str
    source_kind: WorkspaceSecretSourceKind
    stored_path: str

    def to_payload(self) -> dict[str, Any]:
        """Serialize this record for the workspace metadata file."""
        payload: dict[str, Any] = {"name": self.name}
        payload["source_kind"] = self.source_kind
        payload["stored_path"] = self.stored_path
        return payload

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> WorkspaceSecretRecord:
        """Rebuild a record from a payload; missing source_kind means literal."""
        kind_text = str(payload.get("source_kind", "literal"))
        return cls(
            name=str(payload["name"]),
            source_kind=cast(WorkspaceSecretSourceKind, kind_text),
            stored_path=str(payload["stored_path"]),
        )
@dataclass
class WorkspaceSnapshotRecord:
    """Persistent snapshot metadata stored on disk per workspace."""

    workspace_id: str
    snapshot_name: str
    kind: WorkspaceSnapshotKind
    created_at: float
    entry_count: int
    bytes_written: int

    def to_payload(self) -> dict[str, Any]:
        """Serialize for the snapshot metadata file, stamping the layout version."""
        payload: dict[str, Any] = {"layout_version": WORKSPACE_LAYOUT_VERSION}
        payload["workspace_id"] = self.workspace_id
        payload["snapshot_name"] = self.snapshot_name
        payload["kind"] = self.kind
        payload["created_at"] = self.created_at
        payload["entry_count"] = self.entry_count
        payload["bytes_written"] = self.bytes_written
        return payload

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> WorkspaceSnapshotRecord:
        """Rebuild from a persisted payload; kind defaults to "named" for old data."""
        return cls(
            workspace_id=str(payload["workspace_id"]),
            snapshot_name=str(payload["snapshot_name"]),
            kind=cast(WorkspaceSnapshotKind, str(payload.get("kind", "named"))),
            created_at=float(payload["created_at"]),
            entry_count=int(payload.get("entry_count", 0)),
            bytes_written=int(payload.get("bytes_written", 0)),
        )
@dataclass
class WorkspaceShellRecord:
    """Persistent shell metadata stored on disk per workspace."""

    workspace_id: str
    shell_id: str
    cwd: str
    cols: int
    rows: int
    state: WorkspaceShellState
    started_at: float
    ended_at: float | None = None
    exit_code: int | None = None
    execution_mode: str = "pending"
    metadata: dict[str, str] = field(default_factory=dict)

    def to_payload(self) -> dict[str, Any]:
        """Serialize for the shell metadata file, stamping the layout version."""
        payload: dict[str, Any] = {"layout_version": WORKSPACE_LAYOUT_VERSION}
        payload.update(
            workspace_id=self.workspace_id,
            shell_id=self.shell_id,
            cwd=self.cwd,
            cols=self.cols,
            rows=self.rows,
            state=self.state,
            started_at=self.started_at,
            ended_at=self.ended_at,
            exit_code=self.exit_code,
            execution_mode=self.execution_mode,
            metadata=dict(self.metadata),
        )
        return payload

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> WorkspaceShellRecord:
        """Rebuild a shell record from persisted metadata, tolerating missing keys."""
        raw_ended = payload.get("ended_at")
        raw_exit = payload.get("exit_code")
        return cls(
            workspace_id=str(payload["workspace_id"]),
            shell_id=str(payload["shell_id"]),
            cwd=str(payload.get("cwd", WORKSPACE_GUEST_PATH)),
            cols=int(payload.get("cols", DEFAULT_SHELL_COLS)),
            rows=int(payload.get("rows", DEFAULT_SHELL_ROWS)),
            state=cast(WorkspaceShellState, str(payload.get("state", "stopped"))),
            started_at=float(payload.get("started_at", 0.0)),
            ended_at=None if raw_ended is None else float(raw_ended),
            exit_code=None if raw_exit is None else int(raw_exit),
            execution_mode=str(payload.get("execution_mode", "pending")),
            metadata=_string_dict(payload.get("metadata")),
        )
@dataclass
class WorkspaceServiceRecord:
    """Persistent service metadata stored on disk per workspace."""

    workspace_id: str
    service_name: str
    command: str
    cwd: str
    state: WorkspaceServiceState
    started_at: float
    readiness: dict[str, Any] | None = None
    ready_at: float | None = None
    ended_at: float | None = None
    exit_code: int | None = None
    pid: int | None = None
    execution_mode: str = "pending"
    stop_reason: str | None = None
    published_ports: list[WorkspacePublishedPortRecord] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)

    def to_payload(self) -> dict[str, Any]:
        """Serialize for the service metadata file, stamping the layout version."""
        port_payloads = [port.to_payload() for port in self.published_ports]
        payload: dict[str, Any] = {"layout_version": WORKSPACE_LAYOUT_VERSION}
        payload.update(
            workspace_id=self.workspace_id,
            service_name=self.service_name,
            command=self.command,
            cwd=self.cwd,
            state=self.state,
            started_at=self.started_at,
            readiness=self.readiness,
            ready_at=self.ready_at,
            ended_at=self.ended_at,
            exit_code=self.exit_code,
            pid=self.pid,
            execution_mode=self.execution_mode,
            stop_reason=self.stop_reason,
            published_ports=port_payloads,
            metadata=dict(self.metadata),
        )
        return payload

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> WorkspaceServiceRecord:
        """Rebuild a service record from persisted metadata, tolerating missing keys."""

        def optional_float(key: str) -> float | None:
            raw = payload.get(key)
            return None if raw is None else float(raw)

        raw_readiness = payload.get("readiness")
        readiness = dict(raw_readiness) if isinstance(raw_readiness, dict) else None
        raw_exit = payload.get("exit_code")
        raw_pid = payload.get("pid")
        return cls(
            workspace_id=str(payload["workspace_id"]),
            service_name=str(payload["service_name"]),
            command=str(payload.get("command", "")),
            cwd=str(payload.get("cwd", WORKSPACE_GUEST_PATH)),
            state=cast(WorkspaceServiceState, str(payload.get("state", "stopped"))),
            started_at=float(payload.get("started_at", 0.0)),
            readiness=readiness,
            ready_at=optional_float("ready_at"),
            ended_at=optional_float("ended_at"),
            exit_code=None if raw_exit is None else int(raw_exit),
            pid=None if raw_pid is None else int(raw_pid),
            execution_mode=str(payload.get("execution_mode", "pending")),
            stop_reason=_optional_str(payload.get("stop_reason")),
            published_ports=_workspace_published_port_records(payload.get("published_ports")),
            metadata=_string_dict(payload.get("metadata")),
        )
@dataclass(frozen=True)
class WorkspacePublishedPortRecord:
    """Persisted localhost published-port metadata for one service."""

    guest_port: int
    host_port: int
    host: str = DEFAULT_PUBLISHED_PORT_HOST
    protocol: str = "tcp"
    proxy_pid: int | None = None

    def to_payload(self) -> dict[str, Any]:
        """Serialize for the service metadata file."""
        return dict(
            guest_port=self.guest_port,
            host_port=self.host_port,
            host=self.host,
            protocol=self.protocol,
            proxy_pid=self.proxy_pid,
        )

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> WorkspacePublishedPortRecord:
        """Rebuild from persisted metadata; proxy_pid may be absent or null."""
        raw_pid = payload.get("proxy_pid")
        return cls(
            guest_port=int(payload["guest_port"]),
            host_port=int(payload["host_port"]),
            host=str(payload.get("host", DEFAULT_PUBLISHED_PORT_HOST)),
            protocol=str(payload.get("protocol", "tcp")),
            proxy_pid=None if raw_pid is None else int(raw_pid),
        )
@dataclass(frozen=True)
class WorkspacePublishedPortSpec:
    """Requested published-port configuration for one service."""

    guest_port: int
    # None presumably lets the manager allocate a free host port — TODO confirm
    host_port: int | None = None
@dataclass(frozen=True)
class PreparedWorkspaceSeed:
    """Prepared host-side seed archive plus metadata."""

    mode: WorkspaceSeedMode
    source_path: str | None
    archive_path: Path | None = None
    entry_count: int = 0
    bytes_written: int = 0
    cleanup_dir: Path | None = None

    def to_payload(
        self,
        *,
        destination: str = WORKSPACE_GUEST_PATH,
        path_key: str = "seed_path",
    ) -> dict[str, Any]:
        """Serialize seed metadata; path_key names the source-path field."""
        payload: dict[str, Any] = {"mode": self.mode}
        payload[path_key] = self.source_path
        payload["destination"] = destination
        payload["entry_count"] = self.entry_count
        payload["bytes_written"] = self.bytes_written
        return payload

    def cleanup(self) -> None:
        """Best-effort removal of the temporary staging directory, if any."""
        if self.cleanup_dir is None:
            return
        shutil.rmtree(self.cleanup_dir, ignore_errors=True)
@dataclass(frozen=True)
class VmExecResult:
    """Command execution output."""

    stdout: str
    stderr: str
    exit_code: int  # 124 is used by _run_host_command for timeouts
    duration_ms: int  # wall-clock duration measured by the caller
@dataclass(frozen=True)
class ExportedWorkspaceArchive:
    """Result of exporting a workspace path into a host-side tar archive."""

    workspace_path: str  # the workspace path that was exported
    artifact_type: WorkspaceArtifactType
    archive_path: Path  # host location of the written tar
    entry_count: int
    bytes_written: int
@dataclass(frozen=True)
class WorkspaceTreeEntry:
    """One workspace tree entry mapped to its backing host path."""

    path: str
    artifact_type: WorkspaceArtifactType
    disk_path: Path  # host path backing this entry
    size_bytes: int = 0  # presumably meaningful only for files — TODO confirm
    link_target: str | None = None  # set for symlinks
def _optional_int(value: object) -> int | None:
if value is None:
return None
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
if isinstance(value, str):
return int(value)
raise TypeError("expected integer-compatible payload")
def _optional_str(value: object) -> str | None:
if value is None:
return None
return str(value)
def _optional_dict(value: object) -> dict[str, Any] | None:
if value is None:
return None
if not isinstance(value, dict):
raise TypeError("expected dictionary payload")
return dict(value)
def _string_dict(value: object) -> dict[str, str]:
if not isinstance(value, dict):
return {}
return {str(key): str(item) for key, item in value.items()}
def _empty_workspace_seed_payload() -> dict[str, Any]:
    """Return the default seed payload for a workspace created without seed data."""
    return dict(
        mode="empty",
        seed_path=None,
        destination=WORKSPACE_GUEST_PATH,
        entry_count=0,
        bytes_written=0,
    )
def _workspace_seed_dict(value: object) -> dict[str, Any]:
    """Merge a stored seed payload over the defaults, coercing field types."""
    merged = _empty_workspace_seed_payload()
    if not isinstance(value, dict):
        return merged
    merged["mode"] = str(value.get("mode", merged["mode"]))
    merged["seed_path"] = _optional_str(value.get("seed_path"))
    merged["destination"] = str(value.get("destination", merged["destination"]))
    merged["entry_count"] = int(value.get("entry_count", merged["entry_count"]))
    merged["bytes_written"] = int(value.get("bytes_written", merged["bytes_written"]))
    return merged
def _normalize_workspace_network_policy(policy: str) -> WorkspaceNetworkPolicy:
normalized = policy.strip().lower()
if normalized not in {"off", "egress", "egress+published-ports"}:
raise ValueError("network_policy must be one of: off, egress, egress+published-ports")
return cast(WorkspaceNetworkPolicy, normalized)
def _workspace_network_policy_from_payload(payload: dict[str, Any]) -> WorkspaceNetworkPolicy:
raw_policy = payload.get("network_policy")
if raw_policy is not None:
return _normalize_workspace_network_policy(str(raw_policy))
raw_network_requested = payload.get("network_requested", False)
if isinstance(raw_network_requested, str):
network_requested = raw_network_requested.strip().lower() in {"1", "true", "yes", "on"}
else:
network_requested = bool(raw_network_requested)
if network_requested:
return "egress"
return DEFAULT_WORKSPACE_NETWORK_POLICY
def _serialize_workspace_published_port_public(
published_port: WorkspacePublishedPortRecord,
) -> dict[str, Any]:
return {
"host": published_port.host,
"host_port": published_port.host_port,
"guest_port": published_port.guest_port,
"protocol": published_port.protocol,
}
def _workspace_published_port_records(value: object) -> list[WorkspacePublishedPortRecord]:
if not isinstance(value, list):
return []
records: list[WorkspacePublishedPortRecord] = []
for item in value:
if not isinstance(item, dict):
continue
records.append(WorkspacePublishedPortRecord.from_payload(item))
return records
def _workspace_secret_records(value: object) -> list[WorkspaceSecretRecord]:
if not isinstance(value, list):
return []
records: list[WorkspaceSecretRecord] = []
for item in value:
if not isinstance(item, dict):
continue
records.append(WorkspaceSecretRecord.from_payload(item))
return records
def _serialize_workspace_secret_public(secret: WorkspaceSecretRecord) -> dict[str, Any]:
return {
"name": secret.name,
"source_kind": secret.source_kind,
}
def _redact_text(text: str, secret_values: list[str]) -> str:
redacted = text
for secret_value in sorted(
{value for value in secret_values if value != ""},
key=len,
reverse=True,
):
redacted = redacted.replace(secret_value, "[REDACTED]")
return redacted
def _redact_exception(exc: Exception, secret_values: list[str]) -> Exception:
    """Return exc with secrets scrubbed from its message, or exc itself if clean.

    A new exception of the same class is only constructed when redaction
    actually changed the message, preserving the original object otherwise.
    """
    original_message = str(exc)
    scrubbed = _redact_text(original_message, secret_values)
    if scrubbed != original_message:
        return exc.__class__(scrubbed)
    return exc
def _serialize_network(network: NetworkConfig | None) -> dict[str, Any] | None:
if network is None:
return None
return {
"vm_id": network.vm_id,
"tap_name": network.tap_name,
"guest_ip": network.guest_ip,
"gateway_ip": network.gateway_ip,
"subnet_cidr": network.subnet_cidr,
"mac_address": network.mac_address,
"dns_servers": list(network.dns_servers),
}
def _deserialize_network(payload: object) -> NetworkConfig | None:
    """Rebuild a NetworkConfig from a persisted payload; None stays None.

    Raises TypeError for any non-dict, non-None payload. Malformed
    dns_servers (not a list) degrade to an empty tuple.
    """
    if payload is None:
        return None
    if not isinstance(payload, dict):
        raise TypeError("expected dictionary payload")
    raw_dns = payload.get("dns_servers", [])
    if isinstance(raw_dns, list):
        dns_values = tuple(str(entry) for entry in raw_dns)
    else:
        dns_values = ()
    return NetworkConfig(
        vm_id=str(payload["vm_id"]),
        tap_name=str(payload["tap_name"]),
        guest_ip=str(payload["guest_ip"]),
        gateway_ip=str(payload["gateway_ip"]),
        subnet_cidr=str(payload["subnet_cidr"]),
        mac_address=str(payload["mac_address"]),
        dns_servers=dns_values,
    )
def _run_host_command(
    workdir: Path,
    command: str,
    timeout_seconds: int,
    *,
    env_overrides: dict[str, str] | None = None,
) -> VmExecResult:
    """Run a shell command on the host with a minimal environment.

    The command executes under ``bash -lc`` in workdir with only PATH
    (inherited) and HOME (set to workdir), plus any env_overrides. A timeout
    yields exit code 124, mirroring the coreutils ``timeout`` convention.
    """
    start = time.monotonic()
    env: dict[str, str] = {"PATH": os.environ.get("PATH", ""), "HOME": str(workdir)}
    if env_overrides is not None:
        env.update(env_overrides)
    try:
        completed = subprocess.run(  # noqa: S603
            ["bash", "-lc", command],  # noqa: S607
            cwd=workdir,
            env=env,
            text=True,
            capture_output=True,
            timeout=timeout_seconds,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return VmExecResult(
            stdout="",
            stderr=f"command timed out after {timeout_seconds}s",
            exit_code=124,
            duration_ms=int((time.monotonic() - start) * 1000),
        )
    return VmExecResult(
        stdout=completed.stdout,
        stderr=completed.stderr,
        exit_code=completed.returncode,
        duration_ms=int((time.monotonic() - start) * 1000),
    )
def _copy_rootfs(source: Path, dest: Path) -> str:
dest.parent.mkdir(parents=True, exist_ok=True)
try:
proc = subprocess.run( # noqa: S603
["cp", "--reflink=auto", str(source), str(dest)],
text=True,
capture_output=True,
check=False,
)
if proc.returncode == 0:
return "reflink_or_copy"
except OSError:
pass
shutil.copy2(source, dest)
return "copy2"
def _wrap_guest_command(command: str, *, cwd: str | None = None) -> str:
if cwd is None:
return command
quoted_cwd = shlex.quote(cwd)
return f"mkdir -p {quoted_cwd} && cd {quoted_cwd} && {command}"
def _is_supported_seed_archive(path: Path) -> bool:
name = path.name.lower()
return name.endswith(".tar") or name.endswith(".tar.gz") or name.endswith(".tgz")
def _normalize_workspace_destination(destination: str) -> tuple[str, PurePosixPath]:
    """Normalize a guest destination to an absolute path under /workspace.

    Relative destinations are anchored at /workspace. Returns the normalized
    absolute path string plus its path relative to /workspace. Raises
    ValueError for empty input, any '..' segment, the workspace root itself
    being escaped, or a path outside /workspace.
    """
    candidate = destination.strip()
    if candidate == "":
        raise ValueError("workspace destination must not be empty")
    destination_path = PurePosixPath(candidate)
    # Reject '..' outright instead of resolving it, so traversal attempts fail.
    if any(part == ".." for part in destination_path.parts):
        raise ValueError("workspace destination must stay inside /workspace")
    workspace_root = PurePosixPath(WORKSPACE_GUEST_PATH)
    if not destination_path.is_absolute():
        destination_path = workspace_root / destination_path
    # Drop empty and '.' segments, then rebuild from the filesystem root.
    parts = [part for part in destination_path.parts if part not in {"", "."}]
    normalized = PurePosixPath("/") / PurePosixPath(*parts)
    if normalized == PurePosixPath("/"):
        raise ValueError("workspace destination must stay inside /workspace")
    # Prefix check: the normalized path must start with the workspace root.
    if normalized.parts[: len(workspace_root.parts)] != workspace_root.parts:
        raise ValueError("workspace destination must stay inside /workspace")
    suffix = normalized.relative_to(workspace_root)
    return str(normalized), suffix
def _workspace_host_destination(workspace_dir: Path, destination: str) -> Path:
    """Map a guest workspace destination onto the host workspace directory."""
    _, relative = _normalize_workspace_destination(destination)
    if str(relative) in {"", "."}:
        return workspace_dir
    return workspace_dir.joinpath(*relative.parts)
def _normalize_workspace_disk_path(path: str) -> str:
candidate = path.strip()
if candidate == "":
raise ValueError("workspace disk path must not be empty")
if candidate.startswith("/"):
raw_path = PurePosixPath(candidate)
normalized_parts: list[str] = []
for part in raw_path.parts:
if part in {"", "/", "."}:
continue
if part == "..":
if normalized_parts:
normalized_parts.pop()
continue
normalized_parts.append(part)
if not normalized_parts:
return "/"
return str(PurePosixPath("/") / PurePosixPath(*normalized_parts))
normalized, _ = _normalize_workspace_destination(candidate)
return normalized
def _normalize_workspace_file_path(path: str) -> str:
    """Normalize a workspace file path via the shared workspace_files helper."""
    return normalize_workspace_path(path)
def _validate_workspace_file_read_max_bytes(max_bytes: int) -> int:
if max_bytes <= 0:
raise ValueError("max_bytes must be positive")
if max_bytes > WORKSPACE_FILE_MAX_BYTES:
raise ValueError(
f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes"
)
return max_bytes
def _validate_workspace_text_payload(text: str, *, field_name: str) -> str:
    """Ensure text fits the workspace file-size cap once UTF-8 encoded."""
    if len(text.encode("utf-8")) > WORKSPACE_FILE_MAX_BYTES:
        raise ValueError(
            f"{field_name} must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8"
        )
    return text
def _validate_workspace_patch_text(patch: str) -> str:
if patch.strip() == "":
raise ValueError("patch must not be empty")
encoded = patch.encode("utf-8")
if len(encoded) > WORKSPACE_PATCH_MAX_BYTES:
raise ValueError(
f"patch must be at most {WORKSPACE_PATCH_MAX_BYTES} bytes when encoded as UTF-8"
)
return patch
def _decode_workspace_patch_text(path: str, content_bytes: bytes) -> str:
try:
return content_bytes.decode("utf-8")
except UnicodeDecodeError as exc:
raise RuntimeError(
f"workspace patch only supports UTF-8 text files: {path}"
) from exc
def _normalize_archive_member_name(name: str) -> PurePosixPath:
candidate = name.strip()
if candidate == "":
raise RuntimeError("archive member path is empty")
member_path = PurePosixPath(candidate)
if member_path.is_absolute():
raise RuntimeError(f"absolute archive member paths are not allowed: {name}")
parts = [part for part in member_path.parts if part not in {"", "."}]
if any(part == ".." for part in parts):
raise RuntimeError(f"unsafe archive member path: {name}")
normalized = PurePosixPath(*parts)
if str(normalized) in {"", "."}:
raise RuntimeError(f"unsafe archive member path: {name}")
return normalized
def _validate_archive_symlink_target(member_name: PurePosixPath, link_target: str) -> None:
target = link_target.strip()
if target == "":
raise RuntimeError(f"symlink {member_name} has an empty target")
link_path = PurePosixPath(target)
if link_path.is_absolute():
raise RuntimeError(f"symlink {member_name} escapes the workspace")
combined = member_name.parent.joinpath(link_path)
parts = [part for part in combined.parts if part not in {"", "."}]
if any(part == ".." for part in parts):
raise RuntimeError(f"symlink {member_name} escapes the workspace")
def _inspect_seed_archive(archive_path: Path) -> tuple[int, int]:
    """Validate a seed tar and return (entry_count, total_file_bytes).

    Every member name is validated; symlink targets are checked for escapes.
    Hard links and any member type other than file/dir/symlink raise
    RuntimeError.
    """
    entries = 0
    total_bytes = 0
    with tarfile.open(archive_path, "r:*") as archive:
        for member in archive.getmembers():
            safe_name = _normalize_archive_member_name(member.name)
            entries += 1
            if member.isfile():
                total_bytes += member.size
            elif member.issym():
                _validate_archive_symlink_target(safe_name, member.linkname)
            elif member.islnk():
                raise RuntimeError(
                    f"hard links are not allowed in workspace archives: {member.name}"
                )
            elif not member.isdir():
                raise RuntimeError(f"unsupported archive member type: {member.name}")
    return entries, total_bytes
def _write_directory_seed_archive(source_dir: Path, archive_path: Path) -> None:
archive_path.parent.mkdir(parents=True, exist_ok=True)
with tarfile.open(archive_path, "w") as archive:
for child in sorted(source_dir.iterdir(), key=lambda item: item.name):
archive.add(child, arcname=child.name, recursive=True)
def _write_empty_seed_archive(archive_path: Path) -> None:
archive_path.parent.mkdir(parents=True, exist_ok=True)
with tarfile.open(archive_path, "w"):
pass
def _prepare_workspace_secrets(
    secrets: list[dict[str, str]] | None,
    *,
    secrets_dir: Path,
) -> tuple[list[WorkspaceSecretRecord], dict[str, str]]:
    """Validate requested secrets and persist their material under secrets_dir.

    Each entry must be a dict carrying a unique name (validated by
    ``_normalize_workspace_secret_name``) plus exactly one of 'value'
    (inline literal) or 'file_path' (read from the host; UTF-8 text,
    size-capped at WORKSPACE_SECRET_MAX_BYTES). Accepted secrets are written
    as ``<name>.secret`` with mode 0600 inside the 0700 secrets_dir.

    Returns (records sorted by name, name -> secret value mapping).
    Raises ValueError for any malformed or unreadable entry.
    """
    if not secrets:
        return [], {}
    secrets_dir.mkdir(parents=True, exist_ok=True)
    records: list[WorkspaceSecretRecord] = []
    values_by_name: dict[str, str] = {}
    for index, item in enumerate(secrets, start=1):
        if not isinstance(item, dict):
            raise ValueError(f"secret #{index} must be a dictionary")
        raw_name = item.get("name")
        if raw_name is None:
            raise ValueError(f"secret #{index} is missing 'name'")
        name = _normalize_workspace_secret_name(str(raw_name))
        if name in values_by_name:
            raise ValueError(f"duplicate secret name: {name}")
        # Exactly one source is allowed: inline literal or host file.
        has_value = "value" in item
        has_file_path = "file_path" in item
        if has_value == has_file_path:
            raise ValueError(
                f"secret {name!r} must provide exactly one of 'value' or 'file_path'"
            )
        source_kind: WorkspaceSecretSourceKind
        if has_value:
            value = _validate_workspace_secret_value(name, str(item["value"]))
            source_kind = "literal"
        else:
            raw_file_path = str(item["file_path"]).strip()
            if raw_file_path == "":
                raise ValueError(f"secret {name!r} file_path must not be empty")
            resolved_file_path = Path(raw_file_path).expanduser().resolve()
            if not resolved_file_path.exists() or not resolved_file_path.is_file():
                raise ValueError(f"secret file for {name!r} does not exist: {resolved_file_path}")
            try:
                raw_bytes = resolved_file_path.read_bytes()
            except OSError as exc:
                raise ValueError(
                    f"failed to read secret file for {name!r}: {resolved_file_path}"
                ) from exc
            # Size is checked on raw bytes, before decoding, so huge or binary
            # files fail fast.
            if len(raw_bytes) > WORKSPACE_SECRET_MAX_BYTES:
                raise ValueError(
                    f"secret {name!r} must be at most {WORKSPACE_SECRET_MAX_BYTES} bytes"
                )
            try:
                value = raw_bytes.decode("utf-8")
            except UnicodeDecodeError as exc:
                raise ValueError(f"secret {name!r} must be valid UTF-8 text") from exc
            value = _validate_workspace_secret_value(name, value)
            source_kind = "file"
        stored_path = f"{name}.secret"
        secret_path = secrets_dir / stored_path
        secret_path.write_text(value, encoding="utf-8")
        # Secret material is owner-read/write only.
        secret_path.chmod(0o600)
        values_by_name[name] = value
        records.append(
            WorkspaceSecretRecord(
                name=name,
                source_kind=source_kind,
                stored_path=stored_path,
            )
        )
    secrets_dir.chmod(0o700)
    records.sort(key=lambda item: item.name)
    return records, {record.name: values_by_name[record.name] for record in records}
def _load_workspace_secret_values(
    *,
    workspace_dir: Path,
    secrets: list[WorkspaceSecretRecord],
) -> dict[str, str]:
    """Read stored secret material back into a name -> value mapping.

    Raises RuntimeError when a secret's file is missing from disk.
    """
    loaded: dict[str, str] = {}
    for record in secrets:
        stored = workspace_dir / WORKSPACE_SECRETS_DIRNAME / record.stored_path
        if not (stored.exists() and stored.is_file()):
            raise RuntimeError(f"secret material is unavailable for {record.name!r}")
        loaded[record.name] = stored.read_text(encoding="utf-8")
    return loaded
def _build_workspace_secret_archive(
    *,
    workspace_dir: Path,
    secrets: list[WorkspaceSecretRecord],
    archive_path: Path,
) -> tuple[int, int]:
    """Pack stored secret files into a tar for delivery to the guest.

    Each member is named after the secret and written with mode 0600.
    Returns (entry_count, bytes_written).
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    count = 0
    total = 0
    with tarfile.open(archive_path, "w") as archive:
        for record in secrets:
            stored = workspace_dir / WORKSPACE_SECRETS_DIRNAME / record.stored_path
            data = stored.read_bytes()
            member = tarfile.TarInfo(name=record.name)
            member.size = len(data)
            member.mode = 0o600
            archive.addfile(member, io.BytesIO(data))
            count += 1
            total += len(data)
    return count, total
def _persist_workspace_baseline(
    prepared_seed: PreparedWorkspaceSeed,
    *,
    baseline_archive_path: Path,
) -> None:
    """Store the seed archive as the workspace's baseline snapshot.

    Writes an empty archive when the prepared seed carries no payload.
    """
    baseline_archive_path.parent.mkdir(parents=True, exist_ok=True)
    if prepared_seed.archive_path is not None:
        shutil.copy2(prepared_seed.archive_path, baseline_archive_path)
    else:
        _write_empty_seed_archive(baseline_archive_path)
def _write_workspace_export_archive(
    source_path: Path,
    *,
    archive_path: Path,
) -> WorkspaceArtifactType:
    """Archive *source_path* into a tar file and report its artifact type.

    All symlink targets in the tree are validated before anything is written,
    so a hostile tree never yields a partially written archive.  Symlinks are
    stored as links (``dereference = False``), never followed.

    Returns the root's artifact type: "file", "directory", or "symlink".
    Raises RuntimeError for any other path type.
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    # Order matters: check is_symlink() first because is_file()/is_dir()
    # follow symlinks and would misclassify a link to a file/dir.
    if source_path.is_symlink():
        artifact_type: WorkspaceArtifactType = "symlink"
    elif source_path.is_file():
        artifact_type = "file"
    elif source_path.is_dir():
        artifact_type = "directory"
    else:
        raise RuntimeError(f"unsupported workspace path type: {source_path}")
    def validate_source(current_path: Path, relative_path: PurePosixPath) -> None:
        # Recursively reject unsupported node types and unsafe symlink targets.
        if current_path.is_symlink():
            _validate_archive_symlink_target(relative_path, os.readlink(current_path))
            return
        if current_path.is_file():
            return
        if current_path.is_dir():
            for child in sorted(current_path.iterdir(), key=lambda item: item.name):
                validate_source(child, relative_path / child.name)
            return
        raise RuntimeError(f"unsupported workspace path type: {current_path}")
    # For a directory export the directory itself is the archive root, so its
    # children are validated/archived at the top level; other types keep their
    # own basename as the single member name.
    if artifact_type == "directory":
        for child in sorted(source_path.iterdir(), key=lambda item: item.name):
            validate_source(child, PurePosixPath(child.name))
    else:
        validate_source(source_path, PurePosixPath(source_path.name))
    with tarfile.open(archive_path, "w") as archive:
        archive.dereference = False
        if artifact_type == "directory":
            for child in sorted(source_path.iterdir(), key=lambda item: item.name):
                archive.add(child, arcname=child.name, recursive=True)
        else:
            archive.add(source_path, arcname=source_path.name, recursive=False)
    return artifact_type
def _extract_seed_archive_to_host_workspace(
    archive_path: Path,
    *,
    workspace_dir: Path,
    destination: str,
) -> dict[str, Any]:
    """Safely extract a seed archive into a host-backed workspace directory.

    Members are written one at a time instead of using ``TarFile.extractall``
    so that every path can be vetted: member names are normalized, parent
    chains must not cross symlinks, symlink targets are validated, and hard
    links are refused outright.

    Returns a summary dict with the normalized destination, entry count, and
    bytes written (regular-file payload only).
    """
    normalized_destination, _ = _normalize_workspace_destination(destination)
    destination_root = _workspace_host_destination(workspace_dir, normalized_destination)
    destination_root.mkdir(parents=True, exist_ok=True)
    entry_count = 0
    bytes_written = 0
    with tarfile.open(archive_path, "r:*") as archive:
        for member in archive.getmembers():
            member_name = _normalize_archive_member_name(member.name)
            target_path = destination_root.joinpath(*member_name.parts)
            entry_count += 1
            # Guard against traversal through symlinked intermediate dirs.
            _ensure_no_symlink_parents(workspace_dir, target_path, member.name)
            if member.isdir():
                if target_path.is_symlink() or (target_path.exists() and not target_path.is_dir()):
                    raise RuntimeError(f"directory conflicts with existing path: {member.name}")
                target_path.mkdir(parents=True, exist_ok=True)
                continue
            if member.isfile():
                target_path.parent.mkdir(parents=True, exist_ok=True)
                if target_path.is_symlink() or target_path.is_dir():
                    raise RuntimeError(f"file conflicts with existing path: {member.name}")
                source = archive.extractfile(member)
                if source is None:
                    raise RuntimeError(f"failed to read archive member: {member.name}")
                with target_path.open("wb") as handle:
                    shutil.copyfileobj(source, handle)
                bytes_written += member.size
                continue
            if member.issym():
                _validate_archive_symlink_target(member_name, member.linkname)
                target_path.parent.mkdir(parents=True, exist_ok=True)
                # An existing symlink may be replaced; any other existing path
                # is a conflict.
                if target_path.exists() and not target_path.is_symlink():
                    raise RuntimeError(f"symlink conflicts with existing path: {member.name}")
                if target_path.is_symlink():
                    target_path.unlink()
                os.symlink(member.linkname, target_path)
                continue
            if member.islnk():
                # Hard links could alias content outside the workspace tree.
                raise RuntimeError(
                    f"hard links are not allowed in workspace archives: {member.name}"
                )
            raise RuntimeError(f"unsupported archive member type: {member.name}")
    return {
        "destination": normalized_destination,
        "entry_count": entry_count,
        "bytes_written": bytes_written,
    }
def _prepare_workspace_export_archive(
    *,
    workspace_dir: Path,
    workspace_path: str,
    archive_path: Path,
) -> ExportedWorkspaceArchive:
    """Archive a workspace path for export and describe the produced archive."""
    normalized_path, _ = _normalize_workspace_destination(workspace_path)
    host_source = _workspace_host_destination(workspace_dir, normalized_path)
    # A broken symlink reports exists() == False but must still be exportable.
    if not (host_source.exists() or host_source.is_symlink()):
        raise RuntimeError(f"workspace path does not exist: {normalized_path}")
    artifact_type = _write_workspace_export_archive(host_source, archive_path=archive_path)
    entry_count, bytes_written = _inspect_seed_archive(archive_path)
    return ExportedWorkspaceArchive(
        workspace_path=normalized_path,
        artifact_type=artifact_type,
        archive_path=archive_path,
        entry_count=entry_count,
        bytes_written=bytes_written,
    )
def _extract_workspace_export_archive(
    archive_path: Path,
    *,
    output_path: Path,
    artifact_type: WorkspaceArtifactType,
) -> dict[str, Any]:
    """Extract an exported workspace archive onto the host at *output_path*.

    Directory exports are replayed member-by-member with the same symlink,
    traversal, and hard-link checks used during seeding; "file" and "symlink"
    exports must contain exactly one member of the matching type.
    *output_path* must not already exist.

    Returns a summary dict: output_path, artifact_type, entry_count, and
    bytes_written.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    if output_path.exists() or output_path.is_symlink():
        raise RuntimeError(f"output_path already exists: {output_path}")
    entry_count = 0
    bytes_written = 0
    if artifact_type == "directory":
        output_path.mkdir(parents=True, exist_ok=False)
        with tarfile.open(archive_path, "r:*") as archive:
            for member in archive.getmembers():
                member_name = _normalize_archive_member_name(member.name)
                target_path = output_path.joinpath(*member_name.parts)
                entry_count += 1
                # Guard against traversal through symlinked intermediate dirs.
                _ensure_no_symlink_parents(output_path, target_path, member.name)
                if member.isdir():
                    if target_path.is_symlink() or (
                        target_path.exists() and not target_path.is_dir()
                    ):
                        raise RuntimeError(f"directory conflicts with existing path: {member.name}")
                    target_path.mkdir(parents=True, exist_ok=True)
                    continue
                if member.isfile():
                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    if target_path.is_symlink() or target_path.is_dir():
                        raise RuntimeError(f"file conflicts with existing path: {member.name}")
                    source = archive.extractfile(member)
                    if source is None:
                        raise RuntimeError(f"failed to read archive member: {member.name}")
                    with target_path.open("wb") as handle:
                        shutil.copyfileobj(source, handle)
                    bytes_written += member.size
                    continue
                if member.issym():
                    _validate_archive_symlink_target(member_name, member.linkname)
                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    if target_path.exists() and not target_path.is_symlink():
                        raise RuntimeError(f"symlink conflicts with existing path: {member.name}")
                    if target_path.is_symlink():
                        target_path.unlink()
                    os.symlink(member.linkname, target_path)
                    continue
                if member.islnk():
                    # Hard links could alias content outside the output tree.
                    raise RuntimeError(
                        f"hard links are not allowed in workspace archives: {member.name}"
                    )
                raise RuntimeError(f"unsupported archive member type: {member.name}")
        return {
            "output_path": str(output_path),
            "artifact_type": artifact_type,
            "entry_count": entry_count,
            "bytes_written": bytes_written,
        }
    # Single-member exports: exactly one file or symlink is expected.
    with tarfile.open(archive_path, "r:*") as archive:
        members = archive.getmembers()
        if len(members) != 1:
            raise RuntimeError(
                "expected exactly one archive member for "
                f"{artifact_type} export, got {len(members)}"
            )
        member = members[0]
        # Called for its validation side effect on the member name.
        _normalize_archive_member_name(member.name)
        entry_count = 1
        if artifact_type == "file":
            if not member.isfile():
                raise RuntimeError("exported archive did not contain a regular file")
            source = archive.extractfile(member)
            if source is None:
                raise RuntimeError(f"failed to read archive member: {member.name}")
            with output_path.open("wb") as handle:
                shutil.copyfileobj(source, handle)
            bytes_written = member.size
        elif artifact_type == "symlink":
            if not member.issym():
                raise RuntimeError("exported archive did not contain a symlink")
            _validate_archive_symlink_target(PurePosixPath(member.name), member.linkname)
            os.symlink(member.linkname, output_path)
        else:
            raise RuntimeError(f"unsupported artifact type: {artifact_type}")
    return {
        "output_path": str(output_path),
        "artifact_type": artifact_type,
        "entry_count": entry_count,
        "bytes_written": bytes_written,
    }
def _normalize_workspace_service_name(service_name: str) -> str:
    """Strip a service name and validate it against the allowed pattern."""
    candidate = service_name.strip()
    if not candidate:
        raise ValueError("service_name must not be empty")
    if not WORKSPACE_SERVICE_NAME_RE.fullmatch(candidate):
        raise ValueError(
            "service_name must match "
            r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$"
        )
    return candidate
def _normalize_workspace_name(
name: str | None,
*,
allow_none: bool = False,
) -> str | None:
if name is None:
if allow_none:
return None
raise ValueError("name must not be empty")
normalized = name.strip()
if normalized == "":
if allow_none:
return None
raise ValueError("name must not be empty")
if len(normalized) > 120:
raise ValueError("name must be at most 120 characters")
return normalized
def _normalize_workspace_label_key(label_key: str) -> str:
    """Strip a label key and validate it against the allowed pattern."""
    candidate = label_key.strip()
    if not candidate:
        raise ValueError("label key must not be empty")
    if not WORKSPACE_LABEL_KEY_RE.fullmatch(candidate):
        raise ValueError(
            "label key must match "
            r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$"
        )
    return candidate
def _normalize_workspace_label_value(label_key: str, label_value: str) -> str:
normalized = label_value.strip()
if normalized == "":
raise ValueError(f"label {label_key!r} must not be empty")
if len(normalized) > 120:
raise ValueError(f"label {label_key!r} must be at most 120 characters")
if "\n" in normalized or "\r" in normalized:
raise ValueError(f"label {label_key!r} must not contain newlines")
try:
normalized.encode("utf-8")
except UnicodeEncodeError as exc:
raise ValueError(f"label {label_key!r} must be valid UTF-8 text") from exc
return normalized
def _normalize_workspace_labels(value: object) -> dict[str, str]:
    """Normalize a raw labels payload into a key-sorted {key: value} mapping."""
    if value is None:
        return {}
    if not isinstance(value, dict):
        raise ValueError("labels must be an object mapping keys to values")
    normalized_pairs: dict[str, str] = {}
    for raw_key, raw_value in value.items():
        normalized_key = _normalize_workspace_label_key(str(raw_key))
        normalized_pairs[normalized_key] = _normalize_workspace_label_value(
            normalized_key, str(raw_value)
        )
    return dict(sorted(normalized_pairs.items()))
def _normalize_workspace_snapshot_name(
    snapshot_name: str,
    *,
    allow_baseline: bool = False,
) -> str:
    """Strip and validate a snapshot name; 'baseline' is reserved by default."""
    candidate = snapshot_name.strip()
    if not candidate:
        raise ValueError("snapshot_name must not be empty")
    if not allow_baseline and candidate == "baseline":
        raise ValueError("snapshot_name 'baseline' is reserved")
    if not WORKSPACE_SNAPSHOT_NAME_RE.fullmatch(candidate):
        raise ValueError(
            "snapshot_name must match "
            r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$"
        )
    return candidate
def _normalize_workspace_secret_name(secret_name: str) -> str:
    """Strip a secret name and validate it as an env-style identifier."""
    candidate = secret_name.strip()
    if not candidate:
        raise ValueError("secret name must not be empty")
    if not WORKSPACE_SECRET_NAME_RE.fullmatch(candidate):
        raise ValueError(
            "secret name must match "
            r"^[A-Za-z_][A-Za-z0-9_]{0,63}$"
        )
    return candidate
def _validate_workspace_secret_value(secret_name: str, value: str) -> str:
    """Ensure a secret value is UTF-8 encodable, non-empty, and within the byte cap."""
    try:
        utf8_bytes = value.encode("utf-8")
    except UnicodeEncodeError as exc:
        raise ValueError(f"secret {secret_name!r} must be valid UTF-8 text") from exc
    if not value:
        raise ValueError(f"secret {secret_name!r} must not be empty")
    # The cap applies to the encoded byte length, not the character count.
    if len(utf8_bytes) > WORKSPACE_SECRET_MAX_BYTES:
        raise ValueError(
            f"secret {secret_name!r} must be at most {WORKSPACE_SECRET_MAX_BYTES} bytes"
        )
    return value
def _normalize_workspace_secret_env_mapping(
    secret_env: dict[str, str] | None,
) -> dict[str, str]:
    """Normalize a secret-name -> env-var-name mapping.

    Both sides are validated against the secret-name grammar.  Duplicate
    secret references (after normalization, e.g. " FOO" and "FOO") are
    rejected, and so are two secrets mapped onto the same environment
    variable — previously the collision was silent and last-write-wins,
    leaking one secret's value into the other's variable.

    Returns an empty mapping when *secret_env* is None.
    Raises ValueError on any invalid or conflicting entry.
    """
    if secret_env is None:
        return {}
    normalized: dict[str, str] = {}
    seen_env_names: set[str] = set()
    for secret_name, env_name in secret_env.items():
        normalized_secret_name = _normalize_workspace_secret_name(str(secret_name))
        normalized_env_name = _normalize_workspace_secret_name(str(env_name))
        if normalized_secret_name in normalized:
            raise ValueError(
                f"secret_env references secret {normalized_secret_name!r} more than once"
            )
        if normalized_env_name in seen_env_names:
            # Two secrets writing the same env var would collide silently.
            raise ValueError(
                f"secret_env maps multiple secrets to env var {normalized_env_name!r}"
            )
        seen_env_names.add(normalized_env_name)
        normalized[normalized_secret_name] = normalized_env_name
    return normalized
def _normalize_workspace_published_port(
    *,
    guest_port: object,
    host_port: object | None = None,
) -> WorkspacePublishedPortSpec:
    """Validate one published-port mapping and return its normalized spec.

    guest_port must coerce to 1..65535.  host_port, when given, must coerce to
    1025..65535 (privileged ports rejected).  bool is rejected explicitly
    because it is an int subclass.
    """
    def coerce_port(value: object, label: str) -> int:
        if isinstance(value, bool) or not isinstance(value, int | str):
            raise ValueError(f"published {label} must be an integer")
        try:
            return int(value)
        except (TypeError, ValueError) as exc:
            raise ValueError(f"published {label} must be an integer") from exc
    normalized_guest_port = coerce_port(guest_port, "guest_port")
    if not (1 <= normalized_guest_port <= 65535):
        raise ValueError("published guest_port must be between 1 and 65535")
    normalized_host_port: int | None = None
    if host_port is not None:
        normalized_host_port = coerce_port(host_port, "host_port")
        if not (1025 <= normalized_host_port <= 65535):
            raise ValueError("published host_port must be between 1025 and 65535")
    return WorkspacePublishedPortSpec(
        guest_port=normalized_guest_port,
        host_port=normalized_host_port,
    )
def _normalize_workspace_published_port_specs(
    published_ports: list[dict[str, Any]] | None,
) -> list[WorkspacePublishedPortSpec]:
    """Normalize a list of published-port dicts, rejecting repeated mappings.

    Deduplication is on the (host_port, guest_port) pair, so the same guest
    port may appear with different host ports.
    """
    if not published_ports:
        return []
    specs: list[WorkspacePublishedPortSpec] = []
    seen_mappings: set[tuple[int | None, int]] = set()
    for position, entry in enumerate(published_ports, start=1):
        if not isinstance(entry, dict):
            raise ValueError(f"published port #{position} must be a dictionary")
        spec = _normalize_workspace_published_port(
            guest_port=entry.get("guest_port"),
            host_port=entry.get("host_port"),
        )
        mapping_key = (spec.host_port, spec.guest_port)
        if mapping_key in seen_mappings:
            raise ValueError(
                "published ports must not repeat the same host/guest port mapping"
            )
        seen_mappings.add(mapping_key)
        specs.append(spec)
    return specs
def _normalize_workspace_service_readiness(
    readiness: dict[str, Any] | None,
) -> dict[str, Any] | None:
    """Validate a service readiness probe spec and return its canonical form.

    Supported probe types are file, tcp, http, and command.  Returns None
    when no readiness spec was provided.
    """
    if readiness is None:
        return None
    probe_type = str(readiness.get("type", "")).strip().lower()
    if probe_type == "file":
        raw_path = str(readiness.get("path", "")).strip()
        if not raw_path:
            raise ValueError("readiness.path is required for file readiness")
        normalized_path, _ = _normalize_workspace_destination(raw_path)
        return {"type": "file", "path": normalized_path}
    if probe_type == "tcp":
        address = str(readiness.get("address", "")).strip()
        if ":" not in address:
            raise ValueError("readiness.address must be in HOST:PORT format")
        # Split on the LAST colon so IPv6-ish hosts keep their colons.
        host, _, raw_port = address.rpartition(":")
        host = host.strip()
        if not host:
            raise ValueError("readiness.address host must not be empty")
        try:
            port = int(raw_port)
        except ValueError as exc:
            raise ValueError("readiness.address port must be an integer") from exc
        if not (1 <= port <= 65535):
            raise ValueError("readiness.address port must be between 1 and 65535")
        return {"type": "tcp", "address": f"{host}:{port}"}
    if probe_type == "http":
        url = str(readiness.get("url", "")).strip()
        if not url:
            raise ValueError("readiness.url is required for http readiness")
        return {"type": "http", "url": url}
    if probe_type == "command":
        command = str(readiness.get("command", "")).strip()
        if not command:
            raise ValueError("readiness.command is required for command readiness")
        return {"type": "command", "command": command}
    raise ValueError("readiness.type must be one of: file, tcp, http, command")
def _workspace_service_status_path(services_dir: Path, service_name: str) -> Path:
return services_dir / f"{service_name}.status"
def _workspace_service_stdout_path(services_dir: Path, service_name: str) -> Path:
return services_dir / f"{service_name}.stdout"
def _workspace_service_stderr_path(services_dir: Path, service_name: str) -> Path:
return services_dir / f"{service_name}.stderr"
def _workspace_service_runner_path(services_dir: Path, service_name: str) -> Path:
return services_dir / f"{service_name}.runner.sh"
def _workspace_service_port_ready_path(
services_dir: Path,
service_name: str,
host_port: int,
guest_port: int,
) -> Path:
return services_dir / f"{service_name}.port-{host_port}-to-{guest_port}.ready.json"
def _read_service_exit_code(status_path: Path) -> int | None:
if not status_path.exists():
return None
raw_value = status_path.read_text(encoding="utf-8", errors="ignore").strip()
if raw_value == "":
return None
return int(raw_value)
def _tail_text(path: Path, *, tail_lines: int | None) -> tuple[str, bool]:
if not path.exists():
return "", False
text = path.read_text(encoding="utf-8", errors="replace")
if tail_lines is None:
return text, False
lines = text.splitlines(keepends=True)
if len(lines) <= tail_lines:
return text, False
return "".join(lines[-tail_lines:]), True
def _stop_process_group(pid: int, *, wait_seconds: int = 5) -> tuple[bool, bool]:
    """Terminate the process group led by *pid*, escalating to SIGKILL.

    Sends SIGTERM and polls for up to *wait_seconds*; if the leader is still
    alive, sends SIGKILL and polls again.  Returns ``(stopped, killed)``:
    ``stopped`` is False only when the group was already gone before SIGTERM,
    ``killed`` is True when SIGKILL was sent.
    """
    def wait_for_exit() -> bool:
        deadline = time.monotonic() + wait_seconds
        while time.monotonic() < deadline:
            if not _pid_is_running(pid):
                return True
            time.sleep(0.1)
        return False
    try:
        os.killpg(pid, signal.SIGTERM)
    except ProcessLookupError:
        return False, False
    if wait_for_exit():
        return True, False
    try:
        os.killpg(pid, signal.SIGKILL)
    except ProcessLookupError:
        return True, False
    wait_for_exit()
    return True, True
def _run_service_probe_command(
cwd: Path,
command: str,
*,
env_overrides: dict[str, str] | None = None,
) -> int:
env = {"PATH": os.environ.get("PATH", ""), "HOME": str(cwd)}
if env_overrides is not None:
env.update(env_overrides)
proc = subprocess.run( # noqa: S603
["bash", "-lc", command], # noqa: S607
cwd=cwd,
env=env,
text=True,
capture_output=True,
timeout=10,
check=False,
)
return proc.returncode
def _service_ready_on_host(
    *,
    readiness: dict[str, Any] | None,
    workspace_dir: Path,
    cwd: Path,
    env_overrides: dict[str, str] | None = None,
) -> bool:
    """Evaluate one normalized readiness probe on the host.

    A missing spec means the service is always considered ready.  Probe
    types mirror the normalized spec shapes: "file" checks path existence
    inside the workspace, "tcp" attempts a 1s connect, "http" issues a GET
    (2xx/3xx counts as ready), "command" runs a shell command and checks for
    exit code 0.  Probe failures return False rather than raising.
    """
    if readiness is None:
        return True
    readiness_type = str(readiness["type"])
    if readiness_type == "file":
        ready_path = _workspace_host_destination(workspace_dir, str(readiness["path"]))
        return ready_path.exists()
    if readiness_type == "tcp":
        # rsplit keeps colons in the host part (e.g. IPv6-style addresses).
        host, raw_port = str(readiness["address"]).rsplit(":", 1)
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            try:
                sock.connect((host, int(raw_port)))
            except OSError:
                return False
        return True
    if readiness_type == "http":
        request = urllib.request.Request(str(readiness["url"]), method="GET")
        try:
            with urllib.request.urlopen(request, timeout=2) as response:  # noqa: S310
                return 200 <= int(response.status) < 400
        except (urllib.error.URLError, TimeoutError, ValueError):
            return False
    if readiness_type == "command":
        try:
            return (
                _run_service_probe_command(
                    cwd,
                    str(readiness["command"]),
                    env_overrides=env_overrides,
                )
                == 0
            )
        except (OSError, subprocess.TimeoutExpired):
            return False
    # Unreachable for specs produced by _normalize_workspace_service_readiness.
    raise RuntimeError(f"unsupported readiness type: {readiness_type}")
def _refresh_local_service_record(
    service: WorkspaceServiceRecord,
    *,
    services_dir: Path,
) -> WorkspaceServiceRecord:
    """Reconcile a host-mode service record with the live process state.

    A record that claims to be "running" but whose pid is gone is rewritten
    as "exited", picking up the exit code recorded by the runner script's
    status file when available.  Records in any other state (or with no pid)
    are returned unchanged.
    """
    if service.state != "running" or service.pid is None:
        return service
    if _pid_is_running(service.pid):
        return service
    # Build a fresh record rather than mutating the caller's copy in place.
    refreshed = WorkspaceServiceRecord(
        workspace_id=service.workspace_id,
        service_name=service.service_name,
        command=service.command,
        cwd=service.cwd,
        state="exited",
        started_at=service.started_at,
        readiness=dict(service.readiness) if service.readiness is not None else None,
        ready_at=service.ready_at,
        ended_at=service.ended_at or time.time(),
        exit_code=_read_service_exit_code(
            _workspace_service_status_path(services_dir, service.service_name)
        ),
        pid=service.pid,
        execution_mode=service.execution_mode,
        stop_reason=service.stop_reason,
        published_ports=list(service.published_ports),
        metadata=dict(service.metadata),
    )
    return refreshed
def _start_local_service(
    *,
    services_dir: Path,
    workspace_dir: Path,
    workspace_id: str,
    service_name: str,
    command: str,
    cwd_text: str,
    readiness: dict[str, Any] | None,
    ready_timeout_seconds: int,
    ready_interval_ms: int,
    env_overrides: dict[str, str] | None = None,
) -> WorkspaceServiceRecord:
    """Start a long-running service directly on the host ("host_compat" mode).

    Generates a small supervisor shell script that runs *command* with stdout
    and stderr appended to per-service log files and records the exit status
    to a status file, then launches it in its own session (so the whole
    process group can be signalled later).  Polls the readiness probe until
    it passes or *ready_timeout_seconds* elapses; on timeout or early exit
    the process group is stopped and the record is returned as "failed".
    """
    services_dir.mkdir(parents=True, exist_ok=True)
    cwd = _workspace_host_destination(workspace_dir, cwd_text)
    cwd.mkdir(parents=True, exist_ok=True)
    stdout_path = _workspace_service_stdout_path(services_dir, service_name)
    stderr_path = _workspace_service_stderr_path(services_dir, service_name)
    status_path = _workspace_service_status_path(services_dir, service_name)
    runner_path = _workspace_service_runner_path(services_dir, service_name)
    # Truncate logs and clear any stale status from a previous run.
    stdout_path.write_text("", encoding="utf-8")
    stderr_path.write_text("", encoding="utf-8")
    status_path.unlink(missing_ok=True)
    # All paths and the command are shell-quoted; the script records the
    # command's exit status before propagating it.
    runner_path.write_text(
        "\n".join(
            [
                "#!/bin/sh",
                "set +e",
                f"cd {shlex.quote(str(cwd))}",
                (
                    f"/bin/sh -lc {shlex.quote(command)}"
                    f" >> {shlex.quote(str(stdout_path))}"
                    f" 2>> {shlex.quote(str(stderr_path))}"
                ),
                "status=$?",
                f"printf '%s' \"$status\" > {shlex.quote(str(status_path))}",
                "exit \"$status\"",
            ]
        )
        + "\n",
        encoding="utf-8",
    )
    runner_path.chmod(0o700)
    # Minimal environment: PATH passthrough plus HOME pinned to the cwd.
    env = {"PATH": os.environ.get("PATH", ""), "HOME": str(cwd)}
    if env_overrides is not None:
        env.update(env_overrides)
    # start_new_session makes the runner a process-group leader so
    # _stop_process_group can signal the whole tree.
    process = subprocess.Popen(  # noqa: S603
        [str(runner_path)],
        cwd=str(cwd),
        env=env,
        text=True,
        start_new_session=True,
    )
    started_at = time.time()
    service = WorkspaceServiceRecord(
        workspace_id=workspace_id,
        service_name=service_name,
        command=command,
        cwd=cwd_text,
        state="running",
        started_at=started_at,
        readiness=dict(readiness) if readiness is not None else None,
        ready_at=None,
        ended_at=None,
        exit_code=None,
        pid=process.pid,
        execution_mode="host_compat",
        stop_reason=None,
    )
    # Readiness polling loop: exit-before-ready and timeout both fail the start.
    deadline = time.monotonic() + ready_timeout_seconds
    while True:
        service = _refresh_local_service_record(service, services_dir=services_dir)
        if service.state != "running":
            service.state = "failed"
            service.stop_reason = "process_exited_before_ready"
            if service.ended_at is None:
                service.ended_at = time.time()
            return service
        if _service_ready_on_host(
            readiness=readiness,
            workspace_dir=workspace_dir,
            cwd=cwd,
            env_overrides=env_overrides,
        ):
            service.ready_at = time.time()
            return service
        if time.monotonic() >= deadline:
            _stop_process_group(process.pid)
            service = _refresh_local_service_record(service, services_dir=services_dir)
            service.state = "failed"
            service.stop_reason = "readiness_timeout"
            if service.ended_at is None:
                service.ended_at = time.time()
            return service
        time.sleep(max(ready_interval_ms, 1) / 1000)
def _stop_local_service(
    service: WorkspaceServiceRecord,
    *,
    services_dir: Path,
) -> WorkspaceServiceRecord:
    """Stop a host-mode service's process group and return its updated record."""
    if service.pid is None:
        return service
    terminated, force_killed = _stop_process_group(service.pid)
    updated = _refresh_local_service_record(service, services_dir=services_dir)
    if terminated:
        updated.state = "stopped"
        updated.stop_reason = "sigkill" if force_killed else "sigterm"
        updated.ended_at = updated.ended_at or time.time()
    return updated
def _start_workspace_published_port_proxy(
    *,
    services_dir: Path,
    service_name: str,
    workspace_id: str,
    guest_ip: str,
    spec: WorkspacePublishedPortSpec,
) -> WorkspacePublishedPortRecord:
    """Launch a host-side TCP proxy for one published port and await readiness.

    Spawns ``python -m pyro_mcp.workspace_ports`` in its own session; the
    proxy signals readiness by writing a JSON payload (including the actual
    bound host port, which matters when host_port=0 means "pick any") to a
    ready file.  Waits up to 5 seconds; on proxy death or timeout the proxy
    is stopped and RuntimeError is raised.
    """
    ready_path = _workspace_service_port_ready_path(
        services_dir,
        service_name,
        spec.host_port or 0,
        spec.guest_port,
    )
    # Remove any stale ready file so we never read a previous run's payload.
    ready_path.unlink(missing_ok=True)
    command = [
        sys.executable,
        "-m",
        "pyro_mcp.workspace_ports",
        "--listen-host",
        DEFAULT_PUBLISHED_PORT_HOST,
        "--listen-port",
        str(spec.host_port or 0),
        "--target-host",
        guest_ip,
        "--target-port",
        str(spec.guest_port),
        "--ready-file",
        str(ready_path),
    ]
    process = subprocess.Popen(  # noqa: S603
        command,
        text=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True,
    )
    deadline = time.monotonic() + 5
    while time.monotonic() < deadline:
        if ready_path.exists():
            payload = json.loads(ready_path.read_text(encoding="utf-8"))
            if not isinstance(payload, dict):
                raise RuntimeError("published port proxy ready payload is invalid")
            ready_path.unlink(missing_ok=True)
            return WorkspacePublishedPortRecord(
                guest_port=int(payload.get("target_port", spec.guest_port)),
                host_port=int(payload["host_port"]),
                host=str(payload.get("host", DEFAULT_PUBLISHED_PORT_HOST)),
                protocol=str(payload.get("protocol", "tcp")),
                proxy_pid=process.pid,
            )
        if process.poll() is not None:
            # Proxy exited before signalling readiness.
            raise RuntimeError(
                "failed to start published port proxy for "
                f"service {service_name!r} in workspace {workspace_id!r}"
            )
        time.sleep(0.05)
    # Timed out: tear the proxy down before reporting failure.
    _stop_workspace_published_port_proxy(
        WorkspacePublishedPortRecord(
            guest_port=spec.guest_port,
            host_port=spec.host_port or 0,
            proxy_pid=process.pid,
        )
    )
    ready_path.unlink(missing_ok=True)
    raise RuntimeError(
        "timed out waiting for published port proxy readiness for "
        f"service {service_name!r} in workspace {workspace_id!r}"
    )
def _stop_workspace_published_port_proxy(published_port: WorkspacePublishedPortRecord) -> None:
    """Tear down a published-port proxy's process group, escalating to SIGKILL."""
    proxy_pid = published_port.proxy_pid
    if proxy_pid is None:
        return
    try:
        os.killpg(proxy_pid, signal.SIGTERM)
    except ProcessLookupError:
        return
    # Give the proxy up to five seconds to exit gracefully.
    deadline = time.monotonic() + 5
    while time.monotonic() < deadline:
        if not _pid_is_running(proxy_pid):
            return
        time.sleep(0.05)
    try:
        os.killpg(proxy_pid, signal.SIGKILL)
    except ProcessLookupError:
        return
def _instance_workspace_host_dir(instance: VmInstance) -> Path:
    """Resolve the host-side workspace directory recorded in instance metadata."""
    host_dir = instance.metadata.get("workspace_host_dir")
    if host_dir is None or host_dir == "":
        raise RuntimeError("workspace host directory is unavailable")
    return Path(host_dir)
def _patch_rootfs_runtime_file(
    rootfs_image: Path,
    *,
    source_path: Path,
    destination_path: str,
    asset_label: str,
    file_mode: str | None = None,
) -> None:
    """Write *source_path* into an ext filesystem image via ``debugfs``.

    The file is staged to a temp dir, any existing copy at *destination_path*
    is removed (best effort — the rm is allowed to fail if the file is
    absent), then written; when *file_mode* is given the inode mode is set
    afterwards.  Raises RuntimeError when debugfs is missing or the write/
    chmod step fails; *asset_label* is only used in error messages.
    """
    debugfs_path = shutil.which("debugfs")
    if debugfs_path is None:
        raise RuntimeError(
            "debugfs is required to seed workspaces on guest-backed runtimes"
        )
    with tempfile.TemporaryDirectory(prefix=f"pyro-{asset_label}-") as temp_dir:
        staged_path = Path(temp_dir) / Path(destination_path).name
        shutil.copy2(source_path, staged_path)
        # Best-effort removal: check=False because the target may not exist.
        subprocess.run(  # noqa: S603
            [debugfs_path, "-w", "-R", f"rm {destination_path}", str(rootfs_image)],
            text=True,
            capture_output=True,
            check=False,
        )
        proc = subprocess.run(  # noqa: S603
            [
                debugfs_path,
                "-w",
                "-R",
                f"write {staged_path} {destination_path}",
                str(rootfs_image),
            ],
            text=True,
            capture_output=True,
            check=False,
        )
        # Only attempt the chmod when the write succeeded; the final check
        # below then covers whichever step ran last.
        if proc.returncode == 0 and file_mode is not None:
            proc = subprocess.run(  # noqa: S603
                [
                    debugfs_path,
                    "-w",
                    "-R",
                    f"set_inode_field {destination_path} mode {file_mode}",
                    str(rootfs_image),
                ],
                text=True,
                capture_output=True,
                check=False,
            )
        if proc.returncode != 0:
            raise RuntimeError(
                f"failed to patch {asset_label} into workspace rootfs: "
                f"{proc.stderr.strip() or proc.stdout.strip()}"
            )
def _ensure_no_symlink_parents(root: Path, target_path: Path, member_name: str) -> None:
relative_path = target_path.relative_to(root)
current = root
for part in relative_path.parts[:-1]:
current = current / part
if current.is_symlink():
raise RuntimeError(
f"archive member would traverse through a symlinked path: {member_name}"
)
def _pid_is_running(pid: int | None) -> bool:
if pid is None:
return False
try:
os.kill(pid, 0)
except ProcessLookupError:
return False
except PermissionError:
return True
return True
def _collect_workspace_tree(root: Path) -> dict[str, WorkspaceTreeEntry]:
    """Index every file, symlink, and *empty* directory under *root*.

    Keys are POSIX-style paths relative to *root*.  Non-empty directories are
    represented implicitly by their children (only childless directories get
    their own entry), which keeps tree diffs free of redundant directory
    noise.  Raises RuntimeError on unsupported node types.
    """
    entries: dict[str, WorkspaceTreeEntry] = {}
    def walk(current: Path, relative_parts: tuple[str, ...] = ()) -> bool:
        # Returns True when the directory contributed at least one entry,
        # so the caller can record empty directories explicitly.
        has_entries = False
        for child in sorted(current.iterdir(), key=lambda item: item.name):
            child_relative_parts = relative_parts + (child.name,)
            relative_path = "/".join(child_relative_parts)
            # is_symlink() must be checked first: is_file()/is_dir() follow links.
            if child.is_symlink():
                entries[relative_path] = WorkspaceTreeEntry(
                    path=relative_path,
                    artifact_type="symlink",
                    disk_path=child,
                    link_target=os.readlink(child),
                )
                has_entries = True
                continue
            if child.is_file():
                entries[relative_path] = WorkspaceTreeEntry(
                    path=relative_path,
                    artifact_type="file",
                    disk_path=child,
                    size_bytes=child.stat().st_size,
                )
                has_entries = True
                continue
            if child.is_dir():
                child_has_entries = walk(child, child_relative_parts)
                if not child_has_entries:
                    # Only empty directories are recorded directly.
                    entries[relative_path] = WorkspaceTreeEntry(
                        path=relative_path,
                        artifact_type="directory",
                        disk_path=child,
                    )
                    has_entries = True
                else:
                    has_entries = True
                continue
            raise RuntimeError(f"unsupported workspace artifact type: {child}")
        return has_entries
    walk(root)
    return entries
def _is_probably_text(data: bytes) -> bool:
if b"\x00" in data:
return False
try:
data.decode("utf-8")
except UnicodeDecodeError:
return False
return True
def _build_text_patch(
*,
path: str,
before_text: str,
after_text: str,
status: str,
) -> str:
if status == "added":
fromfile = "/dev/null"
tofile = f"b/{path}"
elif status == "deleted":
fromfile = f"a/{path}"
tofile = "/dev/null"
else:
fromfile = f"a/{path}"
tofile = f"b/{path}"
lines = list(
difflib.unified_diff(
before_text.splitlines(keepends=True),
after_text.splitlines(keepends=True),
fromfile=fromfile,
tofile=tofile,
n=3,
)
)
if not lines:
return ""
return "".join(lines)
def _diff_workspace_trees(
    baseline_root: Path,
    current_root: Path,
) -> dict[str, Any]:
    """Diff two workspace trees and summarize added/modified/deleted entries.

    Each changed path is classified as added, deleted, modified, or
    type_changed; UTF-8 text files additionally get a unified-diff
    ``text_patch`` and contribute to the concatenated ``patch`` string.
    Returns ``{"changed", "summary", "entries", "patch"}``.
    """
    baseline_entries = _collect_workspace_tree(baseline_root)
    current_entries = _collect_workspace_tree(current_root)
    changed_entries: list[dict[str, Any]] = []
    patch_parts: list[str] = []
    summary = {
        "total": 0,
        "added": 0,
        "modified": 0,
        "deleted": 0,
        "type_changed": 0,
        "text_patched": 0,
        "non_text": 0,
    }
    # Union of both trees, sorted so output and the combined patch are stable.
    for path in sorted(set(baseline_entries) | set(current_entries)):
        baseline_entry = baseline_entries.get(path)
        current_entry = current_entries.get(path)
        entry_payload: dict[str, Any] | None = None
        text_patch = ""
        if baseline_entry is None and current_entry is not None:
            # Present only in the current tree: added.
            entry_payload = {
                "path": path,
                "status": "added",
                "artifact_type": current_entry.artifact_type,
                "text_patch": None,
            }
            if current_entry.artifact_type == "file":
                current_bytes = current_entry.disk_path.read_bytes()
                if _is_probably_text(current_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text="",
                        after_text=current_bytes.decode("utf-8"),
                        status="added",
                    )
        elif current_entry is None and baseline_entry is not None:
            # Present only in the baseline tree: deleted.
            entry_payload = {
                "path": path,
                "status": "deleted",
                "artifact_type": baseline_entry.artifact_type,
                "text_patch": None,
            }
            if baseline_entry.artifact_type == "file":
                baseline_bytes = baseline_entry.disk_path.read_bytes()
                if _is_probably_text(baseline_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text=baseline_bytes.decode("utf-8"),
                        after_text="",
                        status="deleted",
                    )
        elif baseline_entry is not None and current_entry is not None:
            if baseline_entry.artifact_type != current_entry.artifact_type:
                entry_payload = {
                    "path": path,
                    "status": "type_changed",
                    "artifact_type": current_entry.artifact_type,
                    "text_patch": None,
                }
            elif current_entry.artifact_type == "directory":
                # Directory entries only appear when empty; same-type match
                # means no change to report.
                continue
            elif current_entry.artifact_type == "symlink":
                if baseline_entry.link_target != current_entry.link_target:
                    entry_payload = {
                        "path": path,
                        "status": "modified",
                        "artifact_type": current_entry.artifact_type,
                        "text_patch": None,
                    }
            else:
                baseline_bytes = baseline_entry.disk_path.read_bytes()
                current_bytes = current_entry.disk_path.read_bytes()
                if baseline_bytes == current_bytes:
                    continue
                entry_payload = {
                    "path": path,
                    "status": "modified",
                    "artifact_type": current_entry.artifact_type,
                    "text_patch": None,
                }
                # A text patch is only possible when both sides decode cleanly.
                if _is_probably_text(baseline_bytes) and _is_probably_text(current_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text=baseline_bytes.decode("utf-8"),
                        after_text=current_bytes.decode("utf-8"),
                        status="modified",
                    )
        if entry_payload is None:
            continue
        summary["total"] += 1
        summary[str(entry_payload["status"])] += 1
        if text_patch != "":
            entry_payload["text_patch"] = text_patch
            patch_parts.append(text_patch)
            summary["text_patched"] += 1
        else:
            summary["non_text"] += 1
        changed_entries.append(entry_payload)
    return {
        "changed": bool(changed_entries),
        "summary": summary,
        "entries": changed_entries,
        "patch": "".join(patch_parts),
    }
class VmBackend:
"""Backend interface for lifecycle operations."""
    def create(self, instance: VmInstance) -> None: # pragma: no cover
        """Provision backend resources for *instance*; concrete backends implement."""
        raise NotImplementedError
    def start(self, instance: VmInstance) -> None: # pragma: no cover
        """Start the instance; concrete backends implement."""
        raise NotImplementedError
    def exec( # pragma: no cover
        self,
        instance: VmInstance,
        command: str,
        timeout_seconds: int,
        *,
        workdir: Path | None = None,
        env: dict[str, str] | None = None,
    ) -> VmExecResult:
        """Run *command* inside the instance and return the execution result.

        Abstract hook; concrete backends implement.
        """
        raise NotImplementedError
    def stop(self, instance: VmInstance) -> None: # pragma: no cover
        """Stop the instance without releasing its resources; backends implement."""
        raise NotImplementedError
    def delete(self, instance: VmInstance) -> None: # pragma: no cover
        """Release all backend resources for the instance; backends implement."""
        raise NotImplementedError
def import_archive( # pragma: no cover
self,
instance: VmInstance,
*,
archive_path: Path,
destination: str,
) -> dict[str, Any]:
raise NotImplementedError
def install_secrets( # pragma: no cover
self,
instance: VmInstance,
*,
archive_path: Path,
) -> dict[str, Any]:
raise NotImplementedError
def export_archive( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
archive_path: Path,
) -> dict[str, Any]:
raise NotImplementedError
def list_workspace_entries( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
recursive: bool,
) -> dict[str, Any]:
raise NotImplementedError
def read_workspace_file( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
max_bytes: int,
) -> dict[str, Any]:
raise NotImplementedError
def write_workspace_file( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
text: str,
) -> dict[str, Any]:
raise NotImplementedError
def delete_workspace_path( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
) -> dict[str, Any]:
raise NotImplementedError
def open_shell( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
shell_id: str,
cwd: str,
cols: int,
rows: int,
env: dict[str, str] | None = None,
redact_values: list[str] | None = None,
) -> dict[str, Any]:
raise NotImplementedError
def read_shell( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
shell_id: str,
cursor: int,
max_chars: int,
) -> dict[str, Any]:
raise NotImplementedError
def write_shell( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
shell_id: str,
input_text: str,
append_newline: bool,
) -> dict[str, Any]:
raise NotImplementedError
def signal_shell( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
shell_id: str,
signal_name: str,
) -> dict[str, Any]:
raise NotImplementedError
def close_shell( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
shell_id: str,
) -> dict[str, Any]:
raise NotImplementedError
def start_service( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
service_name: str,
command: str,
cwd: str,
readiness: dict[str, Any] | None,
ready_timeout_seconds: int,
ready_interval_ms: int,
env: dict[str, str] | None = None,
) -> dict[str, Any]:
raise NotImplementedError
def status_service( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
service_name: str,
) -> dict[str, Any]:
raise NotImplementedError
def logs_service( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
service_name: str,
tail_lines: int | None,
) -> dict[str, Any]:
raise NotImplementedError
def stop_service( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_id: str,
service_name: str,
) -> dict[str, Any]:
raise NotImplementedError
class MockBackend(VmBackend):
    """Host-process backend used for development and testability.

    Every operation runs directly on the host: commands execute as local
    processes, workspace files live under the instance's host workspace
    directory, shells and services are local processes.  Payloads carry
    ``execution_mode: "host_compat"`` so callers can tell no VM is involved.
    """
    def create(self, instance: VmInstance) -> None:
        """Create the instance workdir; fails if it already exists."""
        instance.workdir.mkdir(parents=True, exist_ok=False)
    def start(self, instance: VmInstance) -> None:
        """Mark the instance as started via a marker file in the workdir."""
        marker_path = instance.workdir / ".started"
        marker_path.write_text("started\n", encoding="utf-8")
    def exec(
        self,
        instance: VmInstance,
        command: str,
        timeout_seconds: int,
        *,
        workdir: Path | None = None,
        env: dict[str, str] | None = None,
    ) -> VmExecResult:
        """Run *command* as a host process in *workdir* (default: instance workdir)."""
        return _run_host_command(
            workdir or instance.workdir,
            command,
            timeout_seconds,
            env_overrides=env,
        )
    def stop(self, instance: VmInstance) -> None:
        """Mark the instance as stopped via a marker file in the workdir."""
        marker_path = instance.workdir / ".stopped"
        marker_path.write_text("stopped\n", encoding="utf-8")
    def delete(self, instance: VmInstance) -> None:
        """Remove the instance workdir, ignoring errors."""
        shutil.rmtree(instance.workdir, ignore_errors=True)
    def import_archive(
        self,
        instance: VmInstance,
        *,
        archive_path: Path,
        destination: str,
    ) -> dict[str, Any]:
        """Extract the seed archive into the host workspace at *destination*."""
        return _extract_seed_archive_to_host_workspace(
            archive_path,
            workspace_dir=_instance_workspace_host_dir(instance),
            destination=destination,
        )
    def install_secrets(
        self,
        instance: VmInstance,
        *,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Inspect the secrets archive and report counts; nothing is installed on the host."""
        del instance
        entry_count, bytes_written = _inspect_seed_archive(archive_path)
        return {
            "destination": WORKSPACE_GUEST_SECRETS_PATH,
            "entry_count": entry_count,
            "bytes_written": bytes_written,
        }
    def export_archive(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Archive *workspace_path* from the host workspace into *archive_path*."""
        exported = _prepare_workspace_export_archive(
            workspace_dir=_instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            archive_path=archive_path,
        )
        return {
            "workspace_path": exported.workspace_path,
            "artifact_type": exported.artifact_type,
            "entry_count": exported.entry_count,
            "bytes_written": exported.bytes_written,
            "execution_mode": "host_compat",
        }
    def list_workspace_entries(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        recursive: bool,
    ) -> dict[str, Any]:
        """List entries under *workspace_path* in the host workspace."""
        listing = list_workspace_files(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            recursive=recursive,
        )
        return {
            "path": listing.path,
            "artifact_type": listing.artifact_type,
            "entries": [entry.to_payload() for entry in listing.entries],
            "execution_mode": "host_compat",
        }
    def read_workspace_file(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        max_bytes: int,
    ) -> dict[str, Any]:
        """Read up to *max_bytes* of a host workspace file."""
        file_result = read_workspace_file(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            max_bytes=max_bytes,
        )
        return {
            "path": file_result.path,
            "size_bytes": file_result.size_bytes,
            "content_bytes": file_result.content_bytes,
            "execution_mode": "host_compat",
        }
    def write_workspace_file(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        text: str,
    ) -> dict[str, Any]:
        """Write *text* to a host workspace file."""
        result = write_workspace_file(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            text=text,
        )
        return {
            "path": result.path,
            "size_bytes": result.size_bytes,
            "bytes_written": result.bytes_written,
            "execution_mode": "host_compat",
        }
    def delete_workspace_path(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
    ) -> dict[str, Any]:
        """Delete a path in the host workspace."""
        result = delete_workspace_path(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
        )
        return {
            "path": result.path,
            "deleted": result.deleted,
            "execution_mode": "host_compat",
        }
    def open_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        cwd: str,
        cols: int,
        rows: int,
        env: dict[str, str] | None = None,
        redact_values: list[str] | None = None,
    ) -> dict[str, Any]:
        """Open a local PTY shell rooted at the host equivalent of *cwd*."""
        session = create_local_shell(
            workspace_id=workspace_id,
            shell_id=shell_id,
            # Translate the workspace-relative cwd to a host path; the
            # workspace-relative form is kept for display.
            cwd=_workspace_host_destination(_instance_workspace_host_dir(instance), cwd),
            display_cwd=cwd,
            cols=cols,
            rows=rows,
            env_overrides=env,
            redact_values=redact_values,
        )
        summary = session.summary()
        summary["execution_mode"] = "host_compat"
        return summary
    def read_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        cursor: int,
        max_chars: int,
    ) -> dict[str, Any]:
        """Read buffered output from a local shell session."""
        del instance
        session = get_local_shell(workspace_id=workspace_id, shell_id=shell_id)
        payload = session.read(cursor=cursor, max_chars=max_chars)
        payload["execution_mode"] = "host_compat"
        return payload
    def write_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        input_text: str,
        append_newline: bool,
    ) -> dict[str, Any]:
        """Send input to a local shell session."""
        del instance
        session = get_local_shell(workspace_id=workspace_id, shell_id=shell_id)
        payload = session.write(input_text, append_newline=append_newline)
        payload["execution_mode"] = "host_compat"
        return payload
    def signal_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        signal_name: str,
    ) -> dict[str, Any]:
        """Deliver a signal to a local shell session."""
        del instance
        session = get_local_shell(workspace_id=workspace_id, shell_id=shell_id)
        payload = session.send_signal(signal_name)
        payload["execution_mode"] = "host_compat"
        return payload
    def close_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
    ) -> dict[str, Any]:
        """Close a local shell session; raises ValueError if it does not exist."""
        del instance
        session = remove_local_shell(workspace_id=workspace_id, shell_id=shell_id)
        if session is None:
            raise ValueError(f"shell {shell_id!r} does not exist in workspace {workspace_id!r}")
        payload = session.close()
        payload["execution_mode"] = "host_compat"
        return payload
    def start_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
        command: str,
        cwd: str,
        readiness: dict[str, Any] | None,
        ready_timeout_seconds: int,
        ready_interval_ms: int,
        env: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Start a local service process and wait on its readiness check."""
        # Service records live alongside the workdir, not inside it.
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = _start_local_service(
            services_dir=services_dir,
            workspace_dir=_instance_workspace_host_dir(instance),
            workspace_id=workspace_id,
            service_name=service_name,
            command=command,
            cwd_text=cwd,
            readiness=readiness,
            ready_timeout_seconds=ready_timeout_seconds,
            ready_interval_ms=ready_interval_ms,
            env_overrides=env,
        )
        return service.to_payload()
    def status_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
    ) -> dict[str, Any]:
        """Return the refreshed status of a local service."""
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = self._load_workspace_service(services_dir, workspace_id, service_name)
        refreshed = _refresh_local_service_record(
            service,
            services_dir=services_dir,
        )
        return refreshed.to_payload()
    def logs_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
        tail_lines: int | None,
    ) -> dict[str, Any]:
        """Return service status plus tails of its stdout/stderr log files."""
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = self._load_workspace_service(services_dir, workspace_id, service_name)
        refreshed = _refresh_local_service_record(service, services_dir=services_dir)
        payload = refreshed.to_payload()
        stdout, stdout_truncated = _tail_text(
            _workspace_service_stdout_path(services_dir, service_name),
            tail_lines=tail_lines,
        )
        stderr, stderr_truncated = _tail_text(
            _workspace_service_stderr_path(services_dir, service_name),
            tail_lines=tail_lines,
        )
        payload.update(
            {
                "stdout": stdout,
                "stderr": stderr,
                "tail_lines": tail_lines,
                "truncated": stdout_truncated or stderr_truncated,
            }
        )
        return payload
    def stop_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
    ) -> dict[str, Any]:
        """Stop a local service and return its final payload."""
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = self._load_workspace_service(services_dir, workspace_id, service_name)
        stopped = _stop_local_service(
            service,
            services_dir=services_dir,
        )
        return stopped.to_payload()
    def _load_workspace_service(
        self,
        services_dir: Path,
        workspace_id: str,
        service_name: str,
    ) -> WorkspaceServiceRecord:
        """Load the persisted service record; ValueError if missing, RuntimeError if malformed."""
        record_path = services_dir / f"{service_name}.json"
        if not record_path.exists():
            raise ValueError(
                f"service {service_name!r} does not exist in workspace {workspace_id!r}"
            )
        payload = json.loads(record_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise RuntimeError(f"service record at {record_path} is invalid")
        return WorkspaceServiceRecord.from_payload(payload)
class FirecrackerBackend(VmBackend): # pragma: no cover
    """Host-gated backend that validates Firecracker prerequisites.

    When the runtime supports guest execution, operations go through the
    vsock transport to the in-guest agent; otherwise they fall back to
    host-compat behavior (mirroring ``MockBackend``) or refuse outright
    for operations that must not touch the host (secrets, and services
    unless the instance explicitly allows host compatibility).
    """

    def __init__(
        self,
        environment_store: EnvironmentStore,
        firecracker_bin: Path,
        jailer_bin: Path,
        runtime_capabilities: RuntimeCapabilities,
        network_manager: TapNetworkManager | None = None,
        guest_exec_client: VsockExecClient | None = None,
    ) -> None:
        """Validate host prerequisites and wire up collaborators.

        Raises:
            RuntimeError: if the bundled binaries or /dev/kvm are missing.
        """
        self._environment_store = environment_store
        self._firecracker_bin = firecracker_bin
        self._jailer_bin = jailer_bin
        self._runtime_capabilities = runtime_capabilities
        self._network_manager = network_manager or TapNetworkManager()
        self._guest_exec_client = guest_exec_client or VsockExecClient()
        # Firecracker processes launched by this backend, keyed by vm_id.
        self._processes: dict[str, subprocess.Popen[str]] = {}
        # Fail fast on missing host prerequisites.
        if not self._firecracker_bin.exists():
            raise RuntimeError(f"bundled firecracker binary not found at {self._firecracker_bin}")
        if not self._jailer_bin.exists():
            raise RuntimeError(f"bundled jailer binary not found at {self._jailer_bin}")
        if not Path("/dev/kvm").exists():
            raise RuntimeError("/dev/kvm is not available on this host")

    def create(self, instance: VmInstance) -> None:
        """Prepare the instance workdir, environment artifacts, and network.

        On any failure the partially-built workdir is removed before
        re-raising so a failed create leaves no residue.
        """
        instance.workdir.mkdir(parents=True, exist_ok=False)
        try:
            installed_environment = self._environment_store.ensure_installed(instance.environment)
            if (
                not installed_environment.kernel_image.exists()
                or not installed_environment.rootfs_image.exists()
            ):
                raise RuntimeError(
                    f"missing environment artifacts for {instance.environment}; expected "
                    f"{installed_environment.kernel_image} and {installed_environment.rootfs_image}"
                )
            instance.metadata["environment_version"] = installed_environment.version
            instance.metadata["environment_source"] = installed_environment.source
            if installed_environment.source_digest is not None:
                instance.metadata["environment_digest"] = installed_environment.source_digest
            instance.metadata["environment_install_dir"] = str(installed_environment.install_dir)
            instance.metadata["kernel_image"] = str(installed_environment.kernel_image)
            # Each instance gets a private copy of the rootfs so guest writes
            # never leak back into the shared environment image.
            rootfs_copy = instance.workdir / "rootfs.ext4"
            instance.metadata["rootfs_clone_mode"] = _copy_rootfs(
                installed_environment.rootfs_image,
                rootfs_copy,
            )
            instance.metadata["rootfs_image"] = str(rootfs_copy)
            if instance.network_requested:
                network = self._network_manager.allocate(instance.vm_id)
                instance.network = network
                instance.metadata.update(self._network_manager.to_metadata(network))
            else:
                instance.network = None
                instance.metadata["network_enabled"] = "false"
        except Exception:
            shutil.rmtree(instance.workdir, ignore_errors=True)
            raise

    def start(self, instance: VmInstance) -> None:
        """Boot the microVM (or record shim mode when VM boot is unsupported).

        Records launch-plan paths and guest-exec endpoints in instance
        metadata, runs a `firecracker --version` preflight, then launches
        the process and verifies it survives early startup.
        """
        launch_plan = build_launch_plan(instance)
        # Remove stale sockets from a previous boot so firecracker can bind.
        for stale_socket_path in (
            launch_plan.api_socket_path,
            instance.workdir / "vsock.sock",
        ):
            stale_socket_path.unlink(missing_ok=True)
        instance.metadata["firecracker_config_path"] = str(launch_plan.config_path)
        instance.metadata["guest_network_path"] = str(launch_plan.guest_network_path)
        instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path)
        instance.metadata["guest_cid"] = str(launch_plan.guest_cid)
        instance.metadata["guest_exec_port"] = str(launch_plan.vsock_port)
        instance.metadata["guest_exec_uds_path"] = str(instance.workdir / "vsock.sock")
        serial_log_path = instance.workdir / "serial.log"
        firecracker_log_path = instance.workdir / "firecracker.log"
        firecracker_log_path.touch()
        instance.metadata["serial_log_path"] = str(serial_log_path)
        instance.metadata["firecracker_log_path"] = str(firecracker_log_path)
        # Preflight: confirm the bundled binary actually runs on this host.
        proc = subprocess.run(  # noqa: S603
            [str(self._firecracker_bin), "--version"],
            text=True,
            capture_output=True,
            check=False,
        )
        if proc.returncode != 0:
            raise RuntimeError(f"firecracker startup preflight failed: {proc.stderr.strip()}")
        instance.metadata["firecracker_version"] = proc.stdout.strip()
        instance.metadata["jailer_path"] = str(self._jailer_bin)
        if not self._runtime_capabilities.supports_vm_boot:
            # Runtime cannot boot VMs: record shim/host-compat mode and stop here.
            instance.metadata["execution_mode"] = "host_compat"
            instance.metadata["boot_mode"] = "shim"
            if self._runtime_capabilities.reason is not None:
                instance.metadata["runtime_reason"] = self._runtime_capabilities.reason
            return
        with serial_log_path.open("w", encoding="utf-8") as serial_fp:
            process = subprocess.Popen(  # noqa: S603
                [
                    str(self._firecracker_bin),
                    "--no-api",
                    "--config-file",
                    str(launch_plan.config_path),
                    "--log-path",
                    str(firecracker_log_path),
                    "--level",
                    "Info",
                ],
                stdout=serial_fp,
                stderr=subprocess.STDOUT,
                text=True,
                start_new_session=True,
            )
        self._processes[instance.vm_id] = process
        # Give the VMM a moment to crash on obviously-bad config before we
        # report success.
        time.sleep(2)
        if process.poll() is not None:
            serial_log = serial_log_path.read_text(encoding="utf-8", errors="ignore")
            firecracker_log = firecracker_log_path.read_text(encoding="utf-8", errors="ignore")
            self._processes.pop(instance.vm_id, None)
            raise RuntimeError(
                "firecracker microVM exited during startup: "
                f"{(serial_log or firecracker_log).strip()}"
            )
        instance.firecracker_pid = process.pid
        instance.metadata["execution_mode"] = (
            "guest_vsock" if self._runtime_capabilities.supports_guest_exec else "guest_boot_only"
        )
        instance.metadata["boot_mode"] = "native"

    def exec(
        self,
        instance: VmInstance,
        command: str,
        timeout_seconds: int,
        *,
        workdir: Path | None = None,
        env: dict[str, str] | None = None,
    ) -> VmExecResult:
        """Run *command* via the guest vsock transport, or on the host as fallback.

        The guest path retries briefly (up to min(timeout, 10)s) because the
        in-guest agent may still be coming up right after boot.
        """
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            deadline = time.monotonic() + min(timeout_seconds, 10)
            while True:
                try:
                    response = self._guest_exec_client.exec(
                        guest_cid,
                        port,
                        command,
                        timeout_seconds,
                        env=env,
                        uds_path=uds_path,
                    )
                    break
                except (OSError, RuntimeError) as exc:
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"guest exec transport did not become ready: {exc}"
                        ) from exc
                    time.sleep(0.2)
            return VmExecResult(
                stdout=response.stdout,
                stderr=response.stderr,
                exit_code=response.exit_code,
                duration_ms=response.duration_ms,
            )
        instance.metadata["execution_mode"] = "host_compat"
        return _run_host_command(
            workdir or instance.workdir,
            command,
            timeout_seconds,
            env_overrides=env,
        )

    def stop(self, instance: VmInstance) -> None:
        """Terminate the firecracker process, escalating to SIGKILL if needed."""
        process = self._processes.pop(instance.vm_id, None)
        if process is not None:
            # We own the Popen handle: polite terminate, then kill.
            process.terminate()
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait(timeout=5)
            instance.firecracker_pid = None
            return
        if instance.firecracker_pid is None:
            return
        # Process was started by a previous manager instance: signal by pid.
        try:
            os.kill(instance.firecracker_pid, signal.SIGTERM)
        except ProcessLookupError:
            instance.firecracker_pid = None
            return
        deadline = time.monotonic() + 5
        while time.monotonic() < deadline:
            try:
                os.kill(instance.firecracker_pid, 0)
            except ProcessLookupError:
                instance.firecracker_pid = None
                return
            time.sleep(0.1)
        os.kill(instance.firecracker_pid, signal.SIGKILL)
        instance.firecracker_pid = None

    def delete(self, instance: VmInstance) -> None:
        """Drop process tracking, release network resources, and remove the workdir."""
        self._processes.pop(instance.vm_id, None)
        if instance.network is not None:
            self._network_manager.cleanup(instance.network)
        shutil.rmtree(instance.workdir, ignore_errors=True)

    def import_archive(
        self,
        instance: VmInstance,
        *,
        archive_path: Path,
        destination: str,
    ) -> dict[str, Any]:
        """Upload the seed archive into the guest, or extract to host workspace."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            deadline = time.monotonic() + 10
            while True:
                try:
                    response = self._guest_exec_client.upload_archive(
                        guest_cid,
                        port,
                        archive_path,
                        destination=destination,
                        timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
                        uds_path=uds_path,
                    )
                    return {
                        "destination": response.destination,
                        "entry_count": response.entry_count,
                        "bytes_written": response.bytes_written,
                    }
                except (OSError, RuntimeError) as exc:
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"guest archive transport did not become ready: {exc}"
                        ) from exc
                    time.sleep(0.2)
        instance.metadata["execution_mode"] = "host_compat"
        return _extract_seed_archive_to_host_workspace(
            archive_path,
            workspace_dir=_instance_workspace_host_dir(instance),
            destination=destination,
        )

    def install_secrets(
        self,
        instance: VmInstance,
        *,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Install secrets into the guest; there is deliberately no host fallback."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            deadline = time.monotonic() + 10
            while True:
                try:
                    response = self._guest_exec_client.install_secrets(
                        guest_cid,
                        port,
                        archive_path,
                        timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
                        uds_path=uds_path,
                    )
                    return {
                        "destination": response.destination,
                        "entry_count": response.entry_count,
                        "bytes_written": response.bytes_written,
                    }
                except (OSError, RuntimeError) as exc:
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"guest secret transport did not become ready: {exc}"
                        ) from exc
                    time.sleep(0.2)
        # Secrets must never be written to the host filesystem.
        raise RuntimeError("workspace secrets require guest execution")

    def export_archive(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Export *workspace_path* from the guest (or host workspace) into an archive."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            deadline = time.monotonic() + 10
            while True:
                try:
                    response = self._guest_exec_client.export_archive(
                        guest_cid,
                        port,
                        workspace_path=workspace_path,
                        archive_path=archive_path,
                        timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
                        uds_path=uds_path,
                    )
                    return self._with_execution_mode(
                        instance,
                        {
                            "workspace_path": response.workspace_path,
                            "artifact_type": response.artifact_type,
                            "entry_count": response.entry_count,
                            "bytes_written": response.bytes_written,
                        },
                    )
                except (OSError, RuntimeError) as exc:
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"guest export transport did not become ready: {exc}"
                        ) from exc
                    time.sleep(0.2)
        instance.metadata["execution_mode"] = "host_compat"
        exported = _prepare_workspace_export_archive(
            workspace_dir=_instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            archive_path=archive_path,
        )
        return {
            "workspace_path": exported.workspace_path,
            "artifact_type": exported.artifact_type,
            "entry_count": exported.entry_count,
            "bytes_written": exported.bytes_written,
            "execution_mode": "host_compat",
        }

    def list_workspace_entries(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        recursive: bool,
    ) -> dict[str, Any]:
        """List entries under *workspace_path*, via guest transport when available."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.list_workspace_entries(
                guest_cid,
                port,
                workspace_path=workspace_path,
                recursive=recursive,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        instance.metadata["execution_mode"] = "host_compat"
        listing = list_workspace_files(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            recursive=recursive,
        )
        return {
            "path": listing.path,
            "artifact_type": listing.artifact_type,
            "entries": [entry.to_payload() for entry in listing.entries],
            "execution_mode": "host_compat",
        }

    def read_workspace_file(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        max_bytes: int,
    ) -> dict[str, Any]:
        """Read a workspace file, via guest transport when available."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.read_workspace_file(
                guest_cid,
                port,
                workspace_path=workspace_path,
                max_bytes=max_bytes,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        instance.metadata["execution_mode"] = "host_compat"
        file_result = read_workspace_file(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            max_bytes=max_bytes,
        )
        return {
            "path": file_result.path,
            "size_bytes": file_result.size_bytes,
            "content_bytes": file_result.content_bytes,
            "execution_mode": "host_compat",
        }

    def write_workspace_file(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        text: str,
    ) -> dict[str, Any]:
        """Write a workspace file, via guest transport when available."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.write_workspace_file(
                guest_cid,
                port,
                workspace_path=workspace_path,
                text=text,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        instance.metadata["execution_mode"] = "host_compat"
        result = write_workspace_file(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            text=text,
        )
        return {
            "path": result.path,
            "size_bytes": result.size_bytes,
            "bytes_written": result.bytes_written,
            "execution_mode": "host_compat",
        }

    def delete_workspace_path(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
    ) -> dict[str, Any]:
        """Delete a workspace path, via guest transport when available."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.delete_workspace_path(
                guest_cid,
                port,
                workspace_path=workspace_path,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        instance.metadata["execution_mode"] = "host_compat"
        result = delete_workspace_path(
            _instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
        )
        return {
            "path": result.path,
            "deleted": result.deleted,
            "execution_mode": "host_compat",
        }

    def open_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        cwd: str,
        cols: int,
        rows: int,
        env: dict[str, str] | None = None,
        redact_values: list[str] | None = None,
    ) -> dict[str, Any]:
        """Open an interactive shell inside the guest (guest transport only)."""
        del workspace_id
        guest_cid, port, uds_path = self._guest_endpoint(instance)
        response = self._guest_exec_client.open_shell(
            guest_cid,
            port,
            shell_id=shell_id,
            cwd=cwd,
            cols=cols,
            rows=rows,
            env=env,
            redact_values=redact_values,
            uds_path=uds_path,
        )
        return self._with_execution_mode(
            instance,
            {
                "shell_id": response.shell_id or shell_id,
                "cwd": response.cwd,
                "cols": response.cols,
                "rows": response.rows,
                "state": response.state,
                "started_at": response.started_at,
                "ended_at": response.ended_at,
                "exit_code": response.exit_code,
            },
        )

    def read_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        cursor: int,
        max_chars: int,
    ) -> dict[str, Any]:
        """Read buffered output from a guest shell session."""
        del workspace_id
        guest_cid, port, uds_path = self._guest_endpoint(instance)
        response = self._guest_exec_client.read_shell(
            guest_cid,
            port,
            shell_id=shell_id,
            cursor=cursor,
            max_chars=max_chars,
            uds_path=uds_path,
        )
        return self._with_execution_mode(
            instance,
            {
                "shell_id": response.shell_id,
                "cwd": response.cwd,
                "cols": response.cols,
                "rows": response.rows,
                "state": response.state,
                "started_at": response.started_at,
                "ended_at": response.ended_at,
                "exit_code": response.exit_code,
                "cursor": response.cursor,
                "next_cursor": response.next_cursor,
                "output": response.output,
                "truncated": response.truncated,
            },
        )

    def write_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        input_text: str,
        append_newline: bool,
    ) -> dict[str, Any]:
        """Send input to a guest shell session."""
        del workspace_id
        guest_cid, port, uds_path = self._guest_endpoint(instance)
        payload = self._guest_exec_client.write_shell(
            guest_cid,
            port,
            shell_id=shell_id,
            input_text=input_text,
            append_newline=append_newline,
            uds_path=uds_path,
        )
        return self._with_execution_mode(instance, payload)

    def signal_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
        signal_name: str,
    ) -> dict[str, Any]:
        """Deliver a signal to a guest shell session."""
        del workspace_id
        guest_cid, port, uds_path = self._guest_endpoint(instance)
        payload = self._guest_exec_client.signal_shell(
            guest_cid,
            port,
            shell_id=shell_id,
            signal_name=signal_name,
            uds_path=uds_path,
        )
        return self._with_execution_mode(instance, payload)

    def close_shell(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        shell_id: str,
    ) -> dict[str, Any]:
        """Close a guest shell session."""
        del workspace_id
        guest_cid, port, uds_path = self._guest_endpoint(instance)
        payload = self._guest_exec_client.close_shell(
            guest_cid,
            port,
            shell_id=shell_id,
            uds_path=uds_path,
        )
        return self._with_execution_mode(instance, payload)

    def start_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
        command: str,
        cwd: str,
        readiness: dict[str, Any] | None,
        ready_timeout_seconds: int,
        ready_interval_ms: int,
        env: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Start a service in the guest, or locally when host compat is allowed."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.start_service(
                guest_cid,
                port,
                service_name=service_name,
                command=command,
                cwd=cwd,
                readiness=readiness,
                ready_timeout_seconds=ready_timeout_seconds,
                ready_interval_ms=ready_interval_ms,
                env=env,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        self._require_host_compat(instance)
        service = _start_local_service(
            services_dir=instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME,
            workspace_dir=_instance_workspace_host_dir(instance),
            workspace_id=workspace_id,
            service_name=service_name,
            command=command,
            cwd_text=cwd,
            readiness=readiness,
            ready_timeout_seconds=ready_timeout_seconds,
            ready_interval_ms=ready_interval_ms,
            env_overrides=env,
        )
        return service.to_payload()

    def status_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
    ) -> dict[str, Any]:
        """Return refreshed status of a service (guest or host-compat)."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.status_service(
                guest_cid,
                port,
                service_name=service_name,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        self._require_host_compat(instance)
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = self._load_workspace_service(services_dir, workspace_id, service_name)
        refreshed = _refresh_local_service_record(service, services_dir=services_dir)
        return refreshed.to_payload()

    def logs_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
        tail_lines: int | None,
    ) -> dict[str, Any]:
        """Return service status plus stdout/stderr tails (guest or host-compat)."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.logs_service(
                guest_cid,
                port,
                service_name=service_name,
                tail_lines=tail_lines,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        self._require_host_compat(instance)
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = _refresh_local_service_record(
            self._load_workspace_service(services_dir, workspace_id, service_name),
            services_dir=services_dir,
        )
        response = service.to_payload()
        stdout, stdout_truncated = _tail_text(
            _workspace_service_stdout_path(services_dir, service_name),
            tail_lines=tail_lines,
        )
        stderr, stderr_truncated = _tail_text(
            _workspace_service_stderr_path(services_dir, service_name),
            tail_lines=tail_lines,
        )
        response.update(
            {
                "stdout": stdout,
                "stderr": stderr,
                "tail_lines": tail_lines,
                "truncated": stdout_truncated or stderr_truncated,
            }
        )
        return response

    def stop_service(
        self,
        instance: VmInstance,
        *,
        workspace_id: str,
        service_name: str,
    ) -> dict[str, Any]:
        """Stop a service (guest or host-compat) and return its final payload."""
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid, port, uds_path = self._guest_endpoint(instance)
            payload = self._guest_exec_client.stop_service(
                guest_cid,
                port,
                service_name=service_name,
                uds_path=uds_path,
            )
            return self._with_execution_mode(instance, payload)
        self._require_host_compat(instance)
        services_dir = instance.workdir.parent / WORKSPACE_SERVICES_DIRNAME
        service = self._load_workspace_service(services_dir, workspace_id, service_name)
        stopped = _stop_local_service(service, services_dir=services_dir)
        return stopped.to_payload()

    @staticmethod
    def _guest_endpoint(instance: VmInstance) -> tuple[int, int, str | None]:
        """Return (guest_cid, port, uds_path) recorded in metadata by start()."""
        return (
            int(instance.metadata["guest_cid"]),
            int(instance.metadata["guest_exec_port"]),
            instance.metadata.get("guest_exec_uds_path"),
        )

    @staticmethod
    def _with_execution_mode(instance: VmInstance, payload: dict[str, Any]) -> dict[str, Any]:
        """Stamp the instance's current execution mode onto *payload* and return it."""
        payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
        return payload

    @staticmethod
    def _require_host_compat(instance: VmInstance) -> None:
        """Raise unless the instance explicitly opted into host-compat services."""
        if not instance.allow_host_compat:
            raise RuntimeError("services require guest execution or explicit host compatibility")

    @staticmethod
    def _load_workspace_service(
        services_dir: Path,
        workspace_id: str,
        service_name: str,
    ) -> WorkspaceServiceRecord:
        """Load the persisted service record (mirrors MockBackend._load_workspace_service).

        Raises:
            ValueError: if no record exists for *service_name*.
            RuntimeError: if the record file is not a JSON object.
        """
        record_path = services_dir / f"{service_name}.json"
        if not record_path.exists():
            raise ValueError(
                f"service {service_name!r} does not exist in workspace {workspace_id!r}"
            )
        payload = json.loads(record_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise RuntimeError(f"service record at {record_path} is invalid")
        return WorkspaceServiceRecord.from_payload(payload)
class VmManager:
    """In-process lifecycle manager for ephemeral VM environments and workspaces."""

    # Hard bounds for caller-supplied sizing/TTL values (presumably enforced
    # by _validate_limits — TODO confirm against its implementation).
    MIN_VCPUS = 1
    MAX_VCPUS = 8
    MIN_MEM_MIB = 256
    MAX_MEM_MIB = 32768
    MIN_TTL_SECONDS = 60
    MAX_TTL_SECONDS = 3600
    # Module-level defaults re-exported on the class so callers can reference
    # them via VmManager.DEFAULT_* without importing the module constants.
    DEFAULT_VCPU_COUNT = DEFAULT_VCPU_COUNT
    DEFAULT_MEM_MIB = DEFAULT_MEM_MIB
    DEFAULT_TIMEOUT_SECONDS = DEFAULT_TIMEOUT_SECONDS
    DEFAULT_TTL_SECONDS = DEFAULT_TTL_SECONDS
    DEFAULT_ALLOW_HOST_COMPAT = DEFAULT_ALLOW_HOST_COMPAT
    def __init__(
        self,
        *,
        backend_name: str | None = None,
        base_dir: Path | None = None,
        cache_dir: Path | None = None,
        max_active_vms: int = 4,
        runtime_paths: RuntimePaths | None = None,
        network_manager: TapNetworkManager | None = None,
    ) -> None:
        """Wire up backend, environment store, network manager, and state dirs.

        backend_name defaults to "firecracker"; any other value is treated as
        the mock backend for capability purposes (the final backend choice is
        validated in _build_backend).
        """
        self._backend_name = backend_name or "firecracker"
        self._base_dir = base_dir or Path("/tmp/pyro-mcp")
        self._workspaces_dir = self._base_dir / "workspaces"
        resolved_cache_dir = cache_dir or default_cache_dir()
        self._runtime_paths = runtime_paths
        if self._backend_name == "firecracker":
            # Real backend: resolve runtime artifacts (with default checksum
            # verification) and probe the host's actual capabilities.
            self._runtime_paths = self._runtime_paths or resolve_runtime_paths()
            self._runtime_capabilities = runtime_capabilities(self._runtime_paths)
            self._environment_store = EnvironmentStore(
                runtime_paths=self._runtime_paths,
                cache_dir=resolved_cache_dir,
            )
        else:
            # Non-firecracker (mock) backend: no guest ever boots, so declare
            # no VM/exec/network support up front.
            self._runtime_capabilities = RuntimeCapabilities(
                supports_vm_boot=False,
                supports_guest_exec=False,
                supports_guest_network=False,
                reason="mock backend does not boot a guest",
            )
            if self._runtime_paths is None:
                # Checksum verification is skipped: artifacts are never run.
                self._runtime_paths = resolve_runtime_paths(verify_checksums=False)
            self._environment_store = EnvironmentStore(
                runtime_paths=self._runtime_paths,
                cache_dir=resolved_cache_dir,
            )
        self._max_active_vms = max_active_vms
        # Explicit injection wins; otherwise TAP networking is only enabled
        # for the firecracker backend.
        if network_manager is not None:
            self._network_manager = network_manager
        elif self._backend_name == "firecracker":
            self._network_manager = TapNetworkManager(enabled=True)
        else:
            self._network_manager = TapNetworkManager(enabled=False)
        self._lock = threading.Lock()
        self._instances: dict[str, VmInstance] = {}
        self._base_dir.mkdir(parents=True, exist_ok=True)
        self._workspaces_dir.mkdir(parents=True, exist_ok=True)
        # Built last so the backend sees fully-initialized collaborators.
        self._backend = self._build_backend()
def _build_backend(self) -> VmBackend:
if self._backend_name == "mock":
return MockBackend()
if self._backend_name == "firecracker":
if self._runtime_paths is None:
raise RuntimeError("runtime paths were not initialized for firecracker backend")
return FirecrackerBackend(
self._environment_store,
firecracker_bin=self._runtime_paths.firecracker_bin,
jailer_bin=self._runtime_paths.jailer_bin,
runtime_capabilities=self._runtime_capabilities,
network_manager=self._network_manager,
)
raise ValueError("invalid backend; expected one of: mock, firecracker")
def list_environments(self) -> list[dict[str, object]]:
return self._environment_store.list_environments()
def pull_environment(self, environment: str) -> dict[str, object]:
return self._environment_store.pull_environment(environment)
def inspect_environment(self, environment: str) -> dict[str, object]:
return self._environment_store.inspect_environment(environment)
def prune_environments(self) -> dict[str, object]:
return self._environment_store.prune_environments()
    def create_vm(
        self,
        *,
        environment: str,
        vcpu_count: int = DEFAULT_VCPU_COUNT,
        mem_mib: int = DEFAULT_MEM_MIB,
        ttl_seconds: int = DEFAULT_TTL_SECONDS,
        network: bool = False,
        allow_host_compat: bool = DEFAULT_ALLOW_HOST_COMPAT,
    ) -> dict[str, Any]:
        """Create (but do not start) an ephemeral VM and return its state.

        Raises RuntimeError when the active-VM quota (VMs plus workspaces)
        is already reached after reaping expired ones.
        """
        self._validate_limits(vcpu_count=vcpu_count, mem_mib=mem_mib, ttl_seconds=ttl_seconds)
        # Fail fast on unknown/unavailable environments before taking the lock.
        get_environment(environment, runtime_paths=self._runtime_paths)
        now = time.time()
        with self._lock:
            # Reclaim expired VMs and workspaces first so they do not count
            # against the quota below.
            self._reap_expired_locked(now)
            self._reap_expired_workspaces_locked(now)
            active_count = len(self._instances) + self._count_workspaces_locked()
            if active_count >= self._max_active_vms:
                raise RuntimeError(
                    f"max active VMs reached ({self._max_active_vms}); delete old VMs first"
                )
            vm_id = uuid.uuid4().hex[:12]
            instance = VmInstance(
                vm_id=vm_id,
                environment=environment,
                vcpu_count=vcpu_count,
                mem_mib=mem_mib,
                ttl_seconds=ttl_seconds,
                created_at=now,
                expires_at=now + ttl_seconds,
                workdir=self._base_dir / vm_id,
                network_requested=network,
                allow_host_compat=allow_host_compat,
            )
            # Stored as "true"/"false" text since metadata values are strings.
            instance.metadata["allow_host_compat"] = str(allow_host_compat).lower()
            self._backend.create(instance)
            self._instances[vm_id] = instance
            return self._serialize(instance)
def run_vm(
self,
*,
environment: str,
command: str,
vcpu_count: int = DEFAULT_VCPU_COUNT,
mem_mib: int = DEFAULT_MEM_MIB,
timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
ttl_seconds: int = DEFAULT_TTL_SECONDS,
network: bool = False,
allow_host_compat: bool = DEFAULT_ALLOW_HOST_COMPAT,
) -> dict[str, Any]:
created = self.create_vm(
environment=environment,
vcpu_count=vcpu_count,
mem_mib=mem_mib,
ttl_seconds=ttl_seconds,
network=network,
allow_host_compat=allow_host_compat,
)
vm_id = str(created["vm_id"])
try:
self.start_vm(vm_id)
return self.exec_vm(vm_id, command=command, timeout_seconds=timeout_seconds)
except Exception:
try:
self.delete_vm(vm_id, reason="run_vm_error_cleanup")
except ValueError:
pass
raise
def start_vm(self, vm_id: str) -> dict[str, Any]:
with self._lock:
instance = self._get_instance_locked(vm_id)
self._ensure_not_expired_locked(instance, time.time())
self._start_instance_locked(instance)
return self._serialize(instance)
def exec_vm(self, vm_id: str, *, command: str, timeout_seconds: int) -> dict[str, Any]:
with self._lock:
instance = self._get_instance_locked(vm_id)
self._ensure_not_expired_locked(instance, time.time())
exec_instance = instance
exec_result, execution_mode = self._exec_instance(
exec_instance,
command=command,
timeout_seconds=timeout_seconds,
)
cleanup = self.delete_vm(vm_id, reason="post_exec_cleanup")
return {
"vm_id": vm_id,
"environment": exec_instance.environment,
"environment_version": exec_instance.metadata.get("environment_version"),
"command": command,
"stdout": exec_result.stdout,
"stderr": exec_result.stderr,
"exit_code": exec_result.exit_code,
"duration_ms": exec_result.duration_ms,
"execution_mode": execution_mode,
"cleanup": cleanup,
}
def stop_vm(self, vm_id: str) -> dict[str, Any]:
with self._lock:
instance = self._get_instance_locked(vm_id)
self._backend.stop(instance)
instance.state = "stopped"
return self._serialize(instance)
def delete_vm(self, vm_id: str, *, reason: str = "explicit_delete") -> dict[str, Any]:
with self._lock:
instance = self._get_instance_locked(vm_id)
if instance.state == "started":
self._backend.stop(instance)
instance.state = "stopped"
self._backend.delete(instance)
del self._instances[vm_id]
return {"vm_id": vm_id, "deleted": True, "reason": reason}
def status_vm(self, vm_id: str) -> dict[str, Any]:
with self._lock:
instance = self._get_instance_locked(vm_id)
self._ensure_not_expired_locked(instance, time.time())
return self._serialize(instance)
def network_info_vm(self, vm_id: str) -> dict[str, Any]:
with self._lock:
instance = self._get_instance_locked(vm_id)
self._ensure_not_expired_locked(instance, time.time())
if instance.network is None:
return {
"vm_id": vm_id,
"network_enabled": False,
"outbound_connectivity_expected": False,
"reason": "network configuration is unavailable for this VM",
}
return {"vm_id": vm_id, **self._network_manager.network_info(instance.network)}
def reap_expired(self) -> dict[str, Any]:
now = time.time()
with self._lock:
expired_vm_ids = [
vm_id for vm_id, inst in self._instances.items() if inst.expires_at <= now
]
for vm_id in expired_vm_ids:
instance = self._instances[vm_id]
if instance.state == "started":
self._backend.stop(instance)
instance.state = "stopped"
self._backend.delete(instance)
del self._instances[vm_id]
return {"deleted_vm_ids": expired_vm_ids, "count": len(expired_vm_ids)}
    def create_workspace(
        self,
        *,
        environment: str,
        vcpu_count: int = DEFAULT_VCPU_COUNT,
        mem_mib: int = DEFAULT_MEM_MIB,
        ttl_seconds: int = DEFAULT_TTL_SECONDS,
        network_policy: WorkspaceNetworkPolicy | str = DEFAULT_WORKSPACE_NETWORK_POLICY,
        allow_host_compat: bool = DEFAULT_ALLOW_HOST_COMPAT,
        seed_path: str | Path | None = None,
        secrets: list[dict[str, str]] | None = None,
        name: str | None = None,
        labels: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Create, boot, and seed a persistent workspace; return its state.

        On any failure the partially-created VM and the workspace directory
        tree are torn down best-effort before re-raising.
        """
        self._validate_limits(vcpu_count=vcpu_count, mem_mib=mem_mib, ttl_seconds=ttl_seconds)
        # Fail fast before any directories are created.
        get_environment(environment, runtime_paths=self._runtime_paths)
        normalized_network_policy = _normalize_workspace_network_policy(str(network_policy))
        normalized_name = None if name is None else _normalize_workspace_name(name)
        normalized_labels = _normalize_workspace_labels(labels)
        # Stages the seed (if any) into an archive; cleaned up in finally.
        prepared_seed = self._prepare_workspace_seed(seed_path)
        now = time.time()
        workspace_id = uuid.uuid4().hex[:12]
        # On-disk layout for the workspace: runtime state, host mirror,
        # command/shell/service records, secrets, and snapshots.
        workspace_dir = self._workspace_dir(workspace_id)
        runtime_dir = self._workspace_runtime_dir(workspace_id)
        host_workspace_dir = self._workspace_host_dir(workspace_id)
        commands_dir = self._workspace_commands_dir(workspace_id)
        shells_dir = self._workspace_shells_dir(workspace_id)
        services_dir = self._workspace_services_dir(workspace_id)
        secrets_dir = self._workspace_secrets_dir(workspace_id)
        snapshots_dir = self._workspace_snapshots_dir(workspace_id)
        baseline_archive_path = self._workspace_baseline_archive_path(workspace_id)
        # exist_ok=False on the root: a collision on the fresh id is an error.
        workspace_dir.mkdir(parents=True, exist_ok=False)
        host_workspace_dir.mkdir(parents=True, exist_ok=True)
        commands_dir.mkdir(parents=True, exist_ok=True)
        shells_dir.mkdir(parents=True, exist_ok=True)
        services_dir.mkdir(parents=True, exist_ok=True)
        secrets_dir.mkdir(parents=True, exist_ok=True)
        snapshots_dir.mkdir(parents=True, exist_ok=True)
        secret_records, _ = _prepare_workspace_secrets(secrets, secrets_dir=secrets_dir)
        # Persist the seed as the "baseline" snapshot used by diff/reset.
        _persist_workspace_baseline(
            prepared_seed,
            baseline_archive_path=baseline_archive_path,
        )
        instance = VmInstance(
            vm_id=workspace_id,
            environment=environment,
            vcpu_count=vcpu_count,
            mem_mib=mem_mib,
            ttl_seconds=ttl_seconds,
            created_at=now,
            expires_at=now + ttl_seconds,
            workdir=runtime_dir,
            network_requested=normalized_network_policy != "off",
            allow_host_compat=allow_host_compat,
        )
        instance.metadata["allow_host_compat"] = str(allow_host_compat).lower()
        instance.metadata["workspace_path"] = WORKSPACE_GUEST_PATH
        instance.metadata["workspace_host_dir"] = str(host_workspace_dir)
        instance.metadata["network_policy"] = normalized_network_policy
        try:
            with self._lock:
                # Quota check mirrors create_vm: expired VMs/workspaces are
                # reaped first so they do not count.
                self._reap_expired_locked(now)
                self._reap_expired_workspaces_locked(now)
                active_count = len(self._instances) + self._count_workspaces_locked()
                if active_count >= self._max_active_vms:
                    raise RuntimeError(
                        f"max active VMs reached ({self._max_active_vms}); delete old VMs first"
                    )
                self._require_workspace_network_policy_support(
                    network_policy=normalized_network_policy
                )
                self._backend.create(instance)
            # Bootstrap probing happens outside the lock (may be slow).
            if self._runtime_capabilities.supports_guest_exec:
                self._ensure_workspace_guest_bootstrap_support(instance)
            with self._lock:
                self._start_instance_locked(instance)
                workspace = WorkspaceRecord.from_instance(
                    instance,
                    network_policy=normalized_network_policy,
                    workspace_seed=prepared_seed.to_payload(),
                    secrets=secret_records,
                    name=normalized_name,
                    labels=normalized_labels,
                )
                if workspace.secrets:
                    self._install_workspace_secrets_locked(workspace, instance)
                self._require_guest_exec_or_opt_in(instance)
                # Seed the guest from the baseline archive and record the
                # import totals on the persisted seed payload.
                import_summary = self._backend.import_archive(
                    instance,
                    archive_path=baseline_archive_path,
                    destination=WORKSPACE_GUEST_PATH,
                )
                workspace_seed = dict(workspace.workspace_seed)
                workspace_seed["entry_count"] = int(import_summary["entry_count"])
                workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
                workspace_seed["destination"] = str(import_summary["destination"])
                workspace.workspace_seed = workspace_seed
                self._save_workspace_locked(workspace)
            return self._serialize_workspace(workspace)
        except Exception:
            # Best-effort rollback: stop/delete the backend instance only if
            # its runtime dir materialized, then remove the workspace tree.
            if runtime_dir.exists():
                try:
                    if instance.state == "started":
                        self._backend.stop(instance)
                        instance.state = "stopped"
                except Exception:
                    pass
                try:
                    self._backend.delete(instance)
                except Exception:
                    pass
            shutil.rmtree(workspace_dir, ignore_errors=True)
            raise
        finally:
            prepared_seed.cleanup()
    def push_workspace_sync(
        self,
        workspace_id: str,
        *,
        source_path: str | Path,
        dest: str = WORKSPACE_GUEST_PATH,
    ) -> dict[str, Any]:
        """Push host files at *source_path* into a started workspace at *dest*.

        Raises ValueError for an empty/unreadable source and RuntimeError when
        the workspace is not in the 'started' state.
        """
        prepared_seed = self._prepare_workspace_seed(source_path)
        if prepared_seed.archive_path is None:
            prepared_seed.cleanup()
            raise ValueError("source_path is required")
        normalized_destination, _ = _normalize_workspace_destination(dest)
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            self._ensure_workspace_not_expired_locked(workspace, time.time())
            self._refresh_workspace_liveness_locked(workspace)
            if workspace.state != "started":
                raise RuntimeError(
                    f"workspace {workspace_id} must be in 'started' state "
                    "before workspace_sync_push"
                )
            instance = workspace.to_instance(
                workdir=self._workspace_runtime_dir(workspace.workspace_id)
            )
        try:
            # Import runs outside the lock; the backend may mutate
            # instance.state/metadata, re-synced into the record below.
            import_summary = self._backend.import_archive(
                instance,
                archive_path=prepared_seed.archive_path,
                destination=normalized_destination,
            )
        finally:
            prepared_seed.cleanup()
        # NOTE(review): to_payload() is invoked after cleanup(); this assumes
        # the payload only reads recorded metadata, not the archive file —
        # confirm against PreparedSeed's implementation.
        workspace_sync = prepared_seed.to_payload(
            destination=normalized_destination,
            path_key="source_path",
        )
        workspace_sync["entry_count"] = int(import_summary["entry_count"])
        workspace_sync["bytes_written"] = int(import_summary["bytes_written"])
        workspace_sync["destination"] = str(import_summary["destination"])
        with self._lock:
            # Reload and re-sync: the record may have changed while unlocked.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            # A push mutates the workspace; bump last_activity_at.
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "execution_mode": instance.metadata.get("execution_mode", "pending"),
            "workspace_sync": workspace_sync,
        }
    def list_workspaces(self) -> dict[str, Any]:
        """List persisted workspaces, most recently active first.

        Each record's liveness and service counts are refreshed and written
        back to disk as part of listing.
        """
        with self._lock:
            now = time.time()
            self._reap_expired_workspaces_locked(now)
            workspaces: list[WorkspaceRecord] = []
            for metadata_path in self._workspaces_dir.glob("*/workspace.json"):
                payload = json.loads(metadata_path.read_text(encoding="utf-8"))
                if not isinstance(payload, dict):
                    # Skip malformed records rather than failing the listing.
                    continue
                workspace = WorkspaceRecord.from_payload(payload)
                self._refresh_workspace_liveness_locked(workspace)
                self._refresh_workspace_service_counts_locked(workspace)
                self._save_workspace_locked(workspace)
                workspaces.append(workspace)
            # Order: newest activity, then newest creation, then id as a
            # deterministic tie-breaker.
            workspaces.sort(
                key=lambda item: (
                    -item.last_activity_at,
                    -item.created_at,
                    item.workspace_id,
                )
            )
            return {
                "count": len(workspaces),
                "workspaces": [
                    self._serialize_workspace_list_item(workspace) for workspace in workspaces
                ],
            }
def update_workspace(
self,
workspace_id: str,
*,
name: str | None = None,
clear_name: bool = False,
labels: dict[str, str] | None = None,
clear_labels: list[str] | None = None,
) -> dict[str, Any]:
if name is not None and clear_name:
raise ValueError("name and clear_name cannot be used together")
normalized_name = None if name is None else _normalize_workspace_name(name)
normalized_labels = None if labels is None else _normalize_workspace_labels(labels)
normalized_clear_labels = [
_normalize_workspace_label_key(label_key) for label_key in (clear_labels or [])
]
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
updated = False
if clear_name:
if workspace.name is not None:
workspace.name = None
updated = True
elif normalized_name is not None and workspace.name != normalized_name:
workspace.name = normalized_name
updated = True
if normalized_labels is not None:
for label_key, label_value in normalized_labels.items():
if workspace.labels.get(label_key) != label_value:
workspace.labels[label_key] = label_value
updated = True
for label_key in normalized_clear_labels:
if label_key in workspace.labels:
del workspace.labels[label_key]
updated = True
workspace.labels = dict(sorted(workspace.labels.items()))
if not updated:
raise ValueError("workspace update requested no effective metadata change")
self._touch_workspace_activity_locked(workspace)
self._save_workspace_locked(workspace)
return self._serialize_workspace(workspace)
    def export_workspace(
        self,
        workspace_id: str,
        *,
        path: str,
        output_path: str | Path,
    ) -> dict[str, Any]:
        """Export a guest path from the workspace to a host *output_path*.

        The backend produces a tar archive in a temp dir; it is extracted to
        the resolved host output path before the temp dir is removed.
        """
        normalized_path, _ = _normalize_workspace_destination(path)
        raw_output_path = str(output_path).strip()
        if raw_output_path == "":
            raise ValueError("output_path must not be empty")
        resolved_output_path = Path(output_path).expanduser().resolve()
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_export",
            )
        with tempfile.TemporaryDirectory(prefix="pyro-workspace-export-") as temp_dir:
            archive_path = Path(temp_dir) / "workspace-export.tar"
            exported = self._backend.export_archive(
                instance,
                workspace_path=normalized_path,
                archive_path=archive_path,
            )
            extracted = _extract_workspace_export_archive(
                archive_path,
                output_path=resolved_output_path,
                artifact_type=cast(WorkspaceArtifactType, str(exported["artifact_type"])),
            )
        with self._lock:
            # Re-sync the record with any state/metadata the backend mutated
            # while the lock was released.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "workspace_path": normalized_path,
            "output_path": str(Path(str(extracted["output_path"]))),
            "artifact_type": extracted["artifact_type"],
            "entry_count": int(extracted["entry_count"]),
            "bytes_written": int(extracted["bytes_written"]),
            "execution_mode": str(
                exported.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
            ),
        }
    def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Diff the workspace's current guest tree against its baseline seed.

        Exports the current guest tree, extracts both it and the baseline
        archive into a temp dir, and compares the two trees on the host.
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_diff",
            )
            baseline_archive_path = self._workspace_baseline_archive_path(workspace_id)
            if not baseline_archive_path.exists():
                raise RuntimeError(
                    "workspace diff requires a baseline snapshot. Recreate the workspace to use diff."
                )
        with tempfile.TemporaryDirectory(prefix="pyro-workspace-diff-") as temp_dir:
            temp_root = Path(temp_dir)
            current_archive_path = temp_root / "current.tar"
            baseline_root = temp_root / "baseline"
            current_root = temp_root / "current"
            self._backend.export_archive(
                instance,
                workspace_path=WORKSPACE_GUEST_PATH,
                archive_path=current_archive_path,
            )
            # Materialize both trees under the same guest-path layout so the
            # tree diff compares like-for-like paths.
            _extract_seed_archive_to_host_workspace(
                baseline_archive_path,
                workspace_dir=baseline_root,
                destination=WORKSPACE_GUEST_PATH,
            )
            _extract_seed_archive_to_host_workspace(
                current_archive_path,
                workspace_dir=current_root,
                destination=WORKSPACE_GUEST_PATH,
            )
            diff_payload = _diff_workspace_trees(baseline_root, current_root)
        with self._lock:
            # Persist any state/metadata the backend mutated during export.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        diff_payload["workspace_id"] = workspace_id
        return diff_payload
    def list_workspace_files(
        self,
        workspace_id: str,
        *,
        path: str = WORKSPACE_GUEST_PATH,
        recursive: bool = False,
    ) -> dict[str, Any]:
        """List directory entries at a guest path inside the workspace."""
        normalized_path = _normalize_workspace_file_path(path)
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_file_list",
            )
        # Listing happens outside the lock; state is re-synced afterwards.
        listing = self._backend.list_workspace_entries(
            instance,
            workspace_path=normalized_path,
            recursive=recursive,
        )
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "path": str(listing["path"]),
            "recursive": recursive,
            "entries": cast(list[dict[str, Any]], list(listing.get("entries", []))),
            "execution_mode": str(
                listing.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
            ),
        }
    def read_workspace_file(
        self,
        workspace_id: str,
        path: str,
        *,
        max_bytes: int = DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES,
    ) -> dict[str, Any]:
        """Read a guest file as UTF-8 text, truncated to *max_bytes*.

        The backend always fetches up to WORKSPACE_FILE_MAX_BYTES; the
        caller-facing truncation to max_bytes is applied locally so
        `truncated` can be reported accurately.
        """
        normalized_path = _normalize_workspace_file_path(path)
        normalized_max_bytes = _validate_workspace_file_read_max_bytes(max_bytes)
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_file_read",
            )
        payload = self._backend.read_workspace_file(
            instance,
            workspace_path=normalized_path,
            max_bytes=WORKSPACE_FILE_MAX_BYTES,
        )
        raw_bytes = cast(bytes, payload["content_bytes"])
        # Undecodable bytes are replaced rather than raising.
        content = raw_bytes[:normalized_max_bytes].decode("utf-8", errors="replace")
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            # NOTE(review): a read bumps last_activity_at even though it does
            # not mutate the workspace — confirm this is intended for
            # workspace_list ordering.
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "path": str(payload["path"]),
            "size_bytes": int(payload["size_bytes"]),
            "max_bytes": normalized_max_bytes,
            "content": content,
            "truncated": len(raw_bytes) > normalized_max_bytes,
            "execution_mode": str(
                payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
            ),
        }
def write_workspace_file(
self,
workspace_id: str,
path: str,
*,
text: str,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_file_path(path)
normalized_text = _validate_workspace_text_payload(text, field_name="text")
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="workspace_file_write",
)
payload = self._backend.write_workspace_file(
instance,
workspace_path=normalized_path,
text=normalized_text,
)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"path": str(payload["path"]),
"size_bytes": int(payload["size_bytes"]),
"bytes_written": int(payload["bytes_written"]),
"execution_mode": str(
payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
),
}
    def apply_workspace_patch(
        self,
        workspace_id: str,
        *,
        patch: str,
    ) -> dict[str, Any]:
        """Apply a unified-diff text patch to files inside the workspace.

        Two-phase: first validate every file entry against the current guest
        state and compute the resulting texts; only then perform all writes
        and deletions. This keeps a mid-patch validation failure from leaving
        a partially-applied result.
        """
        patch_text = _validate_workspace_patch_text(patch)
        parsed_patches = parse_unified_text_patch(patch_text)
        patch_by_path: dict[str, WorkspaceTextPatch] = {}
        for text_patch in parsed_patches:
            if text_patch.path in patch_by_path:
                raise ValueError(f"patch contains duplicate file entries for {text_patch.path}")
            patch_by_path[text_patch.path] = text_patch
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_patch_apply",
            )
        # Phase 1: plan. Nothing is written until every entry validates.
        planned_writes: dict[str, str] = {}
        planned_deletes: list[str] = []
        summary = {
            "total": 0,
            "added": 0,
            "modified": 0,
            "deleted": 0,
        }
        entries: list[dict[str, str]] = []
        for path_text in sorted(patch_by_path):
            file_patch = patch_by_path[path_text]
            listing: dict[str, Any] | None = None
            current_text: str | None = None
            exists = True
            try:
                listing = self._backend.list_workspace_entries(
                    instance,
                    workspace_path=file_patch.path,
                    recursive=False,
                )
            except RuntimeError as exc:
                # A missing path is expected for "added" entries; anything
                # else is a real backend failure.
                if "does not exist" in str(exc):
                    exists = False
                else:
                    raise
            if exists:
                if listing is None:
                    raise RuntimeError(
                        f"workspace patch could not inspect current path: {file_patch.path}"
                    )
                artifact_type = str(listing["artifact_type"])
                if artifact_type != "file":
                    raise RuntimeError(
                        f"workspace patch only supports regular files: {file_patch.path}"
                    )
                current_payload = self._backend.read_workspace_file(
                    instance,
                    workspace_path=file_patch.path,
                    max_bytes=WORKSPACE_FILE_MAX_BYTES,
                )
                current_text = _decode_workspace_patch_text(
                    file_patch.path,
                    cast(bytes, current_payload["content_bytes"]),
                )
            # Status/existence consistency checks.
            if file_patch.status == "added" and exists:
                raise RuntimeError(
                    f"workspace patch cannot add an existing path: {file_patch.path}"
                )
            if file_patch.status in {"modified", "deleted"} and not exists:
                raise RuntimeError(
                    f"workspace patch cannot modify a missing path: {file_patch.path}"
                )
            # None result means the patch deletes the file.
            after_text = apply_unified_text_patch(
                path=file_patch.path,
                patch=file_patch,
                before_text=current_text,
            )
            if after_text is None:
                planned_deletes.append(file_patch.path)
            else:
                planned_writes[file_patch.path] = after_text
            summary["total"] += 1
            summary[file_patch.status] += 1
            entries.append({"path": file_patch.path, "status": file_patch.status})
        # Phase 2: apply — writes first, then deletions, both in sorted order.
        for path_text in sorted(planned_writes):
            self._backend.write_workspace_file(
                instance,
                workspace_path=path_text,
                text=planned_writes[path_text],
            )
        for path_text in sorted(planned_deletes):
            self._backend.delete_workspace_path(
                instance,
                workspace_path=path_text,
            )
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            # Patch application is mutating activity for list ordering.
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "changed": bool(entries),
            "summary": summary,
            "entries": entries,
            "patch": patch_text,
            "execution_mode": instance.metadata.get("execution_mode", "pending"),
        }
    def create_snapshot(
        self,
        workspace_id: str,
        snapshot_name: str,
    ) -> dict[str, Any]:
        """Create a named snapshot from the workspace's current guest tree.

        The duplicate-name check runs twice: once before exporting (cheap
        rejection) and again after, because the lock is released during the
        export and another caller could have created the same name meanwhile.
        """
        normalized_snapshot_name = _normalize_workspace_snapshot_name(snapshot_name)
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            self._ensure_workspace_not_expired_locked(workspace, time.time())
            self._workspace_baseline_snapshot_locked(workspace)
            if (
                self._load_workspace_snapshot_locked_optional(
                    workspace_id,
                    normalized_snapshot_name,
                )
                is not None
            ):
                raise ValueError(
                    f"snapshot {normalized_snapshot_name!r} already exists in workspace "
                    f"{workspace_id!r}"
                )
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_snapshot_create",
            )
        with tempfile.TemporaryDirectory(prefix="pyro-workspace-snapshot-") as temp_dir:
            # Export to a temp archive first; it is only promoted into the
            # snapshots dir once the post-export duplicate check passes.
            temp_archive_path = Path(temp_dir) / f"{normalized_snapshot_name}.tar"
            exported = self._backend.export_archive(
                instance,
                workspace_path=WORKSPACE_GUEST_PATH,
                archive_path=temp_archive_path,
            )
            snapshot = WorkspaceSnapshotRecord(
                workspace_id=workspace_id,
                snapshot_name=normalized_snapshot_name,
                kind="named",
                created_at=time.time(),
                entry_count=int(exported["entry_count"]),
                bytes_written=int(exported["bytes_written"]),
            )
            with self._lock:
                workspace = self._load_workspace_locked(workspace_id)
                self._ensure_workspace_not_expired_locked(workspace, time.time())
                # Re-check: the name may have been taken while unlocked.
                if (
                    self._load_workspace_snapshot_locked_optional(
                        workspace_id,
                        normalized_snapshot_name,
                    )
                    is not None
                ):
                    raise ValueError(
                        f"snapshot {normalized_snapshot_name!r} already exists in workspace "
                        f"{workspace_id!r}"
                    )
                archive_path = self._workspace_snapshot_archive_path(
                    workspace_id,
                    normalized_snapshot_name,
                )
                archive_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(temp_archive_path, archive_path)
                workspace.state = instance.state
                workspace.firecracker_pid = instance.firecracker_pid
                workspace.last_error = instance.last_error
                workspace.metadata = dict(instance.metadata)
                self._touch_workspace_activity_locked(workspace)
                self._save_workspace_locked(workspace)
                self._save_workspace_snapshot_locked(snapshot)
        return {
            "workspace_id": workspace_id,
            "snapshot": self._serialize_workspace_snapshot(snapshot),
            "execution_mode": instance.metadata.get("execution_mode", "pending"),
        }
def list_snapshots(self, workspace_id: str) -> dict[str, Any]:
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
snapshots = self._list_workspace_snapshots_locked(workspace)
return {
"workspace_id": workspace_id,
"count": len(snapshots),
"snapshots": [
self._serialize_workspace_snapshot(snapshot) for snapshot in snapshots
],
}
def delete_snapshot(self, workspace_id: str, snapshot_name: str) -> dict[str, Any]:
normalized_snapshot_name = _normalize_workspace_snapshot_name(
snapshot_name,
allow_baseline=True,
)
if normalized_snapshot_name == "baseline":
raise ValueError("cannot delete the baseline snapshot")
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._workspace_baseline_snapshot_locked(workspace)
self._load_workspace_snapshot_locked(workspace_id, normalized_snapshot_name)
self._delete_workspace_snapshot_locked(workspace_id, normalized_snapshot_name)
self._touch_workspace_activity_locked(workspace)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"snapshot_name": normalized_snapshot_name,
"deleted": True,
}
    def reset_workspace(
        self,
        workspace_id: str,
        *,
        snapshot: str = "baseline",
    ) -> dict[str, Any]:
        """Reset a workspace's guest tree to a snapshot (baseline by default).

        The current VM is torn down (services stopped, shells closed, backend
        deleted) and a fresh instance is created, started, and re-seeded from
        the snapshot archive. On failure the partially-recreated instance is
        cleaned up best-effort and the workspace is left in 'stopped' state.
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            self._ensure_workspace_not_expired_locked(workspace, time.time())
            self._refresh_workspace_liveness_locked(workspace)
            # Resolve the target snapshot before any destructive step.
            selected_snapshot, archive_path = self._resolve_workspace_snapshot_locked(
                workspace,
                snapshot,
            )
            instance = workspace.to_instance(
                workdir=self._workspace_runtime_dir(workspace.workspace_id)
            )
            # Quiesce everything attached to the old instance before teardown.
            self._stop_workspace_services_locked(workspace, instance)
            self._close_workspace_shells_locked(workspace, instance)
            if workspace.state == "started":
                self._backend.stop(instance)
                workspace.state = "stopped"
            self._backend.delete(instance)
            workspace.state = "stopped"
            workspace.firecracker_pid = None
            workspace.last_error = None
            self._reset_workspace_runtime_dirs(workspace_id)
            self._save_workspace_locked(workspace)
        recreated: VmInstance | None = None
        try:
            recreated = workspace.to_instance(
                workdir=self._workspace_runtime_dir(workspace.workspace_id)
            )
            self._require_workspace_network_policy_support(
                network_policy=workspace.network_policy
            )
            self._backend.create(recreated)
            if self._runtime_capabilities.supports_guest_exec:
                self._ensure_workspace_guest_bootstrap_support(recreated)
            with self._lock:
                self._start_instance_locked(recreated)
                workspace = self._load_workspace_locked(workspace_id)
                if workspace.secrets:
                    self._install_workspace_secrets_locked(workspace, recreated)
                self._require_guest_exec_or_opt_in(recreated)
                # Re-seed the guest tree from the selected snapshot archive.
                reset_summary = self._backend.import_archive(
                    recreated,
                    archive_path=archive_path,
                    destination=WORKSPACE_GUEST_PATH,
                )
                workspace = self._load_workspace_locked(workspace_id)
                workspace.state = recreated.state
                workspace.firecracker_pid = recreated.firecracker_pid
                workspace.last_error = recreated.last_error
                workspace.metadata = dict(recreated.metadata)
                # Command history restarts from zero after a reset.
                workspace.command_count = 0
                workspace.last_command = None
                workspace.reset_count += 1
                workspace.last_reset_at = time.time()
                self._touch_workspace_activity_locked(workspace, when=workspace.last_reset_at)
                self._save_workspace_locked(workspace)
                payload = self._serialize_workspace(workspace)
        except Exception:
            # Best-effort rollback of the half-recreated instance, then leave
            # the persisted record in a clean 'stopped' state.
            try:
                if recreated is not None and recreated.state == "started":
                    self._backend.stop(recreated)
            except Exception:
                pass
            try:
                if recreated is not None:
                    self._backend.delete(recreated)
            except Exception:
                pass
            with self._lock:
                workspace = self._load_workspace_locked(workspace_id)
                workspace.state = "stopped"
                workspace.firecracker_pid = None
                workspace.last_error = None
                self._save_workspace_locked(workspace)
            raise
        payload["workspace_reset"] = {
            "snapshot_name": selected_snapshot.snapshot_name,
            "kind": selected_snapshot.kind,
            "destination": str(reset_summary["destination"]),
            "entry_count": int(reset_summary["entry_count"]),
            "bytes_written": int(reset_summary["bytes_written"]),
        }
        return payload
    def exec_workspace(
        self,
        workspace_id: str,
        *,
        command: str,
        timeout_seconds: int = 30,
        secret_env: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Run a shell command in a started workspace and record it in the log.

        Args:
            workspace_id: Identifier of an existing workspace.
            command: Shell command executed with the workspace dir as cwd.
            timeout_seconds: Positive wall-clock limit for the command.
            secret_env: Optional mapping of workspace secret name -> env var
                name to expose to this command only.

        Returns:
            Dict with redacted stdout/stderr, exit code, duration, execution
            mode, and the command-log sequence number.

        Raises:
            ValueError: If timeout_seconds is not positive, or secret_env
                references an unknown secret.
            RuntimeError: If the workspace is not in the 'started' state.
        """
        if timeout_seconds <= 0:
            raise ValueError("timeout_seconds must be positive")
        normalized_secret_env = _normalize_workspace_secret_env_mapping(secret_env)
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            self._ensure_workspace_not_expired_locked(workspace, time.time())
            self._refresh_workspace_liveness_locked(workspace)
            if workspace.state != "started":
                raise RuntimeError(
                    f"workspace {workspace_id} must be in 'started' state before workspace_exec"
                )
            instance = workspace.to_instance(
                workdir=self._workspace_runtime_dir(workspace.workspace_id)
            )
            redact_values = self._workspace_secret_redact_values_locked(workspace)
            env_values = self._workspace_secret_env_values_locked(workspace, normalized_secret_env)
            if workspace.secrets and normalized_secret_env:
                # Re-install secrets in the guest so the requested env vars
                # resolve inside the VM before the command runs.
                self._install_workspace_secrets_locked(workspace, instance)
        # The command itself runs outside the lock so other workspace
        # operations can interleave while the guest executes.
        try:
            exec_result, execution_mode = self._exec_instance(
                instance,
                command=command,
                timeout_seconds=timeout_seconds,
                host_workdir=self._workspace_host_dir(workspace.workspace_id),
                guest_cwd=WORKSPACE_GUEST_PATH,
                env=env_values or None,
            )
        except Exception as exc:
            # Never let raw secret values escape through error text.
            raise _redact_exception(exc, redact_values) from exc
        # Scrub secret values from captured output before it is persisted
        # or returned.
        redacted_exec_result = VmExecResult(
            stdout=_redact_text(exec_result.stdout, redact_values),
            stderr=_redact_text(exec_result.stderr, redact_values),
            exit_code=exec_result.exit_code,
            duration_ms=exec_result.duration_ms,
        )
        with self._lock:
            # Reload and merge: the persisted record may have changed while
            # the command ran; the instance carries fresh VM state.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            entry = self._record_workspace_command_locked(
                workspace,
                command=command,
                exec_result=redacted_exec_result,
                execution_mode=execution_mode,
                cwd=WORKSPACE_GUEST_PATH,
            )
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "environment": workspace.environment,
            "environment_version": workspace.metadata.get("environment_version"),
            "command": command,
            "stdout": redacted_exec_result.stdout,
            "stderr": redacted_exec_result.stderr,
            "exit_code": redacted_exec_result.exit_code,
            "duration_ms": redacted_exec_result.duration_ms,
            "execution_mode": execution_mode,
            "sequence": entry["sequence"],
            "cwd": WORKSPACE_GUEST_PATH,
        }
    def open_shell(
        self,
        workspace_id: str,
        *,
        cwd: str = WORKSPACE_GUEST_PATH,
        cols: int = DEFAULT_SHELL_COLS,
        rows: int = DEFAULT_SHELL_ROWS,
        secret_env: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Open an interactive shell session in a live workspace.

        Args:
            workspace_id: Identifier of an existing workspace.
            cwd: Guest working directory for the shell (normalized/validated).
            cols: Positive terminal width.
            rows: Positive terminal height.
            secret_env: Optional mapping of secret name -> env var name.

        Returns:
            The serialized shell record (id, geometry, state, timing).

        Raises:
            ValueError: For non-positive cols/rows or bad cwd/secret_env.
        """
        if cols <= 0:
            raise ValueError("cols must be positive")
        if rows <= 0:
            raise ValueError("rows must be positive")
        normalized_cwd, _ = _normalize_workspace_destination(cwd)
        normalized_secret_env = _normalize_workspace_secret_env_mapping(secret_env)
        # Short random id; the backend keys the session by (workspace, shell).
        shell_id = uuid.uuid4().hex[:12]
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_shell_locked(workspace)
            redact_values = self._workspace_secret_redact_values_locked(workspace)
            env_values = self._workspace_secret_env_values_locked(workspace, normalized_secret_env)
            if workspace.secrets and normalized_secret_env:
                self._install_workspace_secrets_locked(workspace, instance)
        # Backend call happens outside the lock.
        try:
            payload = self._backend.open_shell(
                instance,
                workspace_id=workspace_id,
                shell_id=shell_id,
                cwd=normalized_cwd,
                cols=cols,
                rows=rows,
                env=env_values or None,
                redact_values=redact_values,
            )
        except Exception as exc:
            # Scrub secret values from any error text before propagating.
            raise _redact_exception(exc, redact_values) from exc
        shell = self._workspace_shell_record_from_payload(
            workspace_id=workspace_id,
            shell_id=shell_id,
            payload=payload,
        )
        with self._lock:
            # Merge fresh VM state back into the persisted workspace record,
            # then persist the new shell record.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
            self._save_workspace_shell_locked(shell)
        return self._serialize_workspace_shell(shell)
    def read_shell(
        self,
        workspace_id: str,
        shell_id: str,
        *,
        cursor: int = 0,
        max_chars: int = DEFAULT_SHELL_MAX_CHARS,
    ) -> dict[str, Any]:
        """Read buffered output from an interactive shell session.

        Args:
            workspace_id: Identifier of an existing workspace.
            shell_id: Identifier of a previously opened shell.
            cursor: Non-negative read offset into the shell's output stream.
            max_chars: Positive cap on characters returned per call.

        Returns:
            The serialized shell record plus cursor/next_cursor, the redacted
            output text, and a truncated flag.

        Raises:
            ValueError: For a negative cursor or non-positive max_chars.
        """
        if cursor < 0:
            raise ValueError("cursor must not be negative")
        if max_chars <= 0:
            raise ValueError("max_chars must be positive")
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_shell_locked(workspace)
            shell = self._load_workspace_shell_locked(workspace_id, shell_id)
            redact_values = self._workspace_secret_redact_values_locked(workspace)
        # Backend call happens outside the lock.
        try:
            payload = self._backend.read_shell(
                instance,
                workspace_id=workspace_id,
                shell_id=shell_id,
                cursor=cursor,
                max_chars=max_chars,
            )
        except Exception as exc:
            raise _redact_exception(exc, redact_values) from exc
        updated_shell = self._workspace_shell_record_from_payload(
            workspace_id=workspace_id,
            shell_id=shell_id,
            payload=payload,
            metadata=shell.metadata,
        )
        with self._lock:
            # Merge fresh VM state and persist the refreshed shell record.
            # Note: unlike write/signal/close, reading does not bump
            # last_activity_at -- reads are not treated as mutating activity.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
            self._save_workspace_shell_locked(updated_shell)
        response = self._serialize_workspace_shell(updated_shell)
        response.update(
            {
                "cursor": int(payload.get("cursor", cursor)),
                "next_cursor": int(payload.get("next_cursor", cursor)),
                "output": _redact_text(str(payload.get("output", "")), redact_values),
                "truncated": bool(payload.get("truncated", False)),
            }
        )
        return response
    def write_shell(
        self,
        workspace_id: str,
        shell_id: str,
        *,
        input_text: str,
        append_newline: bool = True,
    ) -> dict[str, Any]:
        """Send input text to an interactive shell session.

        Args:
            workspace_id: Identifier of an existing workspace.
            shell_id: Identifier of a previously opened shell.
            input_text: Text delivered to the shell's stdin.
            append_newline: Whether a trailing newline is appended.

        Returns:
            The serialized shell record plus input_length and append_newline
            echoed from the backend payload.
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_shell_locked(workspace)
            shell = self._load_workspace_shell_locked(workspace_id, shell_id)
            redact_values = self._workspace_secret_redact_values_locked(workspace)
        # Backend call happens outside the lock.
        try:
            payload = self._backend.write_shell(
                instance,
                workspace_id=workspace_id,
                shell_id=shell_id,
                input_text=input_text,
                append_newline=append_newline,
            )
        except Exception as exc:
            # Scrub secret values from any error text before propagating.
            raise _redact_exception(exc, redact_values) from exc
        updated_shell = self._workspace_shell_record_from_payload(
            workspace_id=workspace_id,
            shell_id=shell_id,
            payload=payload,
            metadata=shell.metadata,
        )
        with self._lock:
            # Merge fresh VM state; writing counts as mutating activity.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
            self._save_workspace_shell_locked(updated_shell)
        response = self._serialize_workspace_shell(updated_shell)
        response.update(
            {
                "input_length": int(payload.get("input_length", len(input_text))),
                "append_newline": bool(payload.get("append_newline", append_newline)),
            }
        )
        return response
    def signal_shell(
        self,
        workspace_id: str,
        shell_id: str,
        *,
        signal_name: str = "INT",
    ) -> dict[str, Any]:
        """Deliver a signal (e.g. INT) to an interactive shell session.

        Args:
            workspace_id: Identifier of an existing workspace.
            shell_id: Identifier of a previously opened shell.
            signal_name: Case-insensitive name; must be one of
                WORKSPACE_SHELL_SIGNAL_NAMES.

        Returns:
            The serialized shell record plus the delivered "signal" name.

        Raises:
            ValueError: For an unsupported signal name.
        """
        normalized_signal = signal_name.upper()
        if normalized_signal not in WORKSPACE_SHELL_SIGNAL_NAMES:
            raise ValueError(
                f"signal_name must be one of: {', '.join(WORKSPACE_SHELL_SIGNAL_NAMES)}"
            )
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_shell_locked(workspace)
            shell = self._load_workspace_shell_locked(workspace_id, shell_id)
            redact_values = self._workspace_secret_redact_values_locked(workspace)
        # Backend call happens outside the lock.
        try:
            payload = self._backend.signal_shell(
                instance,
                workspace_id=workspace_id,
                shell_id=shell_id,
                signal_name=normalized_signal,
            )
        except Exception as exc:
            raise _redact_exception(exc, redact_values) from exc
        updated_shell = self._workspace_shell_record_from_payload(
            workspace_id=workspace_id,
            shell_id=shell_id,
            payload=payload,
            metadata=shell.metadata,
        )
        with self._lock:
            # Merge fresh VM state; signalling counts as mutating activity.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
            self._save_workspace_shell_locked(updated_shell)
        response = self._serialize_workspace_shell(updated_shell)
        response["signal"] = str(payload.get("signal", normalized_signal))
        return response
    def close_shell(
        self,
        workspace_id: str,
        shell_id: str,
    ) -> dict[str, Any]:
        """Close an interactive shell session and drop its persisted record.

        Args:
            workspace_id: Identifier of an existing workspace.
            shell_id: Identifier of a previously opened shell.

        Returns:
            The final serialized shell state plus a "closed" flag echoed
            from the backend payload.
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_shell_locked(workspace)
            shell = self._load_workspace_shell_locked(workspace_id, shell_id)
            redact_values = self._workspace_secret_redact_values_locked(workspace)
        # Backend call happens outside the lock.
        try:
            payload = self._backend.close_shell(
                instance,
                workspace_id=workspace_id,
                shell_id=shell_id,
            )
        except Exception as exc:
            # Scrub secret values from any error text before propagating.
            raise _redact_exception(exc, redact_values) from exc
        closed_shell = self._workspace_shell_record_from_payload(
            workspace_id=workspace_id,
            shell_id=shell_id,
            payload=payload,
            metadata=shell.metadata,
        )
        with self._lock:
            # Merge fresh VM state, then delete (not save) the shell record.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
            self._delete_workspace_shell_locked(workspace_id, shell_id)
        response = self._serialize_workspace_shell(closed_shell)
        response["closed"] = bool(payload.get("closed", True))
        return response
    def start_service(
        self,
        workspace_id: str,
        service_name: str,
        *,
        command: str,
        cwd: str = WORKSPACE_GUEST_PATH,
        readiness: dict[str, Any] | None = None,
        ready_timeout_seconds: int = DEFAULT_SERVICE_READY_TIMEOUT_SECONDS,
        ready_interval_ms: int = DEFAULT_SERVICE_READY_INTERVAL_MS,
        secret_env: dict[str, str] | None = None,
        published_ports: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        """Start a long-running named service inside a workspace.

        Args:
            workspace_id: Identifier of an existing workspace.
            service_name: Service name (normalized/validated).
            command: Shell command run as the service process.
            cwd: Guest working directory (normalized/validated).
            readiness: Optional readiness-probe spec forwarded to the backend.
            ready_timeout_seconds: Positive readiness wait limit.
            ready_interval_ms: Positive readiness poll interval.
            secret_env: Optional mapping of secret name -> env var name.
            published_ports: Optional host port publications; requires the
                'egress+published-ports' network policy and live networking.

        Returns:
            The serialized service record.

        Raises:
            ValueError: For invalid names, timings, or port/secret specs.
            RuntimeError: If a service of the same name is already running,
                or port publication prerequisites are not met.
        """
        normalized_service_name = _normalize_workspace_service_name(service_name)
        normalized_cwd, _ = _normalize_workspace_destination(cwd)
        normalized_readiness = _normalize_workspace_service_readiness(readiness)
        normalized_secret_env = _normalize_workspace_secret_env_mapping(secret_env)
        normalized_published_ports = _normalize_workspace_published_port_specs(published_ports)
        if ready_timeout_seconds <= 0:
            raise ValueError("ready_timeout_seconds must be positive")
        if ready_interval_ms <= 0:
            raise ValueError("ready_interval_ms must be positive")
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_service_locked(workspace)
            if normalized_published_ports:
                # Port publication prerequisites are validated up front, before
                # the backend is asked to start anything.
                if workspace.network_policy != "egress+published-ports":
                    raise RuntimeError(
                        "published ports require workspace network_policy "
                        "'egress+published-ports'"
                    )
                if instance.network is None:
                    raise RuntimeError(
                        "published ports require an active guest network configuration"
                    )
            redact_values = self._workspace_secret_redact_values_locked(workspace)
            env_values = self._workspace_secret_env_values_locked(workspace, normalized_secret_env)
            if workspace.secrets and normalized_secret_env:
                self._install_workspace_secrets_locked(workspace, instance)
            existing = self._load_workspace_service_locked_optional(
                workspace_id,
                normalized_service_name,
            )
            if existing is not None:
                # Refresh first: a stale "running" record may have exited.
                existing = self._refresh_workspace_service_locked(
                    workspace,
                    instance,
                    existing,
                )
                if existing.state == "running":
                    raise RuntimeError(
                        f"service {normalized_service_name!r} is already running in "
                        f"workspace {workspace_id!r}"
                    )
                # Stale/stopped record: clear its artifacts before restart.
                self._delete_workspace_service_artifacts_locked(
                    workspace_id,
                    normalized_service_name,
                )
        # Backend call happens outside the lock.
        try:
            payload = self._backend.start_service(
                instance,
                workspace_id=workspace_id,
                service_name=normalized_service_name,
                command=command,
                cwd=normalized_cwd,
                readiness=normalized_readiness,
                ready_timeout_seconds=ready_timeout_seconds,
                ready_interval_ms=ready_interval_ms,
                env=env_values or None,
            )
        except Exception as exc:
            # Scrub secret values from any error text before propagating.
            raise _redact_exception(exc, redact_values) from exc
        service = self._workspace_service_record_from_payload(
            workspace_id=workspace_id,
            service_name=normalized_service_name,
            payload=payload,
        )
        if normalized_published_ports:
            assert instance.network is not None  # guarded above
            try:
                service.published_ports = self._start_workspace_service_published_ports(
                    workspace=workspace,
                    service=service,
                    guest_ip=instance.network.guest_ip,
                    published_ports=normalized_published_ports,
                )
            except Exception:
                # Port publication failed: compensate by stopping the service
                # we just started so it does not run without its ports.
                try:
                    failed_payload = self._backend.stop_service(
                        instance,
                        workspace_id=workspace_id,
                        service_name=normalized_service_name,
                    )
                    service = self._workspace_service_record_from_payload(
                        workspace_id=workspace_id,
                        service_name=normalized_service_name,
                        payload=failed_payload,
                        published_ports=[],
                    )
                except Exception:
                    # Even the compensating stop failed; force-mark failed.
                    service.state = "failed"
                    service.stop_reason = "published_port_failed"
                    service.ended_at = service.ended_at or time.time()
                else:
                    # Compensating stop succeeded; record is still marked
                    # failed because the requested ports never came up.
                    service.state = "failed"
                    service.stop_reason = "published_port_failed"
                    service.ended_at = service.ended_at or time.time()
        with self._lock:
            # Merge fresh VM state and persist both records.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
            self._save_workspace_service_locked(service)
        return self._serialize_workspace_service(service)
def list_services(self, workspace_id: str) -> dict[str, Any]:
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_service_locked(workspace)
services = self._refresh_workspace_services_locked(workspace, instance)
self._save_workspace_locked(workspace)
serialized = [self._serialize_workspace_service(service) for service in services]
return {
"workspace_id": workspace_id,
"count": len(serialized),
"running_count": sum(1 for item in serialized if item["state"] == "running"),
"services": serialized,
}
def status_service(self, workspace_id: str, service_name: str) -> dict[str, Any]:
normalized_service_name = _normalize_workspace_service_name(service_name)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_service_locked(workspace)
service = self._load_workspace_service_locked(workspace_id, normalized_service_name)
service = self._refresh_workspace_service_locked(workspace, instance, service)
self._save_workspace_locked(workspace)
self._save_workspace_service_locked(service)
return self._serialize_workspace_service(service)
def logs_service(
self,
workspace_id: str,
service_name: str,
*,
tail_lines: int | None = DEFAULT_SERVICE_LOG_TAIL_LINES,
) -> dict[str, Any]:
normalized_service_name = _normalize_workspace_service_name(service_name)
if tail_lines is not None and tail_lines <= 0:
raise ValueError("tail_lines must be positive")
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_service_locked(workspace)
service = self._load_workspace_service_locked(workspace_id, normalized_service_name)
redact_values = self._workspace_secret_redact_values_locked(workspace)
try:
payload = self._backend.logs_service(
instance,
workspace_id=workspace_id,
service_name=normalized_service_name,
tail_lines=tail_lines,
)
except Exception as exc:
raise _redact_exception(exc, redact_values) from exc
service = self._workspace_service_record_from_payload(
workspace_id=workspace_id,
service_name=normalized_service_name,
payload=payload,
metadata=service.metadata,
published_ports=service.published_ports,
)
if service.published_ports:
for published_port in service.published_ports:
_stop_workspace_published_port_proxy(published_port)
service.published_ports = [
WorkspacePublishedPortRecord(
guest_port=published_port.guest_port,
host_port=published_port.host_port,
host=published_port.host,
protocol=published_port.protocol,
proxy_pid=None,
)
for published_port in service.published_ports
]
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._touch_workspace_activity_locked(workspace)
self._save_workspace_locked(workspace)
self._save_workspace_service_locked(service)
response = self._serialize_workspace_service(service)
response.update(
{
"stdout": _redact_text(str(payload.get("stdout", "")), redact_values),
"stderr": _redact_text(str(payload.get("stderr", "")), redact_values),
"tail_lines": tail_lines,
"truncated": bool(payload.get("truncated", False)),
}
)
return response
def stop_service(self, workspace_id: str, service_name: str) -> dict[str, Any]:
normalized_service_name = _normalize_workspace_service_name(service_name)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_service_locked(workspace)
service = self._load_workspace_service_locked(workspace_id, normalized_service_name)
redact_values = self._workspace_secret_redact_values_locked(workspace)
try:
payload = self._backend.stop_service(
instance,
workspace_id=workspace_id,
service_name=normalized_service_name,
)
except Exception as exc:
raise _redact_exception(exc, redact_values) from exc
service = self._workspace_service_record_from_payload(
workspace_id=workspace_id,
service_name=normalized_service_name,
payload=payload,
metadata=service.metadata,
published_ports=service.published_ports,
)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
self._save_workspace_service_locked(service)
return self._serialize_workspace_service(service)
def status_workspace(self, workspace_id: str) -> dict[str, Any]:
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
self._refresh_workspace_service_counts_locked(workspace)
self._save_workspace_locked(workspace)
return self._serialize_workspace(workspace)
def logs_workspace(self, workspace_id: str) -> dict[str, Any]:
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
self._save_workspace_locked(workspace)
entries = self._read_workspace_logs_locked(workspace.workspace_id)
redact_values = self._workspace_secret_redact_values_locked(workspace)
redacted_entries = []
for entry in entries:
redacted_entry = dict(entry)
redacted_entry["stdout"] = _redact_text(str(entry.get("stdout", "")), redact_values)
redacted_entry["stderr"] = _redact_text(str(entry.get("stderr", "")), redact_values)
redacted_entries.append(redacted_entry)
return {
"workspace_id": workspace.workspace_id,
"count": len(redacted_entries),
"entries": redacted_entries,
}
    def stop_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Stop a workspace VM, tearing down services, shells, and runtime state.

        On failure the workspace is still forced into the 'stopped' state with
        the error recorded in last_error, then the exception is re-raised.

        Returns:
            The serialized workspace record after stopping.
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            self._ensure_workspace_not_expired_locked(workspace, time.time())
            self._refresh_workspace_liveness_locked(workspace)
            instance = workspace.to_instance(
                workdir=self._workspace_runtime_dir(workspace.workspace_id)
            )
            try:
                # Orderly teardown: services first, then shells, then flush
                # the guest filesystem before halting the VM itself.
                self._stop_workspace_services_locked(workspace, instance)
                self._close_workspace_shells_locked(workspace, instance)
                self._flush_workspace_filesystem_locked(workspace, instance)
                if workspace.state == "started":
                    self._backend.stop(instance)
                workspace.state = "stopped"
                workspace.firecracker_pid = None
                workspace.last_error = None
                workspace.metadata = dict(instance.metadata)
                self._scrub_workspace_runtime_state_locked(workspace)
            except Exception as exc:
                # Record the failure but leave the workspace in a consistent
                # 'stopped' state before re-raising.
                workspace.state = "stopped"
                workspace.firecracker_pid = None
                workspace.last_error = str(exc)
                workspace.metadata = dict(instance.metadata)
                self._touch_workspace_activity_locked(workspace)
                self._save_workspace_locked(workspace)
                raise
            self._touch_workspace_activity_locked(workspace)
            self._save_workspace_locked(workspace)
        return self._serialize_workspace(workspace)
    def start_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Start (or confirm) a workspace VM and return the serialized record.

        If the workspace is already 'started' this only refreshes counts and
        the activity timestamp. On a failed start the VM is best-effort
        stopped, the workspace is marked 'stopped' with last_error set, and
        the exception is re-raised.
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            self._ensure_workspace_not_expired_locked(workspace, time.time())
            self._refresh_workspace_liveness_locked(workspace)
            if workspace.state == "started":
                # Already live: no boot needed, just refresh and return.
                self._refresh_workspace_service_counts_locked(workspace)
                self._touch_workspace_activity_locked(workspace)
                self._save_workspace_locked(workspace)
                return self._serialize_workspace(workspace)
            instance = workspace.to_instance(
                workdir=self._workspace_runtime_dir(workspace.workspace_id)
            )
            # Clear any stale service/shell records from the previous run.
            self._stop_workspace_services_locked(workspace, instance)
            self._close_workspace_shells_locked(workspace, instance)
        # Boot happens outside the lock; it is re-acquired below to persist.
        try:
            self._require_workspace_network_policy_support(
                network_policy=workspace.network_policy
            )
            if self._runtime_capabilities.supports_guest_exec:
                self._ensure_workspace_guest_bootstrap_support(instance)
            with self._lock:
                self._start_instance_locked(instance)
                workspace = self._load_workspace_locked(workspace_id)
                if workspace.secrets:
                    self._install_workspace_secrets_locked(workspace, instance)
                workspace.state = instance.state
                workspace.firecracker_pid = instance.firecracker_pid
                workspace.last_error = None
                workspace.metadata = dict(instance.metadata)
                self._touch_workspace_activity_locked(workspace)
                self._save_workspace_locked(workspace)
                return self._serialize_workspace(workspace)
        except Exception as exc:
            # Best-effort rollback: stop a half-started VM, then persist the
            # failure on the workspace record before re-raising.
            try:
                if instance.state == "started":
                    self._backend.stop(instance)
            except Exception:
                pass
            with self._lock:
                workspace = self._load_workspace_locked(workspace_id)
                workspace.state = "stopped"
                workspace.firecracker_pid = None
                workspace.last_error = str(exc)
                workspace.metadata = dict(instance.metadata)
                self._touch_workspace_activity_locked(workspace)
                self._save_workspace_locked(workspace)
            raise
def export_workspace_disk(
self,
workspace_id: str,
*,
output_path: str | Path,
) -> dict[str, Any]:
raw_output_path = str(output_path).strip()
if raw_output_path == "":
raise ValueError("output_path must not be empty")
resolved_output_path = Path(output_path).expanduser().resolve()
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
rootfs_path = self._workspace_stopped_disk_rootfs_locked(
workspace,
operation_name="workspace_disk_export",
)
self._scrub_workspace_runtime_state_locked(workspace, rootfs_path=rootfs_path)
self._save_workspace_locked(workspace)
exported = export_workspace_disk_image(rootfs_path, output_path=resolved_output_path)
return {
"workspace_id": workspace_id,
"output_path": str(Path(str(exported["output_path"]))),
"disk_format": str(exported["disk_format"]),
"bytes_written": int(exported["bytes_written"]),
}
def list_workspace_disk(
self,
workspace_id: str,
*,
path: str = WORKSPACE_GUEST_PATH,
recursive: bool = False,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_disk_path(path)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
rootfs_path = self._workspace_stopped_disk_rootfs_locked(
workspace,
operation_name="workspace_disk_list",
)
self._scrub_workspace_runtime_state_locked(workspace, rootfs_path=rootfs_path)
self._save_workspace_locked(workspace)
entries = list_workspace_disk(
rootfs_path,
guest_path=normalized_path,
recursive=recursive,
)
return {
"workspace_id": workspace_id,
"path": normalized_path,
"recursive": recursive,
"entries": entries,
}
def read_workspace_disk(
self,
workspace_id: str,
*,
path: str,
max_bytes: int = DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_disk_path(path)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
rootfs_path = self._workspace_stopped_disk_rootfs_locked(
workspace,
operation_name="workspace_disk_read",
)
self._scrub_workspace_runtime_state_locked(workspace, rootfs_path=rootfs_path)
self._save_workspace_locked(workspace)
payload = read_workspace_disk_file(
rootfs_path,
guest_path=normalized_path,
max_bytes=max_bytes,
)
payload["workspace_id"] = workspace_id
return payload
def delete_workspace(
self,
workspace_id: str,
*,
reason: str = "explicit_delete",
) -> dict[str, Any]:
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
self._stop_workspace_services_locked(workspace, instance)
self._close_workspace_shells_locked(workspace, instance)
if workspace.state == "started":
self._backend.stop(instance)
workspace.state = "stopped"
self._backend.delete(instance)
shutil.rmtree(self._workspace_dir(workspace_id), ignore_errors=True)
return {"workspace_id": workspace_id, "deleted": True, "reason": reason}
def _validate_limits(self, *, vcpu_count: int, mem_mib: int, ttl_seconds: int) -> None:
if not self.MIN_VCPUS <= vcpu_count <= self.MAX_VCPUS:
raise ValueError(f"vcpu_count must be between {self.MIN_VCPUS} and {self.MAX_VCPUS}")
if not self.MIN_MEM_MIB <= mem_mib <= self.MAX_MEM_MIB:
raise ValueError(f"mem_mib must be between {self.MIN_MEM_MIB} and {self.MAX_MEM_MIB}")
if not self.MIN_TTL_SECONDS <= ttl_seconds <= self.MAX_TTL_SECONDS:
raise ValueError(
f"ttl_seconds must be between {self.MIN_TTL_SECONDS} and {self.MAX_TTL_SECONDS}"
)
def _serialize(self, instance: VmInstance) -> dict[str, Any]:
return {
"vm_id": instance.vm_id,
"environment": instance.environment,
"environment_version": instance.metadata.get("environment_version"),
"vcpu_count": instance.vcpu_count,
"mem_mib": instance.mem_mib,
"ttl_seconds": instance.ttl_seconds,
"created_at": instance.created_at,
"expires_at": instance.expires_at,
"state": instance.state,
"network_enabled": instance.network is not None,
"allow_host_compat": instance.allow_host_compat,
"guest_ip": instance.network.guest_ip if instance.network is not None else None,
"tap_name": instance.network.tap_name if instance.network is not None else None,
"execution_mode": instance.metadata.get("execution_mode", "pending"),
"metadata": instance.metadata,
}
def _serialize_workspace(self, workspace: WorkspaceRecord) -> dict[str, Any]:
service_count, running_service_count = self._workspace_service_counts_locked(
workspace.workspace_id
)
return {
"workspace_id": workspace.workspace_id,
"name": workspace.name,
"labels": dict(workspace.labels),
"environment": workspace.environment,
"environment_version": workspace.metadata.get("environment_version"),
"vcpu_count": workspace.vcpu_count,
"mem_mib": workspace.mem_mib,
"ttl_seconds": workspace.ttl_seconds,
"created_at": workspace.created_at,
"last_activity_at": workspace.last_activity_at,
"expires_at": workspace.expires_at,
"state": workspace.state,
"network_policy": workspace.network_policy,
"network_enabled": workspace.network is not None,
"allow_host_compat": workspace.allow_host_compat,
"guest_ip": workspace.network.guest_ip if workspace.network is not None else None,
"tap_name": workspace.network.tap_name if workspace.network is not None else None,
"execution_mode": workspace.metadata.get("execution_mode", "pending"),
"workspace_path": WORKSPACE_GUEST_PATH,
"workspace_seed": _workspace_seed_dict(workspace.workspace_seed),
"secrets": [
_serialize_workspace_secret_public(secret) for secret in workspace.secrets
],
"command_count": workspace.command_count,
"last_command": workspace.last_command,
"reset_count": workspace.reset_count,
"last_reset_at": workspace.last_reset_at,
"service_count": service_count,
"running_service_count": running_service_count,
"metadata": workspace.metadata,
}
def _serialize_workspace_list_item(self, workspace: WorkspaceRecord) -> dict[str, Any]:
service_count, running_service_count = self._workspace_service_counts_locked(
workspace.workspace_id
)
return {
"workspace_id": workspace.workspace_id,
"name": workspace.name,
"labels": dict(workspace.labels),
"environment": workspace.environment,
"state": workspace.state,
"created_at": workspace.created_at,
"last_activity_at": workspace.last_activity_at,
"expires_at": workspace.expires_at,
"command_count": workspace.command_count,
"service_count": service_count,
"running_service_count": running_service_count,
}
def _serialize_workspace_shell(self, shell: WorkspaceShellRecord) -> dict[str, Any]:
return {
"workspace_id": shell.workspace_id,
"shell_id": shell.shell_id,
"cwd": shell.cwd,
"cols": shell.cols,
"rows": shell.rows,
"state": shell.state,
"started_at": shell.started_at,
"ended_at": shell.ended_at,
"exit_code": shell.exit_code,
"execution_mode": shell.execution_mode,
}
def _serialize_workspace_service(self, service: WorkspaceServiceRecord) -> dict[str, Any]:
return {
"workspace_id": service.workspace_id,
"service_name": service.service_name,
"state": service.state,
"command": service.command,
"cwd": service.cwd,
"started_at": service.started_at,
"ended_at": service.ended_at,
"exit_code": service.exit_code,
"pid": service.pid,
"execution_mode": service.execution_mode,
"readiness": dict(service.readiness) if service.readiness is not None else None,
"ready_at": service.ready_at,
"stop_reason": service.stop_reason,
"published_ports": [
_serialize_workspace_published_port_public(published_port)
for published_port in service.published_ports
],
}
def _serialize_workspace_snapshot(self, snapshot: WorkspaceSnapshotRecord) -> dict[str, Any]:
return {
"workspace_id": snapshot.workspace_id,
"snapshot_name": snapshot.snapshot_name,
"kind": snapshot.kind,
"created_at": snapshot.created_at,
"entry_count": snapshot.entry_count,
"bytes_written": snapshot.bytes_written,
"deletable": snapshot.kind != "baseline",
}
def _require_guest_boot_or_opt_in(self, instance: VmInstance) -> None:
if self._runtime_capabilities.supports_vm_boot or instance.allow_host_compat:
return
reason = self._runtime_capabilities.reason or "runtime does not support real VM boot"
raise RuntimeError(
"guest boot is unavailable and host compatibility mode is disabled: "
f"{reason}. Set allow_host_compat=True (CLI: --allow-host-compat) to opt into "
"host execution."
)
def _require_guest_exec_or_opt_in(self, instance: VmInstance) -> None:
if self._runtime_capabilities.supports_guest_exec or instance.allow_host_compat:
return
reason = self._runtime_capabilities.reason or (
"runtime does not support guest command execution"
)
raise RuntimeError(
"guest command execution is unavailable and host compatibility mode is disabled: "
f"{reason}. Set allow_host_compat=True (CLI: --allow-host-compat) to opt into "
"host execution."
)
def _require_workspace_shell_support(self, instance: VmInstance) -> None:
if self._backend_name == "mock":
return
if self._runtime_capabilities.supports_guest_exec:
return
reason = self._runtime_capabilities.reason or (
"runtime does not support guest interactive shell sessions"
)
raise RuntimeError(
"interactive shells require guest execution and are unavailable for this "
f"workspace: {reason}"
)
def _require_workspace_service_support(self, instance: VmInstance) -> None:
if self._backend_name == "mock":
return
if self._runtime_capabilities.supports_guest_exec or instance.allow_host_compat:
return
reason = self._runtime_capabilities.reason or (
"runtime does not support guest-backed or host-compatible service execution"
)
raise RuntimeError(
"workspace services are unavailable for this workspace: "
f"{reason}. Recreate the workspace with --allow-host-compat to opt into "
"host compatibility when guest execution is unavailable."
)
def _require_workspace_secret_support(self, instance: VmInstance) -> None:
if self._backend_name == "mock":
return
if self._runtime_capabilities.supports_guest_exec:
return
reason = self._runtime_capabilities.reason or (
"runtime does not support guest-backed secret installation"
)
raise RuntimeError(
"workspace secrets require guest execution and are unavailable for this "
f"workspace: {reason}"
)
def _require_workspace_network_policy_support(
self,
*,
network_policy: WorkspaceNetworkPolicy,
) -> None:
if network_policy == "off":
return
if self._runtime_capabilities.supports_guest_network:
return
reason = self._runtime_capabilities.reason or (
"runtime does not support guest-backed workspace networking"
)
raise RuntimeError(
"workspace network_policy requires guest networking and is unavailable for this "
f"workspace: {reason}"
)
def _workspace_secret_values_locked(self, workspace: WorkspaceRecord) -> dict[str, str]:
    """Load the values of the workspace's secrets from its workspace directory."""
    workspace_dir = self._workspace_dir(workspace.workspace_id)
    return _load_workspace_secret_values(
        workspace_dir=workspace_dir,
        secrets=workspace.secrets,
    )
def _workspace_secret_redact_values_locked(self, workspace: WorkspaceRecord) -> list[str]:
return list(self._workspace_secret_values_locked(workspace).values())
def _workspace_secret_env_values_locked(
self,
workspace: WorkspaceRecord,
secret_env: dict[str, str],
) -> dict[str, str]:
secret_values = self._workspace_secret_values_locked(workspace)
env_values: dict[str, str] = {}
for secret_name, env_name in secret_env.items():
if secret_name not in secret_values:
raise ValueError(
f"secret_env references unknown workspace secret {secret_name!r}"
)
env_values[env_name] = secret_values[secret_name]
return env_values
def _touch_workspace_activity_locked(
self,
workspace: WorkspaceRecord,
*,
when: float | None = None,
) -> None:
workspace.last_activity_at = time.time() if when is None else when
def _install_workspace_secrets_locked(
self,
workspace: WorkspaceRecord,
instance: VmInstance,
) -> None:
if not workspace.secrets:
return
self._require_workspace_secret_support(instance)
with tempfile.TemporaryDirectory(prefix="pyro-workspace-secrets-") as temp_dir:
archive_path = Path(temp_dir) / "workspace-secrets.tar"
_build_workspace_secret_archive(
workspace_dir=self._workspace_dir(workspace.workspace_id),
secrets=workspace.secrets,
archive_path=archive_path,
)
self._backend.install_secrets(instance, archive_path=archive_path)
def _get_instance_locked(self, vm_id: str) -> VmInstance:
try:
return self._instances[vm_id]
except KeyError as exc:
raise ValueError(f"vm {vm_id!r} does not exist") from exc
def _reap_expired_locked(self, now: float) -> None:
expired_vm_ids = [
vm_id for vm_id, inst in self._instances.items() if inst.expires_at <= now
]
for vm_id in expired_vm_ids:
instance = self._instances[vm_id]
if instance.state == "started":
self._backend.stop(instance)
instance.state = "stopped"
self._backend.delete(instance)
del self._instances[vm_id]
def _ensure_not_expired_locked(self, instance: VmInstance, now: float) -> None:
if instance.expires_at <= now:
vm_id = instance.vm_id
self._reap_expired_locked(now)
raise RuntimeError(f"vm {vm_id!r} expired and was automatically deleted")
def _start_instance_locked(self, instance: VmInstance) -> None:
if instance.state not in {"created", "stopped"}:
raise RuntimeError(
f"vm {instance.vm_id} cannot be started from state {instance.state!r}"
)
self._require_guest_boot_or_opt_in(instance)
if not self._runtime_capabilities.supports_vm_boot:
instance.metadata["execution_mode"] = "host_compat"
instance.metadata["boot_mode"] = "compat"
if self._runtime_capabilities.reason is not None:
instance.metadata["runtime_reason"] = self._runtime_capabilities.reason
self._backend.start(instance)
instance.state = "started"
def _exec_instance(
self,
instance: VmInstance,
*,
command: str,
timeout_seconds: int,
host_workdir: Path | None = None,
guest_cwd: str | None = None,
env: dict[str, str] | None = None,
) -> tuple[VmExecResult, str]:
if timeout_seconds <= 0:
raise ValueError("timeout_seconds must be positive")
if instance.state != "started":
raise RuntimeError(f"vm {instance.vm_id} must be in 'started' state before execution")
self._require_guest_exec_or_opt_in(instance)
prepared_command = command
if self._runtime_capabilities.supports_guest_exec:
prepared_command = _wrap_guest_command(command, cwd=guest_cwd)
workdir = None
else:
instance.metadata["execution_mode"] = "host_compat"
workdir = host_workdir
if env is None:
exec_result = self._backend.exec(
instance,
prepared_command,
timeout_seconds,
workdir=workdir,
)
else:
exec_result = self._backend.exec(
instance,
prepared_command,
timeout_seconds,
workdir=workdir,
env=env,
)
execution_mode = instance.metadata.get("execution_mode", "unknown")
return exec_result, execution_mode
def _prepare_workspace_seed(self, seed_path: str | Path | None) -> PreparedWorkspaceSeed:
    """Normalize a caller-supplied seed into an archive-backed description.

    ``None`` yields an empty seed; a directory is packed into a tar archive in
    a fresh scratch directory (recorded as ``cleanup_dir`` on the result for
    later removal); an existing ``.tar``/``.tar.gz``/``.tgz`` file is used in
    place.

    Raises:
        ValueError: the path does not exist, or is neither a directory nor a
            supported archive.
    """
    if seed_path is None:
        return PreparedWorkspaceSeed(mode="empty", source_path=None)
    resolved_source_path = Path(seed_path).expanduser().resolve()
    if not resolved_source_path.exists():
        raise ValueError(f"seed_path {resolved_source_path} does not exist")
    if resolved_source_path.is_dir():
        cleanup_dir = Path(tempfile.mkdtemp(prefix="pyro-workspace-seed-"))
        archive_path = cleanup_dir / "workspace-seed.tar"
        try:
            _write_directory_seed_archive(resolved_source_path, archive_path)
            entry_count, bytes_written = _inspect_seed_archive(archive_path)
        except Exception:
            # Archiving failed: do not leak the scratch directory.
            shutil.rmtree(cleanup_dir, ignore_errors=True)
            raise
        return PreparedWorkspaceSeed(
            mode="directory",
            source_path=str(resolved_source_path),
            archive_path=archive_path,
            entry_count=entry_count,
            bytes_written=bytes_written,
            cleanup_dir=cleanup_dir,
        )
    if (
        not resolved_source_path.is_file()
        or not _is_supported_seed_archive(resolved_source_path)
    ):
        raise ValueError(
            "seed_path must be a directory or a .tar/.tar.gz/.tgz archive"
        )
    # Existing archive: use it in place, no cleanup_dir needed.
    entry_count, bytes_written = _inspect_seed_archive(resolved_source_path)
    return PreparedWorkspaceSeed(
        mode="tar_archive",
        source_path=str(resolved_source_path),
        archive_path=resolved_source_path,
        entry_count=entry_count,
        bytes_written=bytes_written,
    )
def _ensure_workspace_guest_bootstrap_support(self, instance: VmInstance) -> None:
    """Patch the workspace rootfs with the runtime's guest init/agent binaries.

    Raises:
        RuntimeError: the runtime bundle lacks the guest bootstrap assets, or
            the instance has no usable rootfs image recorded in its metadata.
    """
    if (
        self._runtime_paths is None
        or self._runtime_paths.guest_agent_path is None
        or self._runtime_paths.guest_init_path is None
    ):
        raise RuntimeError(
            "runtime bundle does not provide guest bootstrap assets for workspace operations"
        )
    rootfs_image = instance.metadata.get("rootfs_image")
    if rootfs_image is None or rootfs_image == "":
        raise RuntimeError("workspace rootfs image is unavailable for guest operations")
    rootfs_path = Path(rootfs_image)
    # "0100755": regular file, rwxr-xr-x — both assets are installed executable.
    _patch_rootfs_runtime_file(
        rootfs_path,
        source_path=self._runtime_paths.guest_init_path,
        destination_path=WORKSPACE_GUEST_INIT_PATH,
        asset_label="guest-init",
        file_mode="0100755",
    )
    _patch_rootfs_runtime_file(
        rootfs_path,
        source_path=self._runtime_paths.guest_agent_path,
        destination_path=WORKSPACE_GUEST_AGENT_PATH,
        asset_label="guest-agent",
        file_mode="0100755",
    )
def _workspace_dir(self, workspace_id: str) -> Path:
return self._workspaces_dir / workspace_id
def _workspace_runtime_dir(self, workspace_id: str) -> Path:
    """Runtime-state subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_RUNTIME_DIRNAME)
def _workspace_host_dir(self, workspace_id: str) -> Path:
    """Host-side workspace-content subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_DIRNAME)
def _workspace_baseline_dir(self, workspace_id: str) -> Path:
    """Baseline-snapshot subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_BASELINE_DIRNAME)
def _workspace_baseline_archive_path(self, workspace_id: str) -> Path:
    """Path of the baseline archive file for *workspace_id*."""
    baseline_dir = self._workspace_baseline_dir(workspace_id)
    return baseline_dir / WORKSPACE_BASELINE_ARCHIVE_NAME
def _workspace_snapshots_dir(self, workspace_id: str) -> Path:
    """Named-snapshots subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_SNAPSHOTS_DIRNAME)
def _workspace_snapshot_archive_path(self, workspace_id: str, snapshot_name: str) -> Path:
return self._workspace_snapshots_dir(workspace_id) / f"{snapshot_name}.tar"
def _workspace_snapshot_metadata_path(self, workspace_id: str, snapshot_name: str) -> Path:
return self._workspace_snapshots_dir(workspace_id) / f"{snapshot_name}.json"
def _workspace_commands_dir(self, workspace_id: str) -> Path:
    """Command-transcript subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_COMMANDS_DIRNAME)
def _workspace_shells_dir(self, workspace_id: str) -> Path:
    """Shell-records subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_SHELLS_DIRNAME)
def _workspace_services_dir(self, workspace_id: str) -> Path:
    """Service-records subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_SERVICES_DIRNAME)
def _workspace_secrets_dir(self, workspace_id: str) -> Path:
    """Secrets subdirectory inside the workspace directory."""
    return self._workspace_dir(workspace_id).joinpath(WORKSPACE_SECRETS_DIRNAME)
def _workspace_metadata_path(self, workspace_id: str) -> Path:
return self._workspace_dir(workspace_id) / "workspace.json"
def _workspace_shell_record_path(self, workspace_id: str, shell_id: str) -> Path:
return self._workspace_shells_dir(workspace_id) / f"{shell_id}.json"
def _workspace_service_record_path(self, workspace_id: str, service_name: str) -> Path:
return self._workspace_services_dir(workspace_id) / f"{service_name}.json"
def _workspace_rootfs_image_path_locked(
self,
workspace: WorkspaceRecord,
) -> Path:
raw_rootfs_image = workspace.metadata.get("rootfs_image")
if raw_rootfs_image is None or raw_rootfs_image == "":
raise RuntimeError(
f"workspace {workspace.workspace_id!r} does not have a persisted rootfs image"
)
rootfs_path = Path(raw_rootfs_image)
if not rootfs_path.exists():
raise RuntimeError(
f"workspace {workspace.workspace_id!r} rootfs image is unavailable at "
f"{rootfs_path}"
)
return rootfs_path
def _workspace_stopped_disk_rootfs_locked(
self,
workspace: WorkspaceRecord,
*,
operation_name: str,
) -> Path:
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
if workspace.state != "stopped":
raise RuntimeError(
f"workspace {workspace.workspace_id!r} must be stopped before {operation_name}"
)
if workspace.metadata.get("execution_mode") == "host_compat":
raise RuntimeError(
f"{operation_name} is unavailable for host_compat workspaces"
)
return self._workspace_rootfs_image_path_locked(workspace)
def _scrub_workspace_runtime_state_locked(
self,
workspace: WorkspaceRecord,
*,
rootfs_path: Path | None = None,
) -> None:
execution_mode = workspace.metadata.get("execution_mode")
if execution_mode == "host_compat":
return
scrub_workspace_runtime_paths(
rootfs_path or self._workspace_rootfs_image_path_locked(workspace)
)
def _flush_workspace_filesystem_locked(
self,
workspace: WorkspaceRecord,
instance: VmInstance,
) -> None:
if workspace.state != "started":
return
if self._backend_name == "mock":
return
if not self._runtime_capabilities.supports_guest_exec:
return
self._backend.exec(instance, "sync", 10)
def _count_workspaces_locked(self) -> int:
return sum(1 for _ in self._workspaces_dir.glob("*/workspace.json"))
def _load_workspace_locked(self, workspace_id: str) -> WorkspaceRecord:
metadata_path = self._workspace_metadata_path(workspace_id)
if not metadata_path.exists():
raise ValueError(f"workspace {workspace_id!r} does not exist")
payload = json.loads(metadata_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"workspace record at {metadata_path} is invalid")
return WorkspaceRecord.from_payload(payload)
def _save_workspace_locked(self, workspace: WorkspaceRecord) -> None:
metadata_path = self._workspace_metadata_path(workspace.workspace_id)
metadata_path.parent.mkdir(parents=True, exist_ok=True)
metadata_path.write_text(
json.dumps(workspace.to_payload(), indent=2, sort_keys=True),
encoding="utf-8",
)
def _reap_expired_workspaces_locked(self, now: float) -> None:
for metadata_path in list(self._workspaces_dir.glob("*/workspace.json")):
payload = json.loads(metadata_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
shutil.rmtree(metadata_path.parent, ignore_errors=True)
continue
workspace = WorkspaceRecord.from_payload(payload)
if workspace.expires_at > now:
continue
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
self._close_workspace_shells_locked(workspace, instance)
if workspace.state == "started":
self._backend.stop(instance)
workspace.state = "stopped"
self._backend.delete(instance)
shutil.rmtree(self._workspace_dir(workspace.workspace_id), ignore_errors=True)
def _ensure_workspace_not_expired_locked(
self,
workspace: WorkspaceRecord,
now: float,
) -> None:
if workspace.expires_at <= now:
workspace_id = workspace.workspace_id
self._reap_expired_workspaces_locked(now)
raise RuntimeError(f"workspace {workspace_id!r} expired and was automatically deleted")
def _refresh_workspace_liveness_locked(self, workspace: WorkspaceRecord) -> None:
if workspace.state != "started":
return
execution_mode = workspace.metadata.get("execution_mode")
if execution_mode == "host_compat":
return
if _pid_is_running(workspace.firecracker_pid):
return
workspace.state = "stopped"
workspace.firecracker_pid = None
workspace.last_error = "backing guest process is no longer running"
def _record_workspace_command_locked(
self,
workspace: WorkspaceRecord,
*,
command: str,
exec_result: VmExecResult,
execution_mode: str,
cwd: str,
) -> dict[str, Any]:
sequence = workspace.command_count + 1
commands_dir = self._workspace_commands_dir(workspace.workspace_id)
commands_dir.mkdir(parents=True, exist_ok=True)
base_name = f"{sequence:06d}"
stdout_path = commands_dir / f"{base_name}.stdout"
stderr_path = commands_dir / f"{base_name}.stderr"
record_path = commands_dir / f"{base_name}.json"
stdout_path.write_text(exec_result.stdout, encoding="utf-8")
stderr_path.write_text(exec_result.stderr, encoding="utf-8")
entry: dict[str, Any] = {
"sequence": sequence,
"command": command,
"cwd": cwd,
"exit_code": exec_result.exit_code,
"duration_ms": exec_result.duration_ms,
"execution_mode": execution_mode,
"stdout_file": stdout_path.name,
"stderr_file": stderr_path.name,
"recorded_at": time.time(),
}
record_path.write_text(json.dumps(entry, indent=2, sort_keys=True), encoding="utf-8")
workspace.command_count = sequence
workspace.last_command = {
"sequence": sequence,
"command": command,
"cwd": cwd,
"exit_code": exec_result.exit_code,
"duration_ms": exec_result.duration_ms,
"execution_mode": execution_mode,
}
self._touch_workspace_activity_locked(workspace)
return entry
def _read_workspace_logs_locked(self, workspace_id: str) -> list[dict[str, Any]]:
entries: list[dict[str, Any]] = []
commands_dir = self._workspace_commands_dir(workspace_id)
if not commands_dir.exists():
return entries
for record_path in sorted(commands_dir.glob("*.json")):
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
continue
stdout_name = str(payload.get("stdout_file", ""))
stderr_name = str(payload.get("stderr_file", ""))
stdout = ""
stderr = ""
if stdout_name != "":
stdout_path = commands_dir / stdout_name
if stdout_path.exists():
stdout = stdout_path.read_text(encoding="utf-8")
if stderr_name != "":
stderr_path = commands_dir / stderr_name
if stderr_path.exists():
stderr = stderr_path.read_text(encoding="utf-8")
entry = dict(payload)
entry["stdout"] = stdout
entry["stderr"] = stderr
entries.append(entry)
return entries
def _workspace_instance_for_live_shell_locked(self, workspace: WorkspaceRecord) -> VmInstance:
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="shell operations",
)
self._require_workspace_shell_support(instance)
return instance
def _workspace_instance_for_live_service_locked(self, workspace: WorkspaceRecord) -> VmInstance:
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="service operations",
)
self._require_workspace_service_support(instance)
return instance
def _workspace_instance_for_live_operation_locked(
self,
workspace: WorkspaceRecord,
*,
operation_name: str,
) -> VmInstance:
self._ensure_workspace_not_expired_locked(workspace, time.time())
self._refresh_workspace_liveness_locked(workspace)
if workspace.state != "started":
raise RuntimeError(
"workspace "
f"{workspace.workspace_id} must be in 'started' state before {operation_name}"
)
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
return instance
def _workspace_shell_record_from_payload(
    self,
    *,
    workspace_id: str,
    shell_id: str,
    payload: dict[str, Any],
    metadata: dict[str, str] | None = None,
) -> WorkspaceShellRecord:
    """Build a shell record from a backend payload, defaulting missing fields.

    ``workspace_id``/``shell_id`` serve as fallbacks when the payload omits
    its own identifiers; numeric fields are coerced defensively and
    ``metadata`` is copied.
    """
    return WorkspaceShellRecord(
        workspace_id=workspace_id,
        shell_id=str(payload.get("shell_id", shell_id)),
        cwd=str(payload.get("cwd", WORKSPACE_GUEST_PATH)),
        cols=int(payload.get("cols", DEFAULT_SHELL_COLS)),
        rows=int(payload.get("rows", DEFAULT_SHELL_ROWS)),
        state=cast(WorkspaceShellState, str(payload.get("state", "stopped"))),
        started_at=float(payload.get("started_at", time.time())),
        # ended_at/exit_code remain None when the payload omits them; the
        # inner 0.0/0 defaults only matter if the key holds None explicitly.
        ended_at=(
            None if payload.get("ended_at") is None else float(payload.get("ended_at", 0.0))
        ),
        exit_code=(
            None if payload.get("exit_code") is None else int(payload.get("exit_code", 0))
        ),
        execution_mode=str(payload.get("execution_mode", "pending")),
        metadata=dict(metadata or {}),
    )
def _workspace_service_record_from_payload(
    self,
    *,
    workspace_id: str,
    service_name: str,
    payload: dict[str, Any],
    metadata: dict[str, str] | None = None,
    published_ports: list[WorkspacePublishedPortRecord] | None = None,
) -> WorkspaceServiceRecord:
    """Build a service record from a backend status payload.

    ``workspace_id``/``service_name`` are fallbacks for identifiers the
    payload omits.  Published ports in the payload take precedence; the
    caller-supplied ``published_ports`` are used only when the payload
    yields none.  ``metadata`` is copied defensively.
    """
    readiness_payload = payload.get("readiness")
    # readiness is preserved only when it is a JSON object; otherwise None.
    readiness = dict(readiness_payload) if isinstance(readiness_payload, dict) else None
    normalized_published_ports = _workspace_published_port_records(
        payload.get("published_ports")
    )
    if not normalized_published_ports and published_ports is not None:
        normalized_published_ports = list(published_ports)
    return WorkspaceServiceRecord(
        workspace_id=workspace_id,
        service_name=str(payload.get("service_name", service_name)),
        command=str(payload.get("command", "")),
        cwd=str(payload.get("cwd", WORKSPACE_GUEST_PATH)),
        state=cast(WorkspaceServiceState, str(payload.get("state", "stopped"))),
        started_at=float(payload.get("started_at", time.time())),
        readiness=readiness,
        # Optional timestamps/exit data remain None when the payload omits them.
        ready_at=(
            None if payload.get("ready_at") is None else float(payload.get("ready_at", 0.0))
        ),
        ended_at=(
            None if payload.get("ended_at") is None else float(payload.get("ended_at", 0.0))
        ),
        exit_code=(
            None if payload.get("exit_code") is None else int(payload.get("exit_code", 0))
        ),
        pid=None if payload.get("pid") is None else int(payload.get("pid", 0)),
        execution_mode=str(payload.get("execution_mode", "pending")),
        stop_reason=_optional_str(payload.get("stop_reason")),
        published_ports=normalized_published_ports,
        metadata=dict(metadata or {}),
    )
def _load_workspace_shell_locked(
self,
workspace_id: str,
shell_id: str,
) -> WorkspaceShellRecord:
record_path = self._workspace_shell_record_path(workspace_id, shell_id)
if not record_path.exists():
raise ValueError(f"shell {shell_id!r} does not exist in workspace {workspace_id!r}")
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"shell record at {record_path} is invalid")
return WorkspaceShellRecord.from_payload(payload)
def _workspace_service_counts_locked(self, workspace_id: str) -> tuple[int, int]:
services = self._list_workspace_services_locked(workspace_id)
return len(services), sum(1 for service in services if service.state == "running")
def _start_workspace_service_published_ports(
    self,
    *,
    workspace: WorkspaceRecord,
    service: WorkspaceServiceRecord,
    guest_ip: str,
    published_ports: list[WorkspacePublishedPortSpec],
) -> list[WorkspacePublishedPortRecord]:
    """Start one host proxy per requested port, rolling back all on any failure."""
    services_dir = self._workspace_services_dir(workspace.workspace_id)
    started_records: list[WorkspacePublishedPortRecord] = []
    try:
        for port_spec in published_ports:
            proxy_record = _start_workspace_published_port_proxy(
                services_dir=services_dir,
                service_name=service.service_name,
                workspace_id=workspace.workspace_id,
                guest_ip=guest_ip,
                spec=port_spec,
            )
            started_records.append(proxy_record)
    except Exception:
        # Unwind proxies that did come up so a partial failure leaks nothing.
        for proxy_record in started_records:
            _stop_workspace_published_port_proxy(proxy_record)
        raise
    return started_records
def _workspace_baseline_snapshot_locked(
self,
workspace: WorkspaceRecord,
) -> WorkspaceSnapshotRecord:
baseline_archive_path = self._workspace_baseline_archive_path(workspace.workspace_id)
if not baseline_archive_path.exists():
raise RuntimeError(
"workspace snapshots and reset require a baseline snapshot. "
"Recreate the workspace to use snapshot/reset features."
)
entry_count, bytes_written = _inspect_seed_archive(baseline_archive_path)
return WorkspaceSnapshotRecord(
workspace_id=workspace.workspace_id,
snapshot_name="baseline",
kind="baseline",
created_at=workspace.created_at,
entry_count=entry_count,
bytes_written=bytes_written,
)
def _resolve_workspace_snapshot_locked(
    self,
    workspace: WorkspaceRecord,
    snapshot_name: str,
) -> tuple[WorkspaceSnapshotRecord, Path]:
    """Resolve a snapshot name (possibly 'baseline') to its record and archive path."""
    name = _normalize_workspace_snapshot_name(snapshot_name, allow_baseline=True)
    if name == "baseline":
        return (
            self._workspace_baseline_snapshot_locked(workspace),
            self._workspace_baseline_archive_path(workspace.workspace_id),
        )
    return (
        self._load_workspace_snapshot_locked(workspace.workspace_id, name),
        self._workspace_snapshot_archive_path(workspace.workspace_id, name),
    )
def _load_workspace_service_locked(
self,
workspace_id: str,
service_name: str,
) -> WorkspaceServiceRecord:
record_path = self._workspace_service_record_path(workspace_id, service_name)
if not record_path.exists():
raise ValueError(
f"service {service_name!r} does not exist in workspace {workspace_id!r}"
)
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"service record at {record_path} is invalid")
return WorkspaceServiceRecord.from_payload(payload)
def _load_workspace_snapshot_locked(
self,
workspace_id: str,
snapshot_name: str,
) -> WorkspaceSnapshotRecord:
record_path = self._workspace_snapshot_metadata_path(workspace_id, snapshot_name)
if not record_path.exists():
raise ValueError(
f"snapshot {snapshot_name!r} does not exist in workspace {workspace_id!r}"
)
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"snapshot record at {record_path} is invalid")
return WorkspaceSnapshotRecord.from_payload(payload)
def _load_workspace_snapshot_locked_optional(
self,
workspace_id: str,
snapshot_name: str,
) -> WorkspaceSnapshotRecord | None:
record_path = self._workspace_snapshot_metadata_path(workspace_id, snapshot_name)
if not record_path.exists():
return None
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"snapshot record at {record_path} is invalid")
return WorkspaceSnapshotRecord.from_payload(payload)
def _load_workspace_service_locked_optional(
self,
workspace_id: str,
service_name: str,
) -> WorkspaceServiceRecord | None:
record_path = self._workspace_service_record_path(workspace_id, service_name)
if not record_path.exists():
return None
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise RuntimeError(f"service record at {record_path} is invalid")
return WorkspaceServiceRecord.from_payload(payload)
def _save_workspace_service_locked(self, service: WorkspaceServiceRecord) -> None:
record_path = self._workspace_service_record_path(
service.workspace_id,
service.service_name,
)
record_path.parent.mkdir(parents=True, exist_ok=True)
record_path.write_text(
json.dumps(service.to_payload(), indent=2, sort_keys=True),
encoding="utf-8",
)
def _save_workspace_snapshot_locked(self, snapshot: WorkspaceSnapshotRecord) -> None:
record_path = self._workspace_snapshot_metadata_path(
snapshot.workspace_id,
snapshot.snapshot_name,
)
record_path.parent.mkdir(parents=True, exist_ok=True)
record_path.write_text(
json.dumps(snapshot.to_payload(), indent=2, sort_keys=True),
encoding="utf-8",
)
def _delete_workspace_service_artifacts_locked(
    self,
    workspace_id: str,
    service_name: str,
) -> None:
    """Stop any port proxies and remove every on-disk artifact for a service."""
    existing = self._load_workspace_service_locked_optional(workspace_id, service_name)
    if existing is not None:
        for published_port in existing.published_ports:
            _stop_workspace_published_port_proxy(published_port)
    self._workspace_service_record_path(workspace_id, service_name).unlink(missing_ok=True)
    services_dir = self._workspace_services_dir(workspace_id)
    sidecar_paths = (
        _workspace_service_stdout_path(services_dir, service_name),
        _workspace_service_stderr_path(services_dir, service_name),
        _workspace_service_status_path(services_dir, service_name),
        _workspace_service_runner_path(services_dir, service_name),
    )
    for sidecar_path in sidecar_paths:
        sidecar_path.unlink(missing_ok=True)
    for ready_path in services_dir.glob(f"{service_name}.port-*.ready.json"):
        ready_path.unlink(missing_ok=True)
def _delete_workspace_snapshot_locked(self, workspace_id: str, snapshot_name: str) -> None:
self._workspace_snapshot_metadata_path(workspace_id, snapshot_name).unlink(missing_ok=True)
self._workspace_snapshot_archive_path(workspace_id, snapshot_name).unlink(missing_ok=True)
def _list_workspace_services_locked(self, workspace_id: str) -> list[WorkspaceServiceRecord]:
    """Load every persisted service record for the workspace, sorted by filename.

    Non-object payloads are skipped silently.

    NOTE(review): the ``*.json`` glob also matches sidecar files written into
    this same directory (e.g. ``<service>.port-*.ready.json``) — confirm those
    payloads are either absent here or rejected before reaching from_payload.
    """
    services_dir = self._workspace_services_dir(workspace_id)
    if not services_dir.exists():
        return []
    services: list[WorkspaceServiceRecord] = []
    for record_path in sorted(services_dir.glob("*.json")):
        payload = json.loads(record_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            continue  # tolerate malformed (non-object) records
        services.append(WorkspaceServiceRecord.from_payload(payload))
    return services
def _list_workspace_snapshots_locked(
self,
workspace: WorkspaceRecord,
) -> list[WorkspaceSnapshotRecord]:
snapshots_dir = self._workspace_snapshots_dir(workspace.workspace_id)
snapshots: list[WorkspaceSnapshotRecord] = [
self._workspace_baseline_snapshot_locked(workspace)
]
if not snapshots_dir.exists():
return snapshots
named_snapshots: list[WorkspaceSnapshotRecord] = []
for record_path in snapshots_dir.glob("*.json"):
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
continue
named_snapshots.append(WorkspaceSnapshotRecord.from_payload(payload))
named_snapshots.sort(key=lambda item: (-item.created_at, item.snapshot_name))
snapshots.extend(named_snapshots)
return snapshots
def _save_workspace_shell_locked(self, shell: WorkspaceShellRecord) -> None:
record_path = self._workspace_shell_record_path(shell.workspace_id, shell.shell_id)
record_path.parent.mkdir(parents=True, exist_ok=True)
record_path.write_text(
json.dumps(shell.to_payload(), indent=2, sort_keys=True),
encoding="utf-8",
)
def _delete_workspace_shell_locked(self, workspace_id: str, shell_id: str) -> None:
record_path = self._workspace_shell_record_path(workspace_id, shell_id)
if record_path.exists():
record_path.unlink()
def _list_workspace_shells_locked(self, workspace_id: str) -> list[WorkspaceShellRecord]:
shells_dir = self._workspace_shells_dir(workspace_id)
if not shells_dir.exists():
return []
shells: list[WorkspaceShellRecord] = []
for record_path in sorted(shells_dir.glob("*.json")):
payload = json.loads(record_path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
continue
shells.append(WorkspaceShellRecord.from_payload(payload))
return shells
def _close_workspace_shells_locked(
self,
workspace: WorkspaceRecord,
instance: VmInstance,
) -> None:
for shell in self._list_workspace_shells_locked(workspace.workspace_id):
try:
self._backend.close_shell(
instance,
workspace_id=workspace.workspace_id,
shell_id=shell.shell_id,
)
except Exception:
pass
self._delete_workspace_shell_locked(workspace.workspace_id, shell.shell_id)
def _reset_workspace_runtime_dirs(self, workspace_id: str) -> None:
shutil.rmtree(self._workspace_runtime_dir(workspace_id), ignore_errors=True)
shutil.rmtree(self._workspace_host_dir(workspace_id), ignore_errors=True)
shutil.rmtree(self._workspace_commands_dir(workspace_id), ignore_errors=True)
shutil.rmtree(self._workspace_shells_dir(workspace_id), ignore_errors=True)
shutil.rmtree(self._workspace_services_dir(workspace_id), ignore_errors=True)
self._workspace_host_dir(workspace_id).mkdir(parents=True, exist_ok=True)
self._workspace_commands_dir(workspace_id).mkdir(parents=True, exist_ok=True)
self._workspace_shells_dir(workspace_id).mkdir(parents=True, exist_ok=True)
self._workspace_services_dir(workspace_id).mkdir(parents=True, exist_ok=True)
def _refresh_workspace_service_locked(
    self,
    workspace: WorkspaceRecord,
    instance: VmInstance,
    service: WorkspaceServiceRecord,
) -> WorkspaceServiceRecord:
    """Re-query one service's status from the backend and persist the result.

    When the refreshed service is no longer running, the host port-forward
    proxies are stopped and the proxy pids are cleared from the published-port
    records (the port mapping itself is kept for the record).
    """
    payload = self._backend.status_service(
        instance,
        workspace_id=workspace.workspace_id,
        service_name=service.service_name,
    )
    refreshed = self._workspace_service_record_from_payload(
        workspace_id=workspace.workspace_id,
        service_name=service.service_name,
        payload=payload,
        metadata=service.metadata,
        published_ports=service.published_ports,
    )
    if refreshed.state != "running" and refreshed.published_ports:
        # Rebuild the published-port records without proxy pids ...
        refreshed.published_ports = [
            WorkspacePublishedPortRecord(
                guest_port=published_port.guest_port,
                host_port=published_port.host_port,
                host=published_port.host,
                protocol=published_port.protocol,
                proxy_pid=None,
            )
            for published_port in refreshed.published_ports
        ]
        # ... and stop the proxies recorded on the *previous* service record,
        # which still carry the live proxy pids.
        for published_port in service.published_ports:
            _stop_workspace_published_port_proxy(published_port)
    self._save_workspace_service_locked(refreshed)
    return refreshed
def _refresh_workspace_services_locked(
self,
workspace: WorkspaceRecord,
instance: VmInstance,
) -> list[WorkspaceServiceRecord]:
services = self._list_workspace_services_locked(workspace.workspace_id)
refreshed: list[WorkspaceServiceRecord] = []
for service in services:
refreshed.append(self._refresh_workspace_service_locked(workspace, instance, service))
return refreshed
def _refresh_workspace_service_counts_locked(self, workspace: WorkspaceRecord) -> None:
services = self._list_workspace_services_locked(workspace.workspace_id)
if not services:
return
if workspace.state != "started":
changed = False
for service in services:
if service.state == "running":
for published_port in service.published_ports:
_stop_workspace_published_port_proxy(published_port)
service.state = "stopped"
service.stop_reason = "workspace_stopped"
service.ended_at = service.ended_at or time.time()
self._save_workspace_service_locked(service)
changed = True
if changed:
return
return
instance = workspace.to_instance(
workdir=self._workspace_runtime_dir(workspace.workspace_id)
)
self._require_workspace_service_support(instance)
self._refresh_workspace_services_locked(workspace, instance)
def _stop_workspace_services_locked(
self,
workspace: WorkspaceRecord,
instance: VmInstance,
) -> None:
for service in self._list_workspace_services_locked(workspace.workspace_id):
if workspace.state == "started":
try:
payload = self._backend.stop_service(
instance,
workspace_id=workspace.workspace_id,
service_name=service.service_name,
)
stopped = self._workspace_service_record_from_payload(
workspace_id=workspace.workspace_id,
service_name=service.service_name,
payload=payload,
metadata=service.metadata,
published_ports=service.published_ports,
)
self._save_workspace_service_locked(stopped)
except Exception:
pass
self._delete_workspace_service_artifacts_locked(
workspace.workspace_id,
service.service_name,
)