Add runtime capability scaffolding and align docs

This commit is contained in:
Thales Maciel 2026-03-05 22:57:09 -03:00
parent fb8b985049
commit cbf212bb7b
19 changed files with 1048 additions and 71 deletions

View file

@ -12,7 +12,15 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal
from pyro_mcp.runtime import RuntimePaths, resolve_runtime_paths
from pyro_mcp.runtime import (
RuntimeCapabilities,
RuntimePaths,
resolve_runtime_paths,
runtime_capabilities,
)
from pyro_mcp.vm_firecracker import build_launch_plan
from pyro_mcp.vm_guest import VsockExecClient
from pyro_mcp.vm_network import NetworkConfig, TapNetworkManager
from pyro_mcp.vm_profiles import get_profile, list_profiles, resolve_artifacts
VmState = Literal["created", "started", "stopped"]
@ -34,6 +42,7 @@ class VmInstance:
firecracker_pid: int | None = None
last_error: str | None = None
metadata: dict[str, str] = field(default_factory=dict)
network: NetworkConfig | None = None
@dataclass(frozen=True)
@ -119,10 +128,21 @@ class MockBackend(VmBackend):
class FirecrackerBackend(VmBackend): # pragma: no cover
"""Host-gated backend that validates Firecracker prerequisites."""
def __init__(self, artifacts_dir: Path, firecracker_bin: Path, jailer_bin: Path) -> None:
def __init__(
self,
artifacts_dir: Path,
firecracker_bin: Path,
jailer_bin: Path,
runtime_capabilities: RuntimeCapabilities,
network_manager: TapNetworkManager | None = None,
guest_exec_client: VsockExecClient | None = None,
) -> None:
self._artifacts_dir = artifacts_dir
self._firecracker_bin = firecracker_bin
self._jailer_bin = jailer_bin
self._runtime_capabilities = runtime_capabilities
self._network_manager = network_manager or TapNetworkManager()
self._guest_exec_client = guest_exec_client or VsockExecClient()
if not self._firecracker_bin.exists():
raise RuntimeError(f"bundled firecracker binary not found at {self._firecracker_bin}")
if not self._jailer_bin.exists():
@ -132,16 +152,29 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
def create(self, instance: VmInstance) -> None:
    """Prepare the workdir, profile artifacts and network for ``instance``.

    The working directory is created first and must not already exist
    (``exist_ok=False``). Every later step runs under a guard that removes
    the freshly created directory again before re-raising, so a failed
    create does not leave the directory behind.

    Raises:
        RuntimeError: if the kernel or rootfs image for the instance's
            profile is missing on disk.
    """
    instance.workdir.mkdir(parents=True, exist_ok=False)
    try:
        artifacts = resolve_artifacts(self._artifacts_dir, instance.profile)
        if not (artifacts.kernel_image.exists() and artifacts.rootfs_image.exists()):
            raise RuntimeError(
                f"missing profile artifacts for {instance.profile}; expected "
                f"{artifacts.kernel_image} and {artifacts.rootfs_image}"
            )
        instance.metadata.update(
            {
                "kernel_image": str(artifacts.kernel_image),
                "rootfs_image": str(artifacts.rootfs_image),
            }
        )
        # Allocate networking last and mirror its details into the metadata.
        allocated = self._network_manager.allocate(instance.vm_id)
        instance.network = allocated
        instance.metadata.update(self._network_manager.to_metadata(allocated))
    except Exception:
        # Roll back the directory created above, then surface the error.
        shutil.rmtree(instance.workdir, ignore_errors=True)
        raise
def start(self, instance: VmInstance) -> None:
launch_plan = build_launch_plan(instance)
instance.metadata["firecracker_config_path"] = str(launch_plan.config_path)
instance.metadata["guest_network_path"] = str(launch_plan.guest_network_path)
instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path)
instance.metadata["guest_cid"] = str(launch_plan.guest_cid)
instance.metadata["guest_exec_port"] = str(launch_plan.vsock_port)
proc = subprocess.run( # noqa: S603
[str(self._firecracker_bin), "--version"],
text=True,
@ -152,15 +185,35 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
raise RuntimeError(f"firecracker startup preflight failed: {proc.stderr.strip()}")
instance.metadata["firecracker_version"] = proc.stdout.strip()
instance.metadata["jailer_path"] = str(self._jailer_bin)
if not self._runtime_capabilities.supports_vm_boot:
instance.metadata["execution_mode"] = "host_compat"
instance.metadata["boot_mode"] = "shim"
if self._runtime_capabilities.reason is not None:
instance.metadata["runtime_reason"] = self._runtime_capabilities.reason
return
instance.metadata["execution_mode"] = "guest_vsock"
instance.metadata["boot_mode"] = "native"
def exec(self, instance: VmInstance, command: str, timeout_seconds: int) -> VmExecResult:
    """Run ``command`` for ``instance`` and return its result.

    When the runtime reports ``supports_guest_exec``, the command is sent
    through the guest exec client using the ``guest_cid`` and
    ``guest_exec_port`` values stored in the instance metadata. Otherwise
    the command runs on the host in the instance workdir and the metadata
    is marked ``execution_mode = "host_compat"``.
    """
    if not self._runtime_capabilities.supports_guest_exec:
        # Temporary compatibility path until the guest-side execution agent
        # is integrated.
        instance.metadata["execution_mode"] = "host_compat"
        return _run_host_command(instance.workdir, command, timeout_seconds)

    metadata = instance.metadata
    cid = int(metadata["guest_cid"])
    exec_port = int(metadata["guest_exec_port"])
    reply = self._guest_exec_client.exec(cid, exec_port, command, timeout_seconds)
    return VmExecResult(
        stdout=reply.stdout,
        stderr=reply.stderr,
        exit_code=reply.exit_code,
        duration_ms=reply.duration_ms,
    )
def stop(self, instance: VmInstance) -> None:
    """Stop ``instance``; this implementation performs no work."""
    # The argument is accepted but intentionally unused.
    del instance
    return None
def delete(self, instance: VmInstance) -> None:
    """Release the instance's network (if any) and remove its workdir.

    The workdir is removed even when network cleanup raises, so a failing
    cleanup cannot leave the directory behind. Any exception from the
    network manager still propagates to the caller after the directory has
    been removed.
    """
    try:
        if instance.network is not None:
            self._network_manager.cleanup(instance.network)
    finally:
        # Best-effort removal: errors while deleting files are ignored.
        shutil.rmtree(instance.workdir, ignore_errors=True)
@ -182,6 +235,7 @@ class VmManager:
artifacts_dir: Path | None = None,
max_active_vms: int = 4,
runtime_paths: RuntimePaths | None = None,
network_manager: TapNetworkManager | None = None,
) -> None:
self._backend_name = backend_name or "firecracker"
self._base_dir = base_dir or Path("/tmp/pyro-mcp")
@ -189,11 +243,19 @@ class VmManager:
if self._backend_name == "firecracker":
self._runtime_paths = self._runtime_paths or resolve_runtime_paths()
self._artifacts_dir = artifacts_dir or self._runtime_paths.artifacts_dir
self._runtime_capabilities = runtime_capabilities(self._runtime_paths)
else:
self._artifacts_dir = artifacts_dir or Path(
os.environ.get("PYRO_VM_ARTIFACTS_DIR", "/opt/pyro-mcp/artifacts")
)
self._runtime_capabilities = RuntimeCapabilities(
supports_vm_boot=False,
supports_guest_exec=False,
supports_guest_network=False,
reason="mock backend does not boot a guest",
)
self._max_active_vms = max_active_vms
self._network_manager = network_manager or TapNetworkManager()
self._lock = threading.Lock()
self._instances: dict[str, VmInstance] = {}
self._base_dir.mkdir(parents=True, exist_ok=True)
@ -209,6 +271,8 @@ class VmManager:
self._artifacts_dir,
firecracker_bin=self._runtime_paths.firecracker_bin,
jailer_bin=self._runtime_paths.jailer_bin,
runtime_capabilities=self._runtime_capabilities,
network_manager=self._network_manager,
)
raise ValueError("invalid backend; expected one of: mock, firecracker")
@ -262,6 +326,7 @@ class VmManager:
if instance.state != "started":
raise RuntimeError(f"vm {vm_id} must be in 'started' state before vm_exec")
exec_result = self._backend.exec(instance, command, timeout_seconds)
execution_mode = instance.metadata.get("execution_mode", "host_compat")
cleanup = self.delete_vm(vm_id, reason="post_exec_cleanup")
return {
"vm_id": vm_id,
@ -270,6 +335,7 @@ class VmManager:
"stderr": exec_result.stderr,
"exit_code": exec_result.exit_code,
"duration_ms": exec_result.duration_ms,
"execution_mode": execution_mode,
"cleanup": cleanup,
}
@ -296,6 +362,19 @@ class VmManager:
self._ensure_not_expired_locked(instance, time.time())
return self._serialize(instance)
def network_info_vm(self, vm_id: str) -> dict[str, Any]:
    """Return network details for the VM identified by ``vm_id``.

    The instance lookup and expiry check run while holding the manager
    lock. If the instance has a network configuration, the result is the
    network manager's ``network_info`` mapping with ``vm_id`` placed first;
    otherwise a fixed "network unavailable" payload is returned.
    """
    with self._lock:
        vm = self._get_instance_locked(vm_id)
        self._ensure_not_expired_locked(vm, time.time())
        net = vm.network
        if net is not None:
            details = self._network_manager.network_info(net)
            return {"vm_id": vm_id, **details}
        # No network was recorded for this instance.
        return {
            "vm_id": vm_id,
            "network_enabled": False,
            "outbound_connectivity_expected": False,
            "reason": "network configuration is unavailable for this VM",
        }
def reap_expired(self) -> dict[str, Any]:
now = time.time()
with self._lock:
@ -331,6 +410,10 @@ class VmManager:
"created_at": instance.created_at,
"expires_at": instance.expires_at,
"state": instance.state,
"network_enabled": instance.network is not None,
"guest_ip": instance.network.guest_ip if instance.network is not None else None,
"tap_name": instance.network.tap_name if instance.network is not None else None,
"execution_mode": instance.metadata.get("execution_mode", "host_compat"),
"metadata": instance.metadata,
}