Enable real guest networking and make demos network-first
This commit is contained in:
parent
c43c718c83
commit
b01efa6452
14 changed files with 618 additions and 72 deletions
|
|
@ -4,6 +4,7 @@ from __future__ import annotations
|
|||
|
||||
import os
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
|
|
@ -143,6 +144,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
self._runtime_capabilities = runtime_capabilities
|
||||
self._network_manager = network_manager or TapNetworkManager()
|
||||
self._guest_exec_client = guest_exec_client or VsockExecClient()
|
||||
self._processes: dict[str, subprocess.Popen[str]] = {}
|
||||
if not self._firecracker_bin.exists():
|
||||
raise RuntimeError(f"bundled firecracker binary not found at {self._firecracker_bin}")
|
||||
if not self._jailer_bin.exists():
|
||||
|
|
@ -160,10 +162,16 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
f"{artifacts.kernel_image} and {artifacts.rootfs_image}"
|
||||
)
|
||||
instance.metadata["kernel_image"] = str(artifacts.kernel_image)
|
||||
instance.metadata["rootfs_image"] = str(artifacts.rootfs_image)
|
||||
network = self._network_manager.allocate(instance.vm_id)
|
||||
instance.network = network
|
||||
instance.metadata.update(self._network_manager.to_metadata(network))
|
||||
rootfs_copy = instance.workdir / "rootfs.ext4"
|
||||
shutil.copy2(artifacts.rootfs_image, rootfs_copy)
|
||||
instance.metadata["rootfs_image"] = str(rootfs_copy)
|
||||
if self._network_manager.enabled:
|
||||
network = self._network_manager.allocate(instance.vm_id)
|
||||
instance.network = network
|
||||
instance.metadata.update(self._network_manager.to_metadata(network))
|
||||
else:
|
||||
instance.network = None
|
||||
instance.metadata["network_enabled"] = "false"
|
||||
except Exception:
|
||||
shutil.rmtree(instance.workdir, ignore_errors=True)
|
||||
raise
|
||||
|
|
@ -175,6 +183,12 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path)
|
||||
instance.metadata["guest_cid"] = str(launch_plan.guest_cid)
|
||||
instance.metadata["guest_exec_port"] = str(launch_plan.vsock_port)
|
||||
instance.metadata["guest_exec_uds_path"] = str(instance.workdir / "vsock.sock")
|
||||
serial_log_path = instance.workdir / "serial.log"
|
||||
firecracker_log_path = instance.workdir / "firecracker.log"
|
||||
firecracker_log_path.touch()
|
||||
instance.metadata["serial_log_path"] = str(serial_log_path)
|
||||
instance.metadata["firecracker_log_path"] = str(firecracker_log_path)
|
||||
proc = subprocess.run( # noqa: S603
|
||||
[str(self._firecracker_bin), "--version"],
|
||||
text=True,
|
||||
|
|
@ -191,14 +205,60 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
if self._runtime_capabilities.reason is not None:
|
||||
instance.metadata["runtime_reason"] = self._runtime_capabilities.reason
|
||||
return
|
||||
instance.metadata["execution_mode"] = "guest_vsock"
|
||||
with serial_log_path.open("w", encoding="utf-8") as serial_fp:
|
||||
process = subprocess.Popen( # noqa: S603
|
||||
[
|
||||
str(self._firecracker_bin),
|
||||
"--no-api",
|
||||
"--config-file",
|
||||
str(launch_plan.config_path),
|
||||
"--log-path",
|
||||
str(firecracker_log_path),
|
||||
"--level",
|
||||
"Info",
|
||||
],
|
||||
stdout=serial_fp,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
)
|
||||
self._processes[instance.vm_id] = process
|
||||
time.sleep(2)
|
||||
if process.poll() is not None:
|
||||
serial_log = serial_log_path.read_text(encoding="utf-8", errors="ignore")
|
||||
firecracker_log = firecracker_log_path.read_text(encoding="utf-8", errors="ignore")
|
||||
self._processes.pop(instance.vm_id, None)
|
||||
raise RuntimeError(
|
||||
"firecracker microVM exited during startup: "
|
||||
f"{(serial_log or firecracker_log).strip()}"
|
||||
)
|
||||
instance.firecracker_pid = process.pid
|
||||
instance.metadata["execution_mode"] = (
|
||||
"guest_vsock" if self._runtime_capabilities.supports_guest_exec else "host_compat"
|
||||
)
|
||||
instance.metadata["boot_mode"] = "native"
|
||||
|
||||
def exec(self, instance: VmInstance, command: str, timeout_seconds: int) -> VmExecResult:
|
||||
if self._runtime_capabilities.supports_guest_exec:
|
||||
guest_cid = int(instance.metadata["guest_cid"])
|
||||
port = int(instance.metadata["guest_exec_port"])
|
||||
response = self._guest_exec_client.exec(guest_cid, port, command, timeout_seconds)
|
||||
uds_path = instance.metadata.get("guest_exec_uds_path")
|
||||
deadline = time.monotonic() + min(timeout_seconds, 10)
|
||||
while True:
|
||||
try:
|
||||
response = self._guest_exec_client.exec(
|
||||
guest_cid,
|
||||
port,
|
||||
command,
|
||||
timeout_seconds,
|
||||
uds_path=uds_path,
|
||||
)
|
||||
break
|
||||
except (OSError, RuntimeError) as exc:
|
||||
if time.monotonic() >= deadline:
|
||||
raise RuntimeError(
|
||||
f"guest exec transport did not become ready: {exc}"
|
||||
) from exc
|
||||
time.sleep(0.2)
|
||||
return VmExecResult(
|
||||
stdout=response.stdout,
|
||||
stderr=response.stderr,
|
||||
|
|
@ -209,9 +269,36 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
return _run_host_command(instance.workdir, command, timeout_seconds)
|
||||
|
||||
def stop(self, instance: VmInstance) -> None:
    """Stop the Firecracker process backing ``instance``.

    Two paths are supported:

    * If this backend still holds the ``subprocess.Popen`` handle for the
      VM (keyed by ``instance.vm_id`` in ``self._processes``), the handle is
      removed from the registry, the process is terminated, and it is killed
      if it has not exited within the grace period.
    * Otherwise the method falls back to signalling
      ``instance.firecracker_pid`` directly: SIGTERM, poll with signal 0
      until the PID disappears or the grace period ends, then SIGKILL.

    ``instance.firecracker_pid`` is reset to ``None`` on every path that
    completes. Calling this for an instance with no tracked process and no
    recorded PID is a no-op.

    Args:
        instance: The VM whose process should be stopped.

    Raises:
        subprocess.TimeoutExpired: If a tracked process is still alive after
            being killed and waited on for the grace period.
    """
    grace_seconds = 5
    poll_interval_seconds = 0.1

    process = self._processes.pop(instance.vm_id, None)
    if process is not None:
        process.terminate()
        try:
            process.wait(timeout=grace_seconds)
        except subprocess.TimeoutExpired:
            # Graceful shutdown did not finish in time; force it.
            process.kill()
            process.wait(timeout=grace_seconds)
        instance.firecracker_pid = None
        return

    pid = instance.firecracker_pid
    if pid is None:
        return

    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # Already gone; just forget the stale PID.
        instance.firecracker_pid = None
        return

    deadline = time.monotonic() + grace_seconds
    while time.monotonic() < deadline:
        try:
            # Signal 0 performs the existence check without delivering a signal.
            os.kill(pid, 0)
        except ProcessLookupError:
            instance.firecracker_pid = None
            return
        time.sleep(poll_interval_seconds)

    try:
        os.kill(pid, signal.SIGKILL)
    except ProcessLookupError:
        # The process exited between the last probe and the kill.
        pass
    instance.firecracker_pid = None
|
||||
|
||||
def delete(self, instance: VmInstance) -> None:
    """Release the host-side resources held for ``instance``.

    Drops any process handle tracked under ``instance.vm_id``, asks the
    network manager to clean up the instance's network when one is
    attached, and removes the instance's working directory.

    The working directory is removed even when network cleanup raises, so
    a failed teardown does not leave it behind on disk; the exception from
    network cleanup still propagates to the caller afterwards. Removal of
    the directory itself is best-effort (errors are ignored).

    Args:
        instance: The VM whose resources should be released.
    """
    # NOTE(review): the handle is only forgotten here, not terminated —
    # presumably callers invoke stop() before delete(); confirm.
    self._processes.pop(instance.vm_id, None)
    try:
        if instance.network is not None:
            self._network_manager.cleanup(instance.network)
    finally:
        shutil.rmtree(instance.workdir, ignore_errors=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue