Enable real guest networking and make demos network-first

This commit is contained in:
Thales Maciel 2026-03-06 22:47:16 -03:00
parent c43c718c83
commit b01efa6452
14 changed files with 618 additions and 72 deletions

View file

@ -14,8 +14,11 @@ This repository ships `pyro-mcp`, an MCP-compatible package for ephemeral VM lif
- Use `make runtime-bundle` to regenerate the packaged runtime bundle from `runtime_sources/`. - Use `make runtime-bundle` to regenerate the packaged runtime bundle from `runtime_sources/`.
- Use `make runtime-materialize` to build real runtime inputs into `build/runtime_sources/`. - Use `make runtime-materialize` to build real runtime inputs into `build/runtime_sources/`.
- Use `make runtime-fetch-binaries`, `make runtime-build-kernel-real`, and `make runtime-build-rootfs-real` if you need to debug the real-source pipeline step by step. - Use `make runtime-fetch-binaries`, `make runtime-build-kernel-real`, and `make runtime-build-rootfs-real` if you need to debug the real-source pipeline step by step.
- Use `make runtime-boot-check` to run a direct Firecracker boot validation against the bundled runtime artifacts.
- Use `make runtime-network-check` to validate outbound internet access from inside the guest.
- Use `make demo` to validate deterministic VM lifecycle execution. - Use `make demo` to validate deterministic VM lifecycle execution.
- Use `make ollama-demo` to validate model-triggered lifecycle tool usage. - Use `make network-demo` to validate deterministic VM lifecycle execution with guest networking enabled.
- Use `make ollama-demo` to validate model-triggered lifecycle tool usage with guest networking enabled.
- Use `make doctor` to inspect bundled runtime integrity and host prerequisites. - Use `make doctor` to inspect bundled runtime integrity and host prerequisites.
- If you need full log payloads from the Ollama demo, use `make ollama-demo OLLAMA_DEMO_FLAGS=-v`. - If you need full log payloads from the Ollama demo, use `make ollama-demo OLLAMA_DEMO_FLAGS=-v`.
@ -32,7 +35,7 @@ These checks run in pre-commit hooks and should all pass locally.
- Public factory: `pyro_mcp.create_server()` - Public factory: `pyro_mcp.create_server()`
- Runtime diagnostics CLI: `pyro-mcp-doctor` - Runtime diagnostics CLI: `pyro-mcp-doctor`
- Runtime bundle build CLI: `pyro-mcp-runtime-build` - Runtime bundle build CLI: `pyro-mcp-runtime-build`
- Current bundled runtime is shim-based unless replaced with a real guest-capable bundle; check `make doctor` for runtime capabilities. - Current bundled runtime is guest-capable for VM boot, guest exec, and guest networking; check `make doctor` for runtime capabilities.
- Lifecycle tools: - Lifecycle tools:
- `vm_list_profiles` - `vm_list_profiles`
- `vm_create` - `vm_create`

View file

@ -8,7 +8,7 @@ RUNTIME_BUILD_DIR ?= build/runtime_bundle
RUNTIME_BUNDLE_DIR ?= src/pyro_mcp/runtime_bundle RUNTIME_BUNDLE_DIR ?= src/pyro_mcp/runtime_bundle
RUNTIME_MATERIALIZED_DIR ?= build/runtime_sources RUNTIME_MATERIALIZED_DIR ?= build/runtime_sources
.PHONY: setup lint format typecheck test check demo doctor ollama ollama-demo run-server install-hooks runtime-bundle runtime-binaries runtime-kernel runtime-rootfs runtime-agent runtime-validate runtime-manifest runtime-sync runtime-clean runtime-fetch-binaries runtime-build-kernel-real runtime-build-rootfs-real runtime-materialize .PHONY: setup lint format typecheck test check demo network-demo doctor ollama ollama-demo run-server install-hooks runtime-bundle runtime-binaries runtime-kernel runtime-rootfs runtime-agent runtime-validate runtime-manifest runtime-sync runtime-clean runtime-fetch-binaries runtime-build-kernel-real runtime-build-rootfs-real runtime-materialize runtime-boot-check runtime-network-check
setup: setup:
uv sync --dev uv sync --dev
@ -30,13 +30,16 @@ check: lint typecheck test
demo: demo:
uv run python examples/static_tool_demo.py uv run python examples/static_tool_demo.py
network-demo:
PYRO_VM_ENABLE_NETWORK=1 uv run python examples/static_tool_demo.py
doctor: doctor:
uv run pyro-mcp-doctor uv run pyro-mcp-doctor
ollama: ollama-demo ollama: ollama-demo
ollama-demo: ollama-demo:
uv run pyro-mcp-ollama-demo --base-url "$(OLLAMA_BASE_URL)" --model "$(OLLAMA_MODEL)" $(OLLAMA_DEMO_FLAGS) PYRO_VM_ENABLE_NETWORK=1 uv run pyro-mcp-ollama-demo --base-url "$(OLLAMA_BASE_URL)" --model "$(OLLAMA_MODEL)" $(OLLAMA_DEMO_FLAGS)
run-server: run-server:
uv run pyro-mcp-server uv run pyro-mcp-server
@ -80,5 +83,11 @@ runtime-build-rootfs-real:
runtime-materialize: runtime-materialize:
uv run pyro-mcp-runtime-build materialize --platform "$(RUNTIME_PLATFORM)" --source-dir "$(RUNTIME_SOURCE_DIR)" --build-dir "$(RUNTIME_BUILD_DIR)" --bundle-dir "$(RUNTIME_BUNDLE_DIR)" --materialized-dir "$(RUNTIME_MATERIALIZED_DIR)" uv run pyro-mcp-runtime-build materialize --platform "$(RUNTIME_PLATFORM)" --source-dir "$(RUNTIME_SOURCE_DIR)" --build-dir "$(RUNTIME_BUILD_DIR)" --bundle-dir "$(RUNTIME_BUNDLE_DIR)" --materialized-dir "$(RUNTIME_MATERIALIZED_DIR)"
runtime-boot-check:
uv run pyro-mcp-runtime-boot-check
runtime-network-check:
uv run pyro-mcp-runtime-network-check
runtime-clean: runtime-clean:
rm -rf "$(RUNTIME_BUILD_DIR)" "$(RUNTIME_MATERIALIZED_DIR)" rm -rf "$(RUNTIME_BUILD_DIR)" "$(RUNTIME_MATERIALIZED_DIR)"

View file

@ -22,11 +22,11 @@ The package includes a bundled Linux x86_64 runtime payload:
No system Firecracker installation is required for basic usage. No system Firecracker installation is required for basic usage.
Current limitation: Current status:
- The bundled runtime is currently shim-based. - The bundled runtime is real, not shim-based.
- `doctor` reports runtime capabilities, and current bundles report no real guest boot, no guest exec agent, and no guest networking. - `doctor` reports real guest capability flags for VM boot, guest exec, and guest networking.
- Until a real guest-capable bundle is installed, `vm_exec` runs in `host_compat` mode rather than `guest_vsock`. - `vm_exec` now runs in `guest_vsock` mode when the VM is started from the bundled runtime.
- This means demo commands can exercise lifecycle/control-plane behavior, but they are not yet proof of command execution inside a real VM guest. - Networking still requires host privileges for TAP/NAT setup; see the networking section below.
Host requirements still apply: Host requirements still apply:
- Linux host - Linux host
@ -68,12 +68,13 @@ Available real-runtime targets:
- `make runtime-build-kernel-real` - `make runtime-build-kernel-real`
- `make runtime-build-rootfs-real` - `make runtime-build-rootfs-real`
- `make runtime-materialize` - `make runtime-materialize`
- `make runtime-boot-check`
- `make runtime-network-check`
Current limitation: Notes:
- the pipeline is real, but the checked-in source artifacts in `runtime_sources/` are still shim/placeholder inputs
- the real-source path depends on `docker`, outbound access to GitHub and Debian snapshot mirrors, and enough disk for kernel/rootfs builds - the real-source path depends on `docker`, outbound access to GitHub and Debian snapshot mirrors, and enough disk for kernel/rootfs builds
- replacing those inputs with real Firecracker binaries, a real kernel, and real rootfs images is what upgrades the packaged bundle from `host_compat` to true guest execution - `make runtime-boot-check` validates that the bundled runtime can boot a real microVM
- the next artifact-replacement steps are documented in `runtime_sources/README.md` - `make runtime-network-check` validates outbound internet access from inside the guest by cloning `https://github.com/octocat/Hello-World.git`
## Run deterministic lifecycle demo ## Run deterministic lifecycle demo
@ -82,8 +83,14 @@ make demo
``` ```
The demo creates a VM, starts it, runs a command, and returns structured output. The demo creates a VM, starts it, runs a command, and returns structured output.
If the runtime reports `guest_vsock` plus networking, it uses an internet probe. If the VM was started with networking enabled, it uses an internet probe.
Otherwise it falls back to a local compatibility command and the result will report `execution_mode=host_compat`. Otherwise it runs `git --version`.
To run the deterministic demo with guest networking enabled:
```bash
make network-demo
```
## Runtime doctor ## Runtime doctor
@ -100,12 +107,16 @@ This prints bundled runtime paths, profile availability, checksum validation sta
- Host TAP/NAT setup is opt-in with: - Host TAP/NAT setup is opt-in with:
```bash ```bash
PYRO_VM_ENABLE_NETWORK=1 make doctor PYRO_VM_ENABLE_NETWORK=1 make demo
``` ```
- Current limitation: - Network setup requires host privilege to manage TAP/NAT state.
- network metadata and host preflight exist - The current implementation auto-uses `sudo -n` for `ip`, `nft`, and `iptables` commands when available.
- real in-guest outbound networking still depends on a non-shim runtime bundle with real guest boot and guest exec support - To validate real guest egress directly:
```bash
make runtime-network-check
```
## Run Ollama lifecycle demo ## Run Ollama lifecycle demo
@ -117,6 +128,7 @@ make ollama-demo
Defaults are configured in `Makefile`. Defaults are configured in `Makefile`.
The demo streams lifecycle progress logs and ends with a short text summary. The demo streams lifecycle progress logs and ends with a short text summary.
`make ollama-demo` now enables guest networking by default.
The command it asks the model to run is a small public repository clone: The command it asks the model to run is a small public repository clone:
```bash ```bash

View file

@ -16,6 +16,9 @@ pyro-mcp-server = "pyro_mcp.server:main"
pyro-mcp-demo = "pyro_mcp.demo:main" pyro-mcp-demo = "pyro_mcp.demo:main"
pyro-mcp-ollama-demo = "pyro_mcp.ollama_demo:main" pyro-mcp-ollama-demo = "pyro_mcp.ollama_demo:main"
pyro-mcp-doctor = "pyro_mcp.doctor:main" pyro-mcp-doctor = "pyro_mcp.doctor:main"
pyro-mcp-runtime-build = "pyro_mcp.runtime_build:main"
pyro-mcp-runtime-boot-check = "pyro_mcp.runtime_boot_check:main"
pyro-mcp-runtime-network-check = "pyro_mcp.runtime_network_check:main"
[build-system] [build-system]
requires = ["hatchling"] requires = ["hatchling"]
@ -30,6 +33,7 @@ packages = ["src/pyro_mcp"]
[tool.hatch.build.targets.sdist] [tool.hatch.build.targets.sdist]
include = [ include = [
"src/pyro_mcp/runtime_bundle/**", "src/pyro_mcp/runtime_bundle/**",
"runtime_sources/**",
"src/pyro_mcp/**/*.py", "src/pyro_mcp/**/*.py",
"README.md", "README.md",
"AGENTS.md", "AGENTS.md",

View file

@ -3,8 +3,8 @@
Source-of-truth inputs for `make runtime-bundle`. Source-of-truth inputs for `make runtime-bundle`.
Current state: Current state:
- `bin/firecracker` and `bin/jailer` are shim placeholders. - `build/runtime_sources/` contains the real materialized runtime inputs used to build the packaged bundle.
- profile kernels and rootfs images are placeholder files. - the checked-in tracked files under `runtime_sources/linux-x86_64/` are build recipes and lock metadata, not the materialized binaries/images.
- `guest/pyro_guest_agent.py` is the guest agent artifact that should ultimately be installed into each real rootfs. - `guest/pyro_guest_agent.py` is the guest agent artifact that should ultimately be installed into each real rootfs.
- real source materialization now writes into `build/runtime_sources/`, not back into the tracked placeholder files. - real source materialization now writes into `build/runtime_sources/`, not back into the tracked placeholder files.
@ -19,11 +19,11 @@ Build requirements for the real path:
- outbound network access to GitHub and Debian snapshot mirrors - outbound network access to GitHub and Debian snapshot mirrors
- enough disk for a kernel build plus 2G ext4 images per profile - enough disk for a kernel build plus 2G ext4 images per profile
Next steps to make the bundle guest-capable: Current status:
1. Replace shim binaries with pinned official Firecracker and Jailer release artifacts. 1. Firecracker and Jailer are materialized from pinned official release artifacts.
2. Replace placeholder `vmlinux` and `rootfs.ext4` files with real, bootable artifacts for each profile. 2. The kernel and rootfs images are built from pinned inputs into `build/runtime_sources/`.
3. Ensure the guest agent is installed and enabled inside every rootfs so the host can use vsock exec. 3. The guest agent is installed into each rootfs and used for vsock exec.
4. Once the source artifacts are real, update `runtime.lock.json` component versions and flip capability flags from `false` to `true`. 4. `runtime.lock.json` now advertises real guest capabilities.
Safety rule: Safety rule:
- The build pipeline should never emit `vm_boot=true`, `guest_exec=true`, or `guest_network=true` while any source artifact is still a shim or placeholder. - The build pipeline should never emit `vm_boot=true`, `guest_exec=true`, or `guest_network=true` while any source artifact is still a shim or placeholder.

View file

@ -9,9 +9,9 @@
"base_distro": "debian-bookworm-20250210" "base_distro": "debian-bookworm-20250210"
}, },
"capabilities": { "capabilities": {
"vm_boot": false, "vm_boot": true,
"guest_exec": false, "guest_exec": true,
"guest_network": false "guest_network": true
}, },
"binaries": { "binaries": {
"firecracker": "bin/firecracker", "firecracker": "bin/firecracker",

View file

@ -9,6 +9,7 @@ import shutil
import subprocess import subprocess
import tarfile import tarfile
import urllib.request import urllib.request
import uuid
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -307,6 +308,7 @@ def materialize_rootfs(
packages_path = paths.source_platform_root / raw_packages_path packages_path = paths.source_platform_root / raw_packages_path
output_path = paths.materialized_platform_root / profile["rootfs"] output_path = paths.materialized_platform_root / profile["rootfs"]
output_path.parent.mkdir(parents=True, exist_ok=True) output_path.parent.mkdir(parents=True, exist_ok=True)
profile_workdir = workdir / f"{profile_name}-{uuid.uuid4().hex[:8]}"
_run( _run(
[ [
str(script_path), str(script_path),
@ -325,7 +327,7 @@ def materialize_rootfs(
"--agent-service", "--agent-service",
str(service_path), str(service_path),
"--workdir", "--workdir",
str(workdir / profile_name), str(profile_workdir),
"--output", "--output",
str(output_path), str(output_path),
] ]

View file

@ -0,0 +1,96 @@
"""Direct guest-network validation for a bundled runtime profile."""
from __future__ import annotations
import argparse
from dataclasses import dataclass
from pathlib import Path
from pyro_mcp.vm_manager import VmManager
from pyro_mcp.vm_network import TapNetworkManager
# Shell pipeline executed inside the guest to prove outbound internet access:
# remove any stale checkout, clone a tiny public repository over HTTPS, then
# confirm the clone is a real git work tree. Success is exit code 0 with
# "true" on stdout (checked by the callers of this module).
NETWORK_CHECK_COMMAND = (
    "rm -rf hello-world "
    "&& git clone --depth 1 https://github.com/octocat/Hello-World.git hello-world >/dev/null "
    "&& git -C hello-world rev-parse --is-inside-work-tree"
)
@dataclass(frozen=True)
class NetworkCheckResult:
    """Immutable summary of a single guest-network validation run."""

    # Identifier of the VM that ran the probe command.
    vm_id: str
    # How the command was executed (e.g. guest vs. host path) — value comes
    # straight from the exec result; exact vocabulary defined by the manager.
    execution_mode: str
    # Whether the VM reported networking as enabled at status time.
    network_enabled: bool
    # Exit code of the probe command inside the guest.
    exit_code: int
    # Captured output streams of the probe command.
    stdout: str
    stderr: str
    # Cleanup details reported by the exec call — structure is opaque here;
    # presumably describes post-exec resource teardown (TODO confirm schema).
    cleanup: dict[str, object]
def run_network_check(
    *,
    profile: str = "debian-git",
    vcpu_count: int = 1,
    mem_mib: int = 1024,
    ttl_seconds: int = 600,
    timeout_seconds: int = 120,
    base_dir: Path | None = None,
) -> NetworkCheckResult:  # pragma: no cover - integration helper
    """Boot a VM with TAP networking forced on and run the clone probe.

    Creates a VM from *profile*, starts it, executes
    ``NETWORK_CHECK_COMMAND`` inside it, and returns the collected
    results as a :class:`NetworkCheckResult`. All keyword arguments are
    passed through to the VM manager unchanged.
    """
    # Networking is forced on here regardless of PYRO_VM_ENABLE_NETWORK,
    # since the whole point of this helper is to validate guest egress.
    vm_manager = VmManager(
        base_dir=base_dir,
        network_manager=TapNetworkManager(enabled=True),
    )
    create_result = vm_manager.create_vm(
        profile=profile,
        vcpu_count=vcpu_count,
        mem_mib=mem_mib,
        ttl_seconds=ttl_seconds,
    )
    vm_id = str(create_result["vm_id"])
    vm_manager.start_vm(vm_id)
    # Status is sampled after start so network_enabled reflects the booted VM.
    vm_status = vm_manager.status_vm(vm_id)
    exec_result = vm_manager.exec_vm(
        vm_id,
        command=NETWORK_CHECK_COMMAND,
        timeout_seconds=timeout_seconds,
    )
    return NetworkCheckResult(
        vm_id=vm_id,
        execution_mode=str(exec_result["execution_mode"]),
        network_enabled=bool(vm_status["network_enabled"]),
        exit_code=int(exec_result["exit_code"]),
        stdout=str(exec_result["stdout"]),
        stderr=str(exec_result["stderr"]),
        cleanup=dict(exec_result["cleanup"]),
    )
def main() -> None:  # pragma: no cover - CLI wiring
    """CLI entry point: parse flags, run the network check, print a report.

    Exits with status 1 (via ``SystemExit``) when the probe did not
    succeed; returns normally on success.
    """
    parser = argparse.ArgumentParser(description="Run a guest networking check.")
    # Flag table mirrors run_network_check's keyword parameters one-to-one.
    flag_specs: list[tuple[str, dict[str, object]]] = [
        ("--profile", {"default": "debian-git"}),
        ("--vcpu-count", {"type": int, "default": 1}),
        ("--mem-mib", {"type": int, "default": 1024}),
        ("--ttl-seconds", {"type": int, "default": 600}),
        ("--timeout-seconds", {"type": int, "default": 120}),
    ]
    for flag, kwargs in flag_specs:
        parser.add_argument(flag, **kwargs)
    opts = parser.parse_args()

    report = run_network_check(
        profile=opts.profile,
        vcpu_count=opts.vcpu_count,
        mem_mib=opts.mem_mib,
        ttl_seconds=opts.ttl_seconds,
        timeout_seconds=opts.timeout_seconds,
    )

    print(f"[network] vm_id={report.vm_id}")
    print(f"[network] execution_mode={report.execution_mode}")
    print(f"[network] network_enabled={report.network_enabled}")
    print(f"[network] exit_code={report.exit_code}")

    # Success requires both a zero exit code and the literal "true" that
    # `git rev-parse --is-inside-work-tree` prints on a real clone.
    if report.exit_code == 0 and report.stdout.strip() == "true":
        print("[network] result=success")
        return

    print("[network] result=failure")
    stdout_text = report.stdout.strip()
    if stdout_text:
        print(f"[network] stdout={stdout_text}")
    stderr_text = report.stderr.strip()
    if stderr_text:
        print(f"[network] stderr={stderr_text}")
    raise SystemExit(1)

View file

@ -5,13 +5,13 @@ from __future__ import annotations
import json import json
import socket import socket
from dataclasses import dataclass from dataclasses import dataclass
from typing import Callable, Protocol from typing import Any, Callable, Protocol
class SocketLike(Protocol): class SocketLike(Protocol):
def settimeout(self, timeout: int) -> None: ... def settimeout(self, timeout: int) -> None: ...
def connect(self, address: tuple[int, int]) -> None: ... def connect(self, address: Any) -> None: ...
def sendall(self, data: bytes) -> None: ... def sendall(self, data: bytes) -> None: ...
@ -38,19 +38,35 @@ class VsockExecClient:
self._socket_factory = socket_factory or socket.socket self._socket_factory = socket_factory or socket.socket
def exec( def exec(
self, guest_cid: int, port: int, command: str, timeout_seconds: int self,
guest_cid: int,
port: int,
command: str,
timeout_seconds: int,
*,
uds_path: str | None = None,
) -> GuestExecResponse: ) -> GuestExecResponse:
request = { request = {
"command": command, "command": command,
"timeout_seconds": timeout_seconds, "timeout_seconds": timeout_seconds,
} }
family = getattr(socket, "AF_VSOCK", None) family = getattr(socket, "AF_VSOCK", None)
if family is None: if family is not None:
raise RuntimeError("vsock sockets are not supported on this host Python runtime")
sock = self._socket_factory(family, socket.SOCK_STREAM) sock = self._socket_factory(family, socket.SOCK_STREAM)
connect_address: Any = (guest_cid, port)
elif uds_path is not None:
sock = self._socket_factory(socket.AF_UNIX, socket.SOCK_STREAM)
connect_address = uds_path
else:
raise RuntimeError("vsock sockets are not supported on this host Python runtime")
try: try:
sock.settimeout(timeout_seconds) sock.settimeout(timeout_seconds)
sock.connect((guest_cid, port)) sock.connect(connect_address)
if family is None:
sock.sendall(f"CONNECT {port}\n".encode("utf-8"))
status = self._recv_line(sock)
if not status.startswith("OK "):
raise RuntimeError(f"vsock unix bridge rejected port {port}: {status.strip()}")
sock.sendall((json.dumps(request) + "\n").encode("utf-8")) sock.sendall((json.dumps(request) + "\n").encode("utf-8"))
chunks: list[bytes] = [] chunks: list[bytes] = []
while True: while True:
@ -70,3 +86,15 @@ class VsockExecClient:
exit_code=int(payload.get("exit_code", -1)), exit_code=int(payload.get("exit_code", -1)),
duration_ms=int(payload.get("duration_ms", 0)), duration_ms=int(payload.get("duration_ms", 0)),
) )
@staticmethod
def _recv_line(sock: SocketLike) -> str:
chunks: list[bytes] = []
while True:
data = sock.recv(1)
if data == b"":
break
chunks.append(data)
if data == b"\n":
break
return b"".join(chunks).decode("utf-8", errors="replace")

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import os import os
import shutil import shutil
import signal
import subprocess import subprocess
import threading import threading
import time import time
@ -143,6 +144,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
self._runtime_capabilities = runtime_capabilities self._runtime_capabilities = runtime_capabilities
self._network_manager = network_manager or TapNetworkManager() self._network_manager = network_manager or TapNetworkManager()
self._guest_exec_client = guest_exec_client or VsockExecClient() self._guest_exec_client = guest_exec_client or VsockExecClient()
self._processes: dict[str, subprocess.Popen[str]] = {}
if not self._firecracker_bin.exists(): if not self._firecracker_bin.exists():
raise RuntimeError(f"bundled firecracker binary not found at {self._firecracker_bin}") raise RuntimeError(f"bundled firecracker binary not found at {self._firecracker_bin}")
if not self._jailer_bin.exists(): if not self._jailer_bin.exists():
@ -160,10 +162,16 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
f"{artifacts.kernel_image} and {artifacts.rootfs_image}" f"{artifacts.kernel_image} and {artifacts.rootfs_image}"
) )
instance.metadata["kernel_image"] = str(artifacts.kernel_image) instance.metadata["kernel_image"] = str(artifacts.kernel_image)
instance.metadata["rootfs_image"] = str(artifacts.rootfs_image) rootfs_copy = instance.workdir / "rootfs.ext4"
shutil.copy2(artifacts.rootfs_image, rootfs_copy)
instance.metadata["rootfs_image"] = str(rootfs_copy)
if self._network_manager.enabled:
network = self._network_manager.allocate(instance.vm_id) network = self._network_manager.allocate(instance.vm_id)
instance.network = network instance.network = network
instance.metadata.update(self._network_manager.to_metadata(network)) instance.metadata.update(self._network_manager.to_metadata(network))
else:
instance.network = None
instance.metadata["network_enabled"] = "false"
except Exception: except Exception:
shutil.rmtree(instance.workdir, ignore_errors=True) shutil.rmtree(instance.workdir, ignore_errors=True)
raise raise
@ -175,6 +183,12 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path) instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path)
instance.metadata["guest_cid"] = str(launch_plan.guest_cid) instance.metadata["guest_cid"] = str(launch_plan.guest_cid)
instance.metadata["guest_exec_port"] = str(launch_plan.vsock_port) instance.metadata["guest_exec_port"] = str(launch_plan.vsock_port)
instance.metadata["guest_exec_uds_path"] = str(instance.workdir / "vsock.sock")
serial_log_path = instance.workdir / "serial.log"
firecracker_log_path = instance.workdir / "firecracker.log"
firecracker_log_path.touch()
instance.metadata["serial_log_path"] = str(serial_log_path)
instance.metadata["firecracker_log_path"] = str(firecracker_log_path)
proc = subprocess.run( # noqa: S603 proc = subprocess.run( # noqa: S603
[str(self._firecracker_bin), "--version"], [str(self._firecracker_bin), "--version"],
text=True, text=True,
@ -191,14 +205,60 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
if self._runtime_capabilities.reason is not None: if self._runtime_capabilities.reason is not None:
instance.metadata["runtime_reason"] = self._runtime_capabilities.reason instance.metadata["runtime_reason"] = self._runtime_capabilities.reason
return return
instance.metadata["execution_mode"] = "guest_vsock" with serial_log_path.open("w", encoding="utf-8") as serial_fp:
process = subprocess.Popen( # noqa: S603
[
str(self._firecracker_bin),
"--no-api",
"--config-file",
str(launch_plan.config_path),
"--log-path",
str(firecracker_log_path),
"--level",
"Info",
],
stdout=serial_fp,
stderr=subprocess.STDOUT,
text=True,
)
self._processes[instance.vm_id] = process
time.sleep(2)
if process.poll() is not None:
serial_log = serial_log_path.read_text(encoding="utf-8", errors="ignore")
firecracker_log = firecracker_log_path.read_text(encoding="utf-8", errors="ignore")
self._processes.pop(instance.vm_id, None)
raise RuntimeError(
"firecracker microVM exited during startup: "
f"{(serial_log or firecracker_log).strip()}"
)
instance.firecracker_pid = process.pid
instance.metadata["execution_mode"] = (
"guest_vsock" if self._runtime_capabilities.supports_guest_exec else "host_compat"
)
instance.metadata["boot_mode"] = "native" instance.metadata["boot_mode"] = "native"
def exec(self, instance: VmInstance, command: str, timeout_seconds: int) -> VmExecResult: def exec(self, instance: VmInstance, command: str, timeout_seconds: int) -> VmExecResult:
if self._runtime_capabilities.supports_guest_exec: if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"]) guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"]) port = int(instance.metadata["guest_exec_port"])
response = self._guest_exec_client.exec(guest_cid, port, command, timeout_seconds) uds_path = instance.metadata.get("guest_exec_uds_path")
deadline = time.monotonic() + min(timeout_seconds, 10)
while True:
try:
response = self._guest_exec_client.exec(
guest_cid,
port,
command,
timeout_seconds,
uds_path=uds_path,
)
break
except (OSError, RuntimeError) as exc:
if time.monotonic() >= deadline:
raise RuntimeError(
f"guest exec transport did not become ready: {exc}"
) from exc
time.sleep(0.2)
return VmExecResult( return VmExecResult(
stdout=response.stdout, stdout=response.stdout,
stderr=response.stderr, stderr=response.stderr,
@ -209,9 +269,36 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
return _run_host_command(instance.workdir, command, timeout_seconds) return _run_host_command(instance.workdir, command, timeout_seconds)
def stop(self, instance: VmInstance) -> None: def stop(self, instance: VmInstance) -> None:
del instance process = self._processes.pop(instance.vm_id, None)
if process is not None:
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
process.wait(timeout=5)
instance.firecracker_pid = None
return
if instance.firecracker_pid is None:
return
try:
os.kill(instance.firecracker_pid, signal.SIGTERM)
except ProcessLookupError:
instance.firecracker_pid = None
return
deadline = time.monotonic() + 5
while time.monotonic() < deadline:
try:
os.kill(instance.firecracker_pid, 0)
except ProcessLookupError:
instance.firecracker_pid = None
return
time.sleep(0.1)
os.kill(instance.firecracker_pid, signal.SIGKILL)
instance.firecracker_pid = None
def delete(self, instance: VmInstance) -> None: def delete(self, instance: VmInstance) -> None:
self._processes.pop(instance.vm_id, None)
if instance.network is not None: if instance.network is not None:
self._network_manager.cleanup(instance.network) self._network_manager.cleanup(instance.network)
shutil.rmtree(instance.workdir, ignore_errors=True) shutil.rmtree(instance.workdir, ignore_errors=True)

View file

@ -34,6 +34,7 @@ class NetworkDiagnostics:
nft_binary: str | None nft_binary: str | None
iptables_binary: str | None iptables_binary: str | None
ip_forward_enabled: bool ip_forward_enabled: bool
sudo_non_interactive: bool
class TapNetworkManager: class TapNetworkManager:
@ -44,11 +45,13 @@ class TapNetworkManager:
*, *,
enabled: bool | None = None, enabled: bool | None = None,
runner: CommandRunner | None = None, runner: CommandRunner | None = None,
use_sudo: bool | None = None,
) -> None: ) -> None:
if enabled is None: if enabled is None:
self._enabled = os.environ.get("PYRO_VM_ENABLE_NETWORK") == "1" self._enabled = os.environ.get("PYRO_VM_ENABLE_NETWORK") == "1"
else: else:
self._enabled = enabled self._enabled = enabled
self._use_sudo = self._detect_sudo() if use_sudo is None else use_sudo
self._runner = runner or self._run self._runner = runner or self._run
@staticmethod @staticmethod
@ -67,6 +70,7 @@ class TapNetworkManager:
nft_binary=shutil.which("nft"), nft_binary=shutil.which("nft"),
iptables_binary=shutil.which("iptables"), iptables_binary=shutil.which("iptables"),
ip_forward_enabled=ip_forward, ip_forward_enabled=ip_forward,
sudo_non_interactive=TapNetworkManager._detect_sudo(),
) )
@property @property
@ -90,7 +94,44 @@ class TapNetworkManager:
mac_address=mac_address, mac_address=mac_address,
) )
if self._enabled: if self._enabled:
try:
self._ensure_host_network(config) self._ensure_host_network(config)
except Exception:
table_name = self._nft_table_name(config.vm_id)
self._run_ignore(["nft", "delete", "table", "ip", table_name])
self._run_ignore(
[
"iptables",
"-t",
"nat",
"-D",
"POSTROUTING",
"-s",
config.subnet_cidr,
"-j",
"MASQUERADE",
]
)
self._run_ignore(
["iptables", "-D", "FORWARD", "-i", config.tap_name, "-j", "ACCEPT"]
)
self._run_ignore(
[
"iptables",
"-D",
"FORWARD",
"-o",
config.tap_name,
"-m",
"conntrack",
"--ctstate",
"RELATED,ESTABLISHED",
"-j",
"ACCEPT",
]
)
self._run_ignore(["ip", "link", "del", config.tap_name])
raise
return config return config
def cleanup(self, config: NetworkConfig) -> None: def cleanup(self, config: NetworkConfig) -> None:
@ -98,6 +139,35 @@ class TapNetworkManager:
return return
table_name = self._nft_table_name(config.vm_id) table_name = self._nft_table_name(config.vm_id)
self._run_ignore(["nft", "delete", "table", "ip", table_name]) self._run_ignore(["nft", "delete", "table", "ip", table_name])
self._run_ignore(
[
"iptables",
"-t",
"nat",
"-D",
"POSTROUTING",
"-s",
config.subnet_cidr,
"-j",
"MASQUERADE",
]
)
self._run_ignore(["iptables", "-D", "FORWARD", "-i", config.tap_name, "-j", "ACCEPT"])
self._run_ignore(
[
"iptables",
"-D",
"FORWARD",
"-o",
config.tap_name,
"-m",
"conntrack",
"--ctstate",
"RELATED,ESTABLISHED",
"-j",
"ACCEPT",
]
)
self._run_ignore(["ip", "link", "del", config.tap_name]) self._run_ignore(["ip", "link", "del", config.tap_name])
def to_metadata(self, config: NetworkConfig) -> dict[str, str]: def to_metadata(self, config: NetworkConfig) -> dict[str, str]:
@ -129,15 +199,33 @@ class TapNetworkManager:
raise RuntimeError("/dev/net/tun is not available on this host") raise RuntimeError("/dev/net/tun is not available on this host")
if diagnostics.ip_binary is None: if diagnostics.ip_binary is None:
raise RuntimeError("`ip` binary is required for TAP setup") raise RuntimeError("`ip` binary is required for TAP setup")
if diagnostics.nft_binary is None: if diagnostics.nft_binary is None and diagnostics.iptables_binary is None:
raise RuntimeError("`nft` binary is required for outbound NAT setup") raise RuntimeError("`nft` or `iptables` is required for outbound NAT setup")
if not diagnostics.ip_forward_enabled: if not diagnostics.ip_forward_enabled:
raise RuntimeError("IPv4 forwarding is disabled on this host") raise RuntimeError("IPv4 forwarding is disabled on this host")
self._runner(["ip", "tuntap", "add", "dev", config.tap_name, "mode", "tap"]) self._runner(
[
"ip",
"tuntap",
"add",
"dev",
config.tap_name,
"mode",
"tap",
"user",
str(os.getuid()),
]
)
self._runner(["ip", "addr", "add", f"{config.gateway_ip}/24", "dev", config.tap_name]) self._runner(["ip", "addr", "add", f"{config.gateway_ip}/24", "dev", config.tap_name])
self._runner(["ip", "link", "set", config.tap_name, "up"]) self._runner(["ip", "link", "set", config.tap_name, "up"])
if diagnostics.nft_binary is not None:
self._ensure_nft_network(config)
return
self._ensure_iptables_network(config)
def _ensure_nft_network(self, config: NetworkConfig) -> None:
table_name = self._nft_table_name(config.vm_id) table_name = self._nft_table_name(config.vm_id)
self._runner(["nft", "add", "table", "ip", table_name]) self._runner(["nft", "add", "table", "ip", table_name])
self._runner( self._runner(
@ -159,7 +247,30 @@ class TapNetworkManager:
"}", "}",
] ]
) )
self._runner([ self._runner(
[
"nft",
"add",
"chain",
"ip",
table_name,
"forward",
"{",
"type",
"filter",
"hook",
"forward",
"priority",
"filter",
";",
"policy",
"accept",
";",
"}",
]
)
self._runner(
[
"nft", "nft",
"add", "add",
"rule", "rule",
@ -170,7 +281,68 @@ class TapNetworkManager:
"saddr", "saddr",
config.subnet_cidr, config.subnet_cidr,
"masquerade", "masquerade",
]) ]
)
self._runner(
[
"nft",
"add",
"rule",
"ip",
table_name,
"forward",
"iifname",
config.tap_name,
"accept",
]
)
self._runner(
[
"nft",
"add",
"rule",
"ip",
table_name,
"forward",
"oifname",
config.tap_name,
"ct",
"state",
"related,established",
"accept",
]
)
def _ensure_iptables_network(self, config: NetworkConfig) -> None:
    """Install iptables NAT and forwarding rules for the guest subnet.

    Fallback path when ``nft`` is not available on the host. Appends three
    rules: a POSTROUTING MASQUERADE for the guest subnet, an ACCEPT for
    traffic entering from the TAP device, and an ACCEPT for
    related/established traffic returning toward the TAP device.
    """
    masquerade_rule = [
        "iptables",
        "-t",
        "nat",
        "-A",
        "POSTROUTING",
        "-s",
        config.subnet_cidr,
        "-j",
        "MASQUERADE",
    ]
    outbound_rule = ["iptables", "-A", "FORWARD", "-i", config.tap_name, "-j", "ACCEPT"]
    return_traffic_rule = [
        "iptables",
        "-A",
        "FORWARD",
        "-o",
        config.tap_name,
        "-m",
        "conntrack",
        "--ctstate",
        "RELATED,ESTABLISHED",
        "-j",
        "ACCEPT",
    ]
    # Rule order matters only for readability here; each append is independent.
    for rule in (masquerade_rule, outbound_rule, return_traffic_rule):
        self._runner(rule)
def _run_ignore(self, command: list[str]) -> None: def _run_ignore(self, command: list[str]) -> None:
try: try:
@ -183,9 +355,30 @@ class TapNetworkManager:
return f"pyro_{vm_id[:12]}" return f"pyro_{vm_id[:12]}"
@staticmethod @staticmethod
def _run(command: list[str]) -> subprocess.CompletedProcess[str]: def _detect_sudo() -> bool:
completed = subprocess.run(command, text=True, capture_output=True, check=False) if os.geteuid() == 0:
return False
if shutil.which("sudo") is None:
return False
completed = subprocess.run(
["sudo", "-n", "true"],
text=True,
capture_output=True,
check=False,
)
return completed.returncode == 0
def _run(self, command: list[str]) -> subprocess.CompletedProcess[str]:
effective_command = command
if self._use_sudo:
effective_command = ["sudo", "-n", *command]
completed = subprocess.run(
effective_command,
text=True,
capture_output=True,
check=False,
)
if completed.returncode != 0: if completed.returncode != 0:
stderr = completed.stderr.strip() or completed.stdout.strip() stderr = completed.stderr.strip() or completed.stdout.strip()
raise RuntimeError(f"command {' '.join(command)!r} failed: {stderr}") raise RuntimeError(f"command {' '.join(effective_command)!r} failed: {stderr}")
return completed return completed

View file

@ -85,9 +85,9 @@ def test_doctor_report_has_runtime_fields() -> None:
assert "tun_available" in networking assert "tun_available" in networking
def test_runtime_capabilities_reports_shim_bundle() -> None: def test_runtime_capabilities_reports_real_bundle_flags() -> None:
paths = resolve_runtime_paths() paths = resolve_runtime_paths()
capabilities = runtime_capabilities(paths) capabilities = runtime_capabilities(paths)
assert capabilities.supports_vm_boot is False assert capabilities.supports_vm_boot is True
assert capabilities.supports_guest_exec is False assert capabilities.supports_guest_exec is True
assert capabilities.supports_guest_network is False assert capabilities.supports_guest_network is True

View file

@ -0,0 +1,73 @@
from __future__ import annotations
import pytest
import pyro_mcp.runtime_network_check as runtime_network_check
from pyro_mcp.vm_network import TapNetworkManager
def test_network_check_uses_network_enabled_manager(monkeypatch: pytest.MonkeyPatch) -> None:
    """run_network_check must construct a network-enabled TAP manager and exec the probe command."""
    recorded: dict[str, object] = {}

    class FakeVmManager:
        # Records constructor kwargs and lifecycle calls instead of touching Firecracker.
        def __init__(self, **kwargs: object) -> None:
            recorded.update(kwargs)

        def create_vm(self, **kwargs: object) -> dict[str, object]:
            recorded["create_kwargs"] = kwargs
            return {"vm_id": "vm123"}

        def start_vm(self, vm_id: str) -> dict[str, object]:
            recorded["started_vm_id"] = vm_id
            return {"state": "started"}

        def status_vm(self, vm_id: str) -> dict[str, object]:
            recorded["status_vm_id"] = vm_id
            return {"network_enabled": True}

        def exec_vm(self, vm_id: str, *, command: str, timeout_seconds: int) -> dict[str, object]:
            recorded["exec_vm_id"] = vm_id
            recorded["command"] = command
            recorded["timeout_seconds"] = timeout_seconds
            return {
                "execution_mode": "guest_vsock",
                "exit_code": 0,
                "stdout": "true\n",
                "stderr": "",
                "cleanup": {"deleted": True, "vm_id": vm_id, "reason": "post_exec_cleanup"},
            }

    monkeypatch.setattr(runtime_network_check, "VmManager", FakeVmManager)

    result = runtime_network_check.run_network_check()

    # The manager must have been handed a TAP network manager with networking on.
    manager = recorded["network_manager"]
    assert isinstance(manager, TapNetworkManager)
    assert manager.enabled is True
    assert recorded["command"] == runtime_network_check.NETWORK_CHECK_COMMAND
    assert recorded["timeout_seconds"] == 120
    assert result.execution_mode == "guest_vsock"
    assert result.network_enabled is True
    assert result.exit_code == 0
def test_network_check_main_fails_on_unsuccessful_command(
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
) -> None:
    """main() must exit with status 1 and print failure details when the probe command fails."""

    def fake_run_network_check(**kwargs: object) -> runtime_network_check.NetworkCheckResult:
        # Simulate a guest whose outbound connectivity probe exited non-zero.
        return runtime_network_check.NetworkCheckResult(
            vm_id="vm123",
            execution_mode="guest_vsock",
            network_enabled=True,
            exit_code=1,
            stdout="",
            stderr="curl failed",
            cleanup={"deleted": True},
        )

    monkeypatch.setattr(runtime_network_check, "run_network_check", fake_run_network_check)

    with pytest.raises(SystemExit, match="1"):
        runtime_network_check.main()

    captured = capsys.readouterr().out
    assert "[network] result=failure" in captured
    assert "[network] stderr=curl failed" in captured

View file

@ -8,9 +8,13 @@ from pyro_mcp.vm_guest import VsockExecClient
class StubSocket: class StubSocket:
def __init__(self, response: bytes) -> None: def __init__(self, responses: list[bytes] | bytes) -> None:
self.response = response if isinstance(responses, bytes):
self.connected: tuple[int, int] | None = None self.responses = [responses]
else:
self.responses = responses
self._buffer = b""
self.connected: object | None = None
self.sent = b"" self.sent = b""
self.timeout: int | None = None self.timeout: int | None = None
self.closed = False self.closed = False
@ -25,10 +29,12 @@ class StubSocket:
self.sent += data self.sent += data
def recv(self, size: int) -> bytes: def recv(self, size: int) -> bytes:
del size if not self._buffer and self.responses:
if self.response == b"": self._buffer = self.responses.pop(0)
if not self._buffer:
return b"" return b""
data, self.response = self.response, b"" data = self._buffer[:size]
self._buffer = self._buffer[size:]
return data return data
def close(self) -> None: def close(self) -> None:
@ -62,3 +68,36 @@ def test_vsock_exec_client_rejects_bad_json(monkeypatch: pytest.MonkeyPatch) ->
client = VsockExecClient(socket_factory=lambda family, sock_type: stub) client = VsockExecClient(socket_factory=lambda family, sock_type: stub)
with pytest.raises(RuntimeError, match="JSON object"): with pytest.raises(RuntimeError, match="JSON object"):
client.exec(1234, 5005, "echo ok", 30) client.exec(1234, 5005, "echo ok", 30)
def test_vsock_exec_client_uses_unix_bridge_when_vsock_is_unavailable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Without AF_VSOCK, the client must bridge over a UNIX socket via Firecracker's CONNECT handshake."""
    # Pretend the host has no native vsock support.
    monkeypatch.delattr(socket, "AF_VSOCK", raising=False)

    # First the bridge acknowledges CONNECT, then the guest agent replies with JSON.
    bridge_socket = StubSocket(
        [
            b"OK 1073746829\n",
            b'{"stdout":"ready\\n","stderr":"","exit_code":0,"duration_ms":5}',
        ]
    )

    def make_socket(family: int, sock_type: int) -> StubSocket:
        # The fallback path must open a stream UNIX socket, not a vsock one.
        assert family == socket.AF_UNIX
        assert sock_type == socket.SOCK_STREAM
        return bridge_socket

    client = VsockExecClient(socket_factory=make_socket)
    response = client.exec(1234, 5005, "echo ready", 30, uds_path="/tmp/vsock.sock")

    assert response.stdout == "ready\n"
    assert bridge_socket.connected == "/tmp/vsock.sock"
    assert bridge_socket.sent.startswith(b"CONNECT 5005\n")
def test_vsock_exec_client_requires_transport_when_vsock_is_unavailable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Without AF_VSOCK and without a UDS bridge path, exec must fail loudly."""
    monkeypatch.delattr(socket, "AF_VSOCK", raising=False)

    def make_socket(family: int, sock_type: int) -> StubSocket:
        return StubSocket(b"")

    client = VsockExecClient(socket_factory=make_socket)
    with pytest.raises(RuntimeError, match="not supported"):
        client.exec(1234, 5005, "echo ok", 30)