diff --git a/AGENTS.md b/AGENTS.md index d2b37fc..cce2ee3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,8 +14,11 @@ This repository ships `pyro-mcp`, an MCP-compatible package for ephemeral VM lif - Use `make runtime-bundle` to regenerate the packaged runtime bundle from `runtime_sources/`. - Use `make runtime-materialize` to build real runtime inputs into `build/runtime_sources/`. - Use `make runtime-fetch-binaries`, `make runtime-build-kernel-real`, and `make runtime-build-rootfs-real` if you need to debug the real-source pipeline step by step. +- Use `make runtime-boot-check` to run a direct Firecracker boot validation against the bundled runtime artifacts. +- Use `make runtime-network-check` to validate outbound internet access from inside the guest. - Use `make demo` to validate deterministic VM lifecycle execution. -- Use `make ollama-demo` to validate model-triggered lifecycle tool usage. +- Use `make network-demo` to validate deterministic VM lifecycle execution with guest networking enabled. +- Use `make ollama-demo` to validate model-triggered lifecycle tool usage with guest networking enabled. - Use `make doctor` to inspect bundled runtime integrity and host prerequisites. - If you need full log payloads from the Ollama demo, use `make ollama-demo OLLAMA_DEMO_FLAGS=-v`. @@ -32,7 +35,7 @@ These checks run in pre-commit hooks and should all pass locally. - Public factory: `pyro_mcp.create_server()` - Runtime diagnostics CLI: `pyro-mcp-doctor` - Runtime bundle build CLI: `pyro-mcp-runtime-build` -- Current bundled runtime is shim-based unless replaced with a real guest-capable bundle; check `make doctor` for runtime capabilities. +- Current bundled runtime is guest-capable for VM boot, guest exec, and guest networking; check `make doctor` for runtime capabilities. - Lifecycle tools: - `vm_list_profiles` - `vm_create` diff --git a/Makefile b/Makefile index af02ea4..57187fb 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ RUNTIME_BUILD_DIR ?= build/runtime_bundle RUNTIME_BUNDLE_DIR ?= src/pyro_mcp/runtime_bundle RUNTIME_MATERIALIZED_DIR ?= build/runtime_sources -.PHONY: setup lint format typecheck test check demo doctor ollama ollama-demo run-server install-hooks runtime-bundle runtime-binaries runtime-kernel runtime-rootfs runtime-agent runtime-validate runtime-manifest runtime-sync runtime-clean runtime-fetch-binaries runtime-build-kernel-real runtime-build-rootfs-real runtime-materialize +.PHONY: setup lint format typecheck test check demo network-demo doctor ollama ollama-demo run-server install-hooks runtime-bundle runtime-binaries runtime-kernel runtime-rootfs runtime-agent runtime-validate runtime-manifest runtime-sync runtime-clean runtime-fetch-binaries runtime-build-kernel-real runtime-build-rootfs-real runtime-materialize runtime-boot-check runtime-network-check setup: uv sync --dev @@ -30,13 +30,16 @@ check: lint typecheck test demo: uv run python examples/static_tool_demo.py +network-demo: + PYRO_VM_ENABLE_NETWORK=1 uv run python examples/static_tool_demo.py + doctor: uv run pyro-mcp-doctor ollama: ollama-demo ollama-demo: - uv run pyro-mcp-ollama-demo --base-url "$(OLLAMA_BASE_URL)" --model "$(OLLAMA_MODEL)" $(OLLAMA_DEMO_FLAGS) + PYRO_VM_ENABLE_NETWORK=1 uv run pyro-mcp-ollama-demo --base-url "$(OLLAMA_BASE_URL)" --model "$(OLLAMA_MODEL)" $(OLLAMA_DEMO_FLAGS) run-server: uv run pyro-mcp-server @@ -80,5 +83,11 @@ runtime-build-rootfs-real: runtime-materialize: uv run pyro-mcp-runtime-build materialize --platform "$(RUNTIME_PLATFORM)" --source-dir "$(RUNTIME_SOURCE_DIR)" --build-dir "$(RUNTIME_BUILD_DIR)" --bundle-dir "$(RUNTIME_BUNDLE_DIR)" --materialized-dir "$(RUNTIME_MATERIALIZED_DIR)" +runtime-boot-check: + uv run pyro-mcp-runtime-boot-check + +runtime-network-check: + uv run pyro-mcp-runtime-network-check + runtime-clean: rm -rf "$(RUNTIME_BUILD_DIR)" "$(RUNTIME_MATERIALIZED_DIR)" diff --git a/README.md b/README.md index c622969..9f8d7c3 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,11 @@ The package includes a bundled Linux x86_64 runtime payload: No system Firecracker installation is required for basic usage. -Current limitation: -- The bundled runtime is currently shim-based. -- `doctor` reports runtime capabilities, and current bundles report no real guest boot, no guest exec agent, and no guest networking. -- Until a real guest-capable bundle is installed, `vm_exec` runs in `host_compat` mode rather than `guest_vsock`. -- This means demo commands can exercise lifecycle/control-plane behavior, but they are not yet proof of command execution inside a real VM guest. +Current status: +- The bundled runtime is real, not shim-based. +- `doctor` reports real guest capability flags for VM boot, guest exec, and guest networking. +- `vm_exec` now runs in `guest_vsock` mode when the VM is started from the bundled runtime. +- Networking still requires host privileges for TAP/NAT setup; see the networking section below. Host requirements still apply: - Linux host @@ -68,12 +68,13 @@ Available real-runtime targets: - `make runtime-build-kernel-real` - `make runtime-build-rootfs-real` - `make runtime-materialize` +- `make runtime-boot-check` +- `make runtime-network-check` -Current limitation: -- the pipeline is real, but the checked-in source artifacts in `runtime_sources/` are still shim/placeholder inputs +Notes: - the real-source path depends on `docker`, outbound access to GitHub and Debian snapshot mirrors, and enough disk for kernel/rootfs builds -- replacing those inputs with real Firecracker binaries, a real kernel, and real rootfs images is what upgrades the packaged bundle from `host_compat` to true guest execution -- the next artifact-replacement steps are documented in `runtime_sources/README.md` +- `make runtime-boot-check` validates that the bundled runtime can boot a real microVM +- `make runtime-network-check` validates outbound internet access from inside the guest by cloning `https://github.com/octocat/Hello-World.git` ## Run deterministic lifecycle demo @@ -82,8 +83,14 @@ make demo ``` The demo creates a VM, starts it, runs a command, and returns structured output. -If the runtime reports `guest_vsock` plus networking, it uses an internet probe. -Otherwise it falls back to a local compatibility command and the result will report `execution_mode=host_compat`. +If the VM was started with networking enabled, it uses an internet probe. +Otherwise it runs `git --version`. + +To run the deterministic demo with guest networking enabled: + +```bash +make network-demo +``` ## Runtime doctor @@ -100,12 +107,16 @@ This prints bundled runtime paths, profile availability, checksum validation sta - Host TAP/NAT setup is opt-in with: ```bash -PYRO_VM_ENABLE_NETWORK=1 make doctor +PYRO_VM_ENABLE_NETWORK=1 make demo ``` -- Current limitation: - - network metadata and host preflight exist - - real in-guest outbound networking still depends on a non-shim runtime bundle with real guest boot and guest exec support +- Network setup requires host privilege to manage TAP/NAT state. +- The current implementation auto-uses `sudo -n` for `ip`, `nft`, and `iptables` commands when available. +- To validate real guest egress directly: + +```bash +make runtime-network-check +``` ## Run Ollama lifecycle demo @@ -117,6 +128,7 @@ make ollama-demo Defaults are configured in `Makefile`. The demo streams lifecycle progress logs and ends with a short text summary. +`make ollama-demo` now enables guest networking by default. The command it asks the model to run is a small public repository clone: ```bash diff --git a/pyproject.toml b/pyproject.toml index 7e97d4c..c49634e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ pyro-mcp-server = "pyro_mcp.server:main" pyro-mcp-demo = "pyro_mcp.demo:main" pyro-mcp-ollama-demo = "pyro_mcp.ollama_demo:main" pyro-mcp-doctor = "pyro_mcp.doctor:main" +pyro-mcp-runtime-build = "pyro_mcp.runtime_build:main" +pyro-mcp-runtime-boot-check = "pyro_mcp.runtime_boot_check:main" +pyro-mcp-runtime-network-check = "pyro_mcp.runtime_network_check:main" [build-system] requires = ["hatchling"] @@ -30,6 +33,7 @@ packages = ["src/pyro_mcp"] [tool.hatch.build.targets.sdist] include = [ "src/pyro_mcp/runtime_bundle/**", + "runtime_sources/**", "src/pyro_mcp/**/*.py", "README.md", "AGENTS.md", diff --git a/runtime_sources/README.md b/runtime_sources/README.md index 02500bf..5f1ca68 100644 --- a/runtime_sources/README.md +++ b/runtime_sources/README.md @@ -3,8 +3,8 @@ Source-of-truth inputs for `make runtime-bundle`. Current state: -- `bin/firecracker` and `bin/jailer` are shim placeholders. -- profile kernels and rootfs images are placeholder files. +- `build/runtime_sources/` contains the real materialized runtime inputs used to build the packaged bundle. +- the checked-in tracked files under `runtime_sources/linux-x86_64/` are build recipes and lock metadata, not the materialized binaries/images. - `guest/pyro_guest_agent.py` is the guest agent artifact that should ultimately be installed into each real rootfs. - real source materialization now writes into `build/runtime_sources/`, not back into the tracked placeholder files. @@ -19,11 +19,11 @@ Build requirements for the real path: - outbound network access to GitHub and Debian snapshot mirrors - enough disk for a kernel build plus 2G ext4 images per profile -Next steps to make the bundle guest-capable: -1. Replace shim binaries with pinned official Firecracker and Jailer release artifacts. -2. Replace placeholder `vmlinux` and `rootfs.ext4` files with real, bootable artifacts for each profile. -3. Ensure the guest agent is installed and enabled inside every rootfs so the host can use vsock exec. -4. Once the source artifacts are real, update `runtime.lock.json` component versions and flip capability flags from `false` to `true`. +Current status: +1. Firecracker and Jailer are materialized from pinned official release artifacts. +2. The kernel and rootfs images are built from pinned inputs into `build/runtime_sources/`. +3. The guest agent is installed into each rootfs and used for vsock exec. +4. `runtime.lock.json` now advertises real guest capabilities. Safety rule: - The build pipeline should never emit `vm_boot=true`, `guest_exec=true`, or `guest_network=true` while any source artifact is still a shim or placeholder. diff --git a/runtime_sources/linux-x86_64/runtime.lock.json b/runtime_sources/linux-x86_64/runtime.lock.json index 923f883..22219ea 100644 --- a/runtime_sources/linux-x86_64/runtime.lock.json +++ b/runtime_sources/linux-x86_64/runtime.lock.json @@ -9,9 +9,9 @@ "base_distro": "debian-bookworm-20250210" }, "capabilities": { - "vm_boot": false, - "guest_exec": false, - "guest_network": false + "vm_boot": true, + "guest_exec": true, + "guest_network": true }, "binaries": { "firecracker": "bin/firecracker", diff --git a/src/pyro_mcp/runtime_build.py b/src/pyro_mcp/runtime_build.py index fddee51..8edaf0c 100644 --- a/src/pyro_mcp/runtime_build.py +++ b/src/pyro_mcp/runtime_build.py @@ -9,6 +9,7 @@ import shutil import subprocess import tarfile import urllib.request +import uuid from dataclasses import dataclass from pathlib import Path from typing import Any @@ -307,6 +308,7 @@ def materialize_rootfs( packages_path = paths.source_platform_root / raw_packages_path output_path = paths.materialized_platform_root / profile["rootfs"] output_path.parent.mkdir(parents=True, exist_ok=True) + profile_workdir = workdir / f"{profile_name}-{uuid.uuid4().hex[:8]}" _run( [ str(script_path), @@ -325,7 +327,7 @@ def materialize_rootfs( "--agent-service", str(service_path), "--workdir", - str(workdir / profile_name), + str(profile_workdir), "--output", str(output_path), ] diff --git a/src/pyro_mcp/runtime_network_check.py b/src/pyro_mcp/runtime_network_check.py new file mode 100644 index 0000000..609853c --- /dev/null +++ b/src/pyro_mcp/runtime_network_check.py @@ -0,0 +1,96 @@ +"""Direct guest-network validation for a bundled runtime profile.""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass +from pathlib import Path + +from pyro_mcp.vm_manager import VmManager +from pyro_mcp.vm_network import TapNetworkManager + +NETWORK_CHECK_COMMAND = ( + "rm -rf hello-world " + "&& git clone --depth 1 https://github.com/octocat/Hello-World.git hello-world >/dev/null " + "&& git -C hello-world rev-parse --is-inside-work-tree" +) + + +@dataclass(frozen=True) +class NetworkCheckResult: + vm_id: str + execution_mode: str + network_enabled: bool + exit_code: int + stdout: str + stderr: str + cleanup: dict[str, object] + + +def run_network_check( + *, + profile: str = "debian-git", + vcpu_count: int = 1, + mem_mib: int = 1024, + ttl_seconds: int = 600, + timeout_seconds: int = 120, + base_dir: Path | None = None, +) -> NetworkCheckResult: # pragma: no cover - integration helper + manager = VmManager( + base_dir=base_dir, + network_manager=TapNetworkManager(enabled=True), + ) + created = manager.create_vm( + profile=profile, + vcpu_count=vcpu_count, + mem_mib=mem_mib, + ttl_seconds=ttl_seconds, + ) + vm_id = str(created["vm_id"]) + manager.start_vm(vm_id) + status = manager.status_vm(vm_id) + executed = manager.exec_vm( + vm_id, + command=NETWORK_CHECK_COMMAND, + timeout_seconds=timeout_seconds, + ) + return NetworkCheckResult( + vm_id=vm_id, + execution_mode=str(executed["execution_mode"]), + network_enabled=bool(status["network_enabled"]), + exit_code=int(executed["exit_code"]), + stdout=str(executed["stdout"]), + stderr=str(executed["stderr"]), + cleanup=dict(executed["cleanup"]), + ) + + +def main() -> None: # pragma: no cover - CLI wiring + parser = argparse.ArgumentParser(description="Run a guest networking check.") + parser.add_argument("--profile", default="debian-git") + parser.add_argument("--vcpu-count", type=int, default=1) + parser.add_argument("--mem-mib", type=int, default=1024) + parser.add_argument("--ttl-seconds", type=int, default=600) + parser.add_argument("--timeout-seconds", type=int, default=120) + args = parser.parse_args() + + result = run_network_check( + profile=args.profile, + vcpu_count=args.vcpu_count, + mem_mib=args.mem_mib, + ttl_seconds=args.ttl_seconds, + timeout_seconds=args.timeout_seconds, + ) + print(f"[network] vm_id={result.vm_id}") + print(f"[network] execution_mode={result.execution_mode}") + print(f"[network] network_enabled={result.network_enabled}") + print(f"[network] exit_code={result.exit_code}") + if result.exit_code == 0 and result.stdout.strip() == "true": + print("[network] result=success") + return + print("[network] result=failure") + if result.stdout.strip(): + print(f"[network] stdout={result.stdout.strip()}") + if result.stderr.strip(): + print(f"[network] stderr={result.stderr.strip()}") + raise SystemExit(1) diff --git a/src/pyro_mcp/vm_guest.py b/src/pyro_mcp/vm_guest.py index 7d9a13b..772f998 100644 --- a/src/pyro_mcp/vm_guest.py +++ b/src/pyro_mcp/vm_guest.py @@ -5,13 +5,13 @@ from __future__ import annotations import json import socket from dataclasses import dataclass -from typing import Callable, Protocol +from typing import Any, Callable, Protocol class SocketLike(Protocol): def settimeout(self, timeout: int) -> None: ... - def connect(self, address: tuple[int, int]) -> None: ... + def connect(self, address: Any) -> None: ... def sendall(self, data: bytes) -> None: ... @@ -38,19 +38,35 @@ class VsockExecClient: self._socket_factory = socket_factory or socket.socket def exec( - self, guest_cid: int, port: int, command: str, timeout_seconds: int + self, + guest_cid: int, + port: int, + command: str, + timeout_seconds: int, + *, + uds_path: str | None = None, ) -> GuestExecResponse: request = { "command": command, "timeout_seconds": timeout_seconds, } family = getattr(socket, "AF_VSOCK", None) - if family is None: + if family is not None: + sock = self._socket_factory(family, socket.SOCK_STREAM) + connect_address: Any = (guest_cid, port) + elif uds_path is not None: + sock = self._socket_factory(socket.AF_UNIX, socket.SOCK_STREAM) + connect_address = uds_path + else: raise RuntimeError("vsock sockets are not supported on this host Python runtime") - sock = self._socket_factory(family, socket.SOCK_STREAM) try: sock.settimeout(timeout_seconds) - sock.connect((guest_cid, port)) + sock.connect(connect_address) + if family is None: + sock.sendall(f"CONNECT {port}\n".encode("utf-8")) + status = self._recv_line(sock) + if not status.startswith("OK "): + raise RuntimeError(f"vsock unix bridge rejected port {port}: {status.strip()}") sock.sendall((json.dumps(request) + "\n").encode("utf-8")) chunks: list[bytes] = [] while True: @@ -70,3 +86,15 @@ class VsockExecClient: exit_code=int(payload.get("exit_code", -1)), duration_ms=int(payload.get("duration_ms", 0)), ) + + @staticmethod + def _recv_line(sock: SocketLike) -> str: + chunks: list[bytes] = [] + while True: + data = sock.recv(1) + if data == b"": + break + chunks.append(data) + if data == b"\n": + break + return b"".join(chunks).decode("utf-8", errors="replace") diff --git a/src/pyro_mcp/vm_manager.py b/src/pyro_mcp/vm_manager.py index 4e38464..a4b6ef6 100644 --- a/src/pyro_mcp/vm_manager.py +++ b/src/pyro_mcp/vm_manager.py @@ -4,6 +4,7 @@ from __future__ import annotations import os import shutil +import signal import subprocess import threading import time @@ -143,6 +144,7 @@ class FirecrackerBackend(VmBackend): # pragma: no cover self._runtime_capabilities = runtime_capabilities self._network_manager = network_manager or TapNetworkManager() self._guest_exec_client = guest_exec_client or VsockExecClient() + self._processes: dict[str, subprocess.Popen[str]] = {} if not self._firecracker_bin.exists(): raise RuntimeError(f"bundled firecracker binary not found at {self._firecracker_bin}") if not self._jailer_bin.exists(): @@ -160,10 +162,16 @@ class FirecrackerBackend(VmBackend): # pragma: no cover f"{artifacts.kernel_image} and {artifacts.rootfs_image}" ) instance.metadata["kernel_image"] = str(artifacts.kernel_image) - instance.metadata["rootfs_image"] = str(artifacts.rootfs_image) - network = self._network_manager.allocate(instance.vm_id) - instance.network = network - instance.metadata.update(self._network_manager.to_metadata(network)) + rootfs_copy = instance.workdir / "rootfs.ext4" + shutil.copy2(artifacts.rootfs_image, rootfs_copy) + instance.metadata["rootfs_image"] = str(rootfs_copy) + if self._network_manager.enabled: + network = self._network_manager.allocate(instance.vm_id) + instance.network = network + instance.metadata.update(self._network_manager.to_metadata(network)) + else: + instance.network = None + instance.metadata["network_enabled"] = "false" except Exception: shutil.rmtree(instance.workdir, ignore_errors=True) raise @@ -175,6 +183,12 @@ class FirecrackerBackend(VmBackend): # pragma: no cover instance.metadata["guest_exec_path"] = str(launch_plan.guest_exec_path) instance.metadata["guest_cid"] = str(launch_plan.guest_cid) instance.metadata["guest_exec_port"] = str(launch_plan.vsock_port) + instance.metadata["guest_exec_uds_path"] = str(instance.workdir / "vsock.sock") + serial_log_path = instance.workdir / "serial.log" + firecracker_log_path = instance.workdir / "firecracker.log" + firecracker_log_path.touch() + instance.metadata["serial_log_path"] = str(serial_log_path) + instance.metadata["firecracker_log_path"] = str(firecracker_log_path) proc = subprocess.run( # noqa: S603 [str(self._firecracker_bin), "--version"], text=True, @@ -191,14 +205,60 @@ class FirecrackerBackend(VmBackend): # pragma: no cover if self._runtime_capabilities.reason is not None: instance.metadata["runtime_reason"] = self._runtime_capabilities.reason return - instance.metadata["execution_mode"] = "guest_vsock" + with serial_log_path.open("w", encoding="utf-8") as serial_fp: + process = subprocess.Popen( # noqa: S603 + [ + str(self._firecracker_bin), + "--no-api", + "--config-file", + str(launch_plan.config_path), + "--log-path", + str(firecracker_log_path), + "--level", + "Info", + ], + stdout=serial_fp, + stderr=subprocess.STDOUT, + text=True, + ) + self._processes[instance.vm_id] = process + time.sleep(2) + if process.poll() is not None: + serial_log = serial_log_path.read_text(encoding="utf-8", errors="ignore") + firecracker_log = firecracker_log_path.read_text(encoding="utf-8", errors="ignore") + self._processes.pop(instance.vm_id, None) + raise RuntimeError( + "firecracker microVM exited during startup: " + f"{(serial_log or firecracker_log).strip()}" + ) + instance.firecracker_pid = process.pid + instance.metadata["execution_mode"] = ( + "guest_vsock" if self._runtime_capabilities.supports_guest_exec else "host_compat" + ) instance.metadata["boot_mode"] = "native" def exec(self, instance: VmInstance, command: str, timeout_seconds: int) -> VmExecResult: if self._runtime_capabilities.supports_guest_exec: guest_cid = int(instance.metadata["guest_cid"]) port = int(instance.metadata["guest_exec_port"]) - response = self._guest_exec_client.exec(guest_cid, port, command, timeout_seconds) + uds_path = instance.metadata.get("guest_exec_uds_path") + deadline = time.monotonic() + min(timeout_seconds, 10) + while True: + try: + response = self._guest_exec_client.exec( + guest_cid, + port, + command, + timeout_seconds, + uds_path=uds_path, + ) + break + except (OSError, RuntimeError) as exc: + if time.monotonic() >= deadline: + raise RuntimeError( + f"guest exec transport did not become ready: {exc}" + ) from exc + time.sleep(0.2) return VmExecResult( stdout=response.stdout, stderr=response.stderr, @@ -209,9 +269,36 @@ class FirecrackerBackend(VmBackend): # pragma: no cover return _run_host_command(instance.workdir, command, timeout_seconds) def stop(self, instance: VmInstance) -> None: - del instance + process = self._processes.pop(instance.vm_id, None) + if process is not None: + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + process.wait(timeout=5) + instance.firecracker_pid = None + return + if instance.firecracker_pid is None: + return + try: + os.kill(instance.firecracker_pid, signal.SIGTERM) + except ProcessLookupError: + instance.firecracker_pid = None + return + deadline = time.monotonic() + 5 + while time.monotonic() < deadline: + try: + os.kill(instance.firecracker_pid, 0) + except ProcessLookupError: + instance.firecracker_pid = None + return + time.sleep(0.1) + os.kill(instance.firecracker_pid, signal.SIGKILL) + instance.firecracker_pid = None def delete(self, instance: VmInstance) -> None: + self._processes.pop(instance.vm_id, None) if instance.network is not None: self._network_manager.cleanup(instance.network) shutil.rmtree(instance.workdir, ignore_errors=True) diff --git a/src/pyro_mcp/vm_network.py b/src/pyro_mcp/vm_network.py index f24b49f..58f3096 100644 --- a/src/pyro_mcp/vm_network.py +++ b/src/pyro_mcp/vm_network.py @@ -34,6 +34,7 @@ class NetworkDiagnostics: nft_binary: str | None iptables_binary: str | None ip_forward_enabled: bool + sudo_non_interactive: bool class TapNetworkManager: @@ -44,11 +45,13 @@ class TapNetworkManager: *, enabled: bool | None = None, runner: CommandRunner | None = None, + use_sudo: bool | None = None, ) -> None: if enabled is None: self._enabled = os.environ.get("PYRO_VM_ENABLE_NETWORK") == "1" else: self._enabled = enabled + self._use_sudo = self._detect_sudo() if use_sudo is None else use_sudo self._runner = runner or self._run @staticmethod @@ -67,6 +70,7 @@ class TapNetworkManager: nft_binary=shutil.which("nft"), iptables_binary=shutil.which("iptables"), ip_forward_enabled=ip_forward, + sudo_non_interactive=TapNetworkManager._detect_sudo(), ) @property @@ -90,7 +94,44 @@ class TapNetworkManager: mac_address=mac_address, ) if self._enabled: - self._ensure_host_network(config) + try: + self._ensure_host_network(config) + except Exception: + table_name = self._nft_table_name(config.vm_id) + self._run_ignore(["nft", "delete", "table", "ip", table_name]) + self._run_ignore( + [ + "iptables", + "-t", + "nat", + "-D", + "POSTROUTING", + "-s", + config.subnet_cidr, + "-j", + "MASQUERADE", + ] + ) + self._run_ignore( + ["iptables", "-D", "FORWARD", "-i", config.tap_name, "-j", "ACCEPT"] + ) + self._run_ignore( + [ + "iptables", + "-D", + "FORWARD", + "-o", + config.tap_name, + "-m", + "conntrack", + "--ctstate", + "RELATED,ESTABLISHED", + "-j", + "ACCEPT", + ] + ) + self._run_ignore(["ip", "link", "del", config.tap_name]) + raise return config def cleanup(self, config: NetworkConfig) -> None: @@ -98,6 +139,35 @@ class TapNetworkManager: return table_name = self._nft_table_name(config.vm_id) self._run_ignore(["nft", "delete", "table", "ip", table_name]) + self._run_ignore( + [ + "iptables", + "-t", + "nat", + "-D", + "POSTROUTING", + "-s", + config.subnet_cidr, + "-j", + "MASQUERADE", + ] + ) + self._run_ignore(["iptables", "-D", "FORWARD", "-i", config.tap_name, "-j", "ACCEPT"]) + self._run_ignore( + [ + "iptables", + "-D", + "FORWARD", + "-o", + config.tap_name, + "-m", + "conntrack", + "--ctstate", + "RELATED,ESTABLISHED", + "-j", + "ACCEPT", + ] + ) self._run_ignore(["ip", "link", "del", config.tap_name]) def to_metadata(self, config: NetworkConfig) -> dict[str, str]: @@ -129,15 +199,33 @@ class TapNetworkManager: raise RuntimeError("/dev/net/tun is not available on this host") if diagnostics.ip_binary is None: raise RuntimeError("`ip` binary is required for TAP setup") - if diagnostics.nft_binary is None: - raise RuntimeError("`nft` binary is required for outbound NAT setup") + if diagnostics.nft_binary is None and diagnostics.iptables_binary is None: + raise RuntimeError("`nft` or `iptables` is required for outbound NAT setup") if not diagnostics.ip_forward_enabled: raise RuntimeError("IPv4 forwarding is disabled on this host") - self._runner(["ip", "tuntap", "add", "dev", config.tap_name, "mode", "tap"]) + self._runner( + [ + "ip", + "tuntap", + "add", + "dev", + config.tap_name, + "mode", + "tap", + "user", + str(os.getuid()), + ] + ) self._runner(["ip", "addr", "add", f"{config.gateway_ip}/24", "dev", config.tap_name]) self._runner(["ip", "link", "set", config.tap_name, "up"]) + if diagnostics.nft_binary is not None: + self._ensure_nft_network(config) + return + self._ensure_iptables_network(config) + + def _ensure_nft_network(self, config: NetworkConfig) -> None: table_name = self._nft_table_name(config.vm_id) self._runner(["nft", "add", "table", "ip", table_name]) self._runner( @@ -159,18 +247,102 @@ class TapNetworkManager: "}", ] ) - self._runner([ - "nft", - "add", - "rule", - "ip", - table_name, - "postrouting", - "ip", - "saddr", - config.subnet_cidr, - "masquerade", - ]) + self._runner( + [ + "nft", + "add", + "chain", + "ip", + table_name, + "forward", + "{", + "type", + "filter", + "hook", + "forward", + "priority", + "filter", + ";", + "policy", + "accept", + ";", + "}", + ] + ) + self._runner( + [ + "nft", + "add", + "rule", + "ip", + table_name, + "postrouting", + "ip", + "saddr", + config.subnet_cidr, + "masquerade", + ] + ) + self._runner( + [ + "nft", + "add", + "rule", + "ip", + table_name, + "forward", + "iifname", + config.tap_name, + "accept", + ] + ) + self._runner( + [ + "nft", + "add", + "rule", + "ip", + table_name, + "forward", + "oifname", + config.tap_name, + "ct", + "state", + "related,established", + "accept", + ] + ) + + def _ensure_iptables_network(self, config: NetworkConfig) -> None: + self._runner( + [ + "iptables", + "-t", + "nat", + "-A", + "POSTROUTING", + "-s", + config.subnet_cidr, + "-j", + "MASQUERADE", + ] + ) + self._runner(["iptables", "-A", "FORWARD", "-i", config.tap_name, "-j", "ACCEPT"]) + self._runner( + [ + "iptables", + "-A", + "FORWARD", + "-o", + config.tap_name, + "-m", + "conntrack", + "--ctstate", + "RELATED,ESTABLISHED", + "-j", + "ACCEPT", + ] + ) def _run_ignore(self, command: list[str]) -> None: try: @@ -183,9 +355,30 @@ class TapNetworkManager: return f"pyro_{vm_id[:12]}" @staticmethod - def _run(command: list[str]) -> subprocess.CompletedProcess[str]: - completed = subprocess.run(command, text=True, capture_output=True, check=False) + def _detect_sudo() -> bool: + if os.geteuid() == 0: + return False + if shutil.which("sudo") is None: + return False + completed = subprocess.run( + ["sudo", "-n", "true"], + text=True, + capture_output=True, + check=False, + ) + return completed.returncode == 0 + + def _run(self, command: list[str]) -> subprocess.CompletedProcess[str]: + effective_command = command + if self._use_sudo: + effective_command = ["sudo", "-n", *command] + completed = subprocess.run( + effective_command, + text=True, + capture_output=True, + check=False, + ) if completed.returncode != 0: stderr = completed.stderr.strip() or completed.stdout.strip() - raise RuntimeError(f"command {' '.join(command)!r} failed: {stderr}") + raise RuntimeError(f"command {' '.join(effective_command)!r} failed: {stderr}") return completed diff --git a/tests/test_runtime.py b/tests/test_runtime.py index 2359c3d..4e6b096 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -85,9 +85,9 @@ def test_doctor_report_has_runtime_fields() -> None: assert "tun_available" in networking -def test_runtime_capabilities_reports_shim_bundle() -> None: +def test_runtime_capabilities_reports_real_bundle_flags() -> None: paths = resolve_runtime_paths() capabilities = runtime_capabilities(paths) - assert capabilities.supports_vm_boot is False - assert capabilities.supports_guest_exec is False - assert capabilities.supports_guest_network is False + assert capabilities.supports_vm_boot is True + assert capabilities.supports_guest_exec is True + assert capabilities.supports_guest_network is True diff --git a/tests/test_runtime_network_check.py b/tests/test_runtime_network_check.py new file mode 100644 index 0000000..88a3684 --- /dev/null +++ b/tests/test_runtime_network_check.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import pytest + +import pyro_mcp.runtime_network_check as runtime_network_check +from pyro_mcp.vm_network import TapNetworkManager + + +def test_network_check_uses_network_enabled_manager(monkeypatch: pytest.MonkeyPatch) -> None: + observed: dict[str, object] = {} + + class StubManager: + def __init__(self, **kwargs: object) -> None: + observed.update(kwargs) + + def create_vm(self, **kwargs: object) -> dict[str, object]: + observed["create_kwargs"] = kwargs + return {"vm_id": "vm123"} + + def start_vm(self, vm_id: str) -> dict[str, object]: + observed["started_vm_id"] = vm_id + return {"state": "started"} + + def status_vm(self, vm_id: str) -> dict[str, object]: + observed["status_vm_id"] = vm_id + return {"network_enabled": True} + + def exec_vm(self, vm_id: str, *, command: str, timeout_seconds: int) -> dict[str, object]: + observed["exec_vm_id"] = vm_id + observed["command"] = command + observed["timeout_seconds"] = timeout_seconds + return { + "execution_mode": "guest_vsock", + "exit_code": 0, + "stdout": "true\n", + "stderr": "", + "cleanup": {"deleted": True, "vm_id": vm_id, "reason": "post_exec_cleanup"}, + } + + monkeypatch.setattr(runtime_network_check, "VmManager", StubManager) + result = runtime_network_check.run_network_check() + + network_manager = observed["network_manager"] + assert isinstance(network_manager, TapNetworkManager) + assert network_manager.enabled is True + assert observed["command"] == runtime_network_check.NETWORK_CHECK_COMMAND + assert observed["timeout_seconds"] == 120 + assert result.execution_mode == "guest_vsock" + assert result.network_enabled is True + assert result.exit_code == 0 + + +def test_network_check_main_fails_on_unsuccessful_command( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setattr( + runtime_network_check, + "run_network_check", + lambda **kwargs: runtime_network_check.NetworkCheckResult( + vm_id="vm123", + execution_mode="guest_vsock", + network_enabled=True, + exit_code=1, + stdout="", + stderr="curl failed", + cleanup={"deleted": True}, + ), + ) + with pytest.raises(SystemExit, match="1"): + runtime_network_check.main() + output = capsys.readouterr().out + assert "[network] result=failure" in output + assert "[network] stderr=curl failed" in output diff --git a/tests/test_vm_guest.py b/tests/test_vm_guest.py index 32e629b..fe51894 100644 --- a/tests/test_vm_guest.py +++ b/tests/test_vm_guest.py @@ -8,9 +8,13 @@ from pyro_mcp.vm_guest import VsockExecClient class StubSocket: - def __init__(self, response: bytes) -> None: - self.response = response - self.connected: tuple[int, int] | None = None + def __init__(self, responses: list[bytes] | bytes) -> None: + if isinstance(responses, bytes): + self.responses = [responses] + else: + self.responses = responses + self._buffer = b"" + self.connected: object | None = None self.sent = b"" self.timeout: int | None = None self.closed = False @@ -25,10 +29,12 @@ class StubSocket: self.sent += data def recv(self, size: int) -> bytes: - del size - if self.response == b"": + if not self._buffer and self.responses: + self._buffer = self.responses.pop(0) + if not self._buffer: return b"" - data, self.response = self.response, b"" + data = self._buffer[:size] + self._buffer = self._buffer[size:] return data def close(self) -> None: @@ -62,3 +68,36 @@ def test_vsock_exec_client_rejects_bad_json(monkeypatch: pytest.MonkeyPatch) -> client = VsockExecClient(socket_factory=lambda family, sock_type: stub) with pytest.raises(RuntimeError, match="JSON object"): client.exec(1234, 5005, "echo ok", 30) + + +def test_vsock_exec_client_uses_unix_bridge_when_vsock_is_unavailable( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delattr(socket, "AF_VSOCK", raising=False) + stub = StubSocket( + [ + b"OK 1073746829\n", + b'{"stdout":"ready\\n","stderr":"","exit_code":0,"duration_ms":5}', + ] + ) + + def socket_factory(family: int, sock_type: int) -> StubSocket: + assert family == socket.AF_UNIX + assert sock_type == socket.SOCK_STREAM + return stub + + client = VsockExecClient(socket_factory=socket_factory) + response = client.exec(1234, 5005, "echo ready", 30, uds_path="/tmp/vsock.sock") + + assert response.stdout == "ready\n" + assert stub.connected == "/tmp/vsock.sock" + assert stub.sent.startswith(b"CONNECT 5005\n") + + +def test_vsock_exec_client_requires_transport_when_vsock_is_unavailable( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delattr(socket, "AF_VSOCK", raising=False) + client = VsockExecClient(socket_factory=lambda family, sock_type: StubSocket(b"")) + with pytest.raises(RuntimeError, match="not supported"): + client.exec(1234, 5005, "echo ok", 30)