Add seeded task workspace creation

Until now, persistent tasks always started with an empty workspace, which blocked the first useful host-to-task workflow in the task roadmap. This change lets task creation start from a host directory or tar archive without changing the one-shot VM surfaces.

Expose source_path on task create across the CLI, SDK, and MCP, add safe archive upload and extraction support for guest and host-compat backends, persist workspace_seed metadata, and patch the per-task rootfs with the bundled guest agent before boot so seeded guest tasks work without republishing environments. Also switch reconstruction of the command that follows `--` to shlex.join() so documented sh -lc task examples preserve argument boundaries.

Validation:
- uv lock
- UV_CACHE_DIR=.uv-cache uv run pytest --no-cov tests/test_vm_guest.py tests/test_vm_manager.py tests/test_cli.py tests/test_api.py tests/test_server.py tests/test_public_contract.py
- UV_CACHE_DIR=.uv-cache make check
- UV_CACHE_DIR=.uv-cache make dist-check
- real guest-backed smoke: task create --source-path, task exec -- cat note.txt, task delete
This commit is contained in:
Thales Maciel 2026-03-11 21:45:38 -03:00
parent 58df176148
commit aa886b346e
25 changed files with 1076 additions and 75 deletions

View file

@ -34,7 +34,7 @@ Kernel build note:
Current status:
1. Firecracker and Jailer are materialized from pinned official release artifacts.
2. The kernel and rootfs images are built from pinned inputs into `build/runtime_sources/`.
3. The guest agent is installed into each rootfs and used for vsock exec.
3. The guest agent is installed into each rootfs and used for vsock exec plus workspace archive imports.
4. `runtime.lock.json` now advertises real guest capabilities.
Safety rule:

View file

@ -1,26 +1,31 @@
#!/usr/bin/env python3
"""Minimal guest-side exec agent for pyro runtime bundles."""
"""Minimal guest-side exec and workspace import agent for pyro runtime bundles."""
from __future__ import annotations
import io
import json
import os
import socket
import subprocess
import tarfile
import time
from pathlib import Path, PurePosixPath
from typing import Any
PORT = 5005
BUFFER_SIZE = 65536
WORKSPACE_ROOT = PurePosixPath("/workspace")
def _read_request(conn: socket.socket) -> dict[str, Any]:
chunks: list[bytes] = []
while True:
data = conn.recv(BUFFER_SIZE)
data = conn.recv(1)
if data == b"":
break
chunks.append(data)
if b"\n" in data:
if data == b"\n":
break
payload = json.loads(b"".join(chunks).decode("utf-8").strip())
if not isinstance(payload, dict):
@ -28,6 +33,130 @@ def _read_request(conn: socket.socket) -> dict[str, Any]:
return payload
def _read_exact(conn: socket.socket, size: int) -> bytes:
    """Read exactly ``size`` bytes from ``conn`` and return them as one blob.

    Data is pulled in pieces of at most ``BUFFER_SIZE`` bytes. A ``size`` of
    zero or less returns ``b""`` without reading from the socket.

    Raises:
        RuntimeError: the peer closed the connection before ``size`` bytes
            had been received.
    """
    received: list[bytes] = []
    outstanding = size
    while outstanding > 0:
        piece = conn.recv(min(BUFFER_SIZE, outstanding))
        if not piece:
            # recv() returning no data means the peer closed the connection.
            raise RuntimeError("unexpected EOF while reading archive payload")
        received.append(piece)
        outstanding -= len(piece)
    return b"".join(received)
def _normalize_member_name(name: str) -> PurePosixPath:
candidate = name.strip()
if candidate == "":
raise RuntimeError("archive member path is empty")
member_path = PurePosixPath(candidate)
if member_path.is_absolute():
raise RuntimeError(f"absolute archive member paths are not allowed: {name}")
parts = [part for part in member_path.parts if part not in {"", "."}]
if any(part == ".." for part in parts):
raise RuntimeError(f"unsafe archive member path: {name}")
normalized = PurePosixPath(*parts)
if str(normalized) in {"", "."}:
raise RuntimeError(f"unsafe archive member path: {name}")
return normalized
def _normalize_destination(destination: str) -> tuple[PurePosixPath, Path]:
    """Resolve a requested extraction destination to a path under /workspace.

    Relative destinations are interpreted relative to ``WORKSPACE_ROOT``;
    absolute ones must already start with it. Empty and ``.`` components are
    dropped.

    Returns:
        A ``(normalized, host_path)`` pair: the cleaned absolute POSIX path
        and the matching concrete ``Path`` rooted at ``/workspace``.

    Raises:
        RuntimeError: the destination is empty, contains a ``..`` component,
            or does not live under ``/workspace``.
    """
    candidate = destination.strip()
    if candidate == "":
        raise RuntimeError("destination must not be empty")
    destination_path = PurePosixPath(candidate)
    if not destination_path.is_absolute():
        destination_path = WORKSPACE_ROOT / destination_path
    parts = [part for part in destination_path.parts if part not in {"", "."}]
    # Pure paths never collapse "..", so "/workspace/../etc" would satisfy the
    # prefix check below while resolving outside the workspace on disk.
    # Reject parent references outright instead of trying to resolve them.
    if ".." in parts:
        raise RuntimeError("destination must stay inside /workspace")
    normalized = PurePosixPath("/") / PurePosixPath(*parts)
    if normalized == PurePosixPath("/"):
        raise RuntimeError("destination must stay inside /workspace")
    if normalized.parts[: len(WORKSPACE_ROOT.parts)] != WORKSPACE_ROOT.parts:
        raise RuntimeError("destination must stay inside /workspace")
    suffix = normalized.relative_to(WORKSPACE_ROOT)
    host_path = Path("/workspace")
    # relative_to() yields "." when the destination is the workspace root.
    if str(suffix) not in {"", "."}:
        host_path = host_path / str(suffix)
    return normalized, host_path
def _validate_symlink_target(member_path: PurePosixPath, link_target: str) -> None:
target = link_target.strip()
if target == "":
raise RuntimeError(f"symlink {member_path} has an empty target")
target_path = PurePosixPath(target)
if target_path.is_absolute():
raise RuntimeError(f"symlink {member_path} escapes the workspace")
combined = member_path.parent.joinpath(target_path)
parts = [part for part in combined.parts if part not in {"", "."}]
if any(part == ".." for part in parts):
raise RuntimeError(f"symlink {member_path} escapes the workspace")
def _ensure_no_symlink_parents(root: Path, target_path: Path, member_name: str) -> None:
relative_path = target_path.relative_to(root)
current = root
for part in relative_path.parts[:-1]:
current = current / part
if current.is_symlink():
raise RuntimeError(
f"archive member would traverse through a symlinked path: {member_name}"
)
def _extract_archive(payload: bytes, destination: str) -> dict[str, Any]:
    """Unpack an in-memory tar archive into a directory under /workspace.

    Each member is validated before anything is written: its name must be a
    safe relative path, and none of its parent directories may be a symlink.
    Directories, regular files and symlinks are supported; hard links and all
    other member types are rejected. Only file contents are written; the
    code does not apply the modes or ownership recorded in the archive.

    Args:
        payload: Raw tar bytes (any compression ``tarfile`` auto-detects).
        destination: Target directory, validated by ``_normalize_destination``.

    Returns:
        A dict with the ``destination`` as given, the ``entry_count`` of
        members processed, and ``bytes_written`` (the sum of regular-file
        sizes).

    Raises:
        RuntimeError: a member is unsafe, unsupported, or conflicts with
            something that already exists at its target path.
    """
    _, destination_root = _normalize_destination(destination)
    destination_root.mkdir(parents=True, exist_ok=True)
    bytes_written = 0
    entry_count = 0
    with tarfile.open(fileobj=io.BytesIO(payload), mode="r:*") as archive:
        for member in archive.getmembers():
            member_name = _normalize_member_name(member.name)
            target_path = destination_root / str(member_name)
            entry_count += 1
            _ensure_no_symlink_parents(destination_root, target_path, member.name)

            if member.isdir():
                if target_path.exists() and not target_path.is_dir():
                    raise RuntimeError(f"directory conflicts with existing path: {member.name}")
                target_path.mkdir(parents=True, exist_ok=True)
                continue

            if member.isfile():
                target_path.parent.mkdir(parents=True, exist_ok=True)
                # Test is_symlink() on its own rather than behind exists():
                # exists() follows links, so a dangling symlink would look
                # absent and open("wb") would then write through it to
                # wherever it points.
                if target_path.is_symlink() or target_path.is_dir():
                    raise RuntimeError(f"file conflicts with existing path: {member.name}")
                source = archive.extractfile(member)
                if source is None:
                    raise RuntimeError(f"failed to read archive member: {member.name}")
                with target_path.open("wb") as handle:
                    while True:
                        chunk = source.read(BUFFER_SIZE)
                        if chunk == b"":
                            break
                        handle.write(chunk)
                bytes_written += member.size
                continue

            if member.issym():
                _validate_symlink_target(member_name, member.linkname)
                target_path.parent.mkdir(parents=True, exist_ok=True)
                if target_path.exists() and not target_path.is_symlink():
                    raise RuntimeError(f"symlink conflicts with existing path: {member.name}")
                # An existing symlink (dangling or not) is replaced in place.
                if target_path.is_symlink():
                    target_path.unlink()
                os.symlink(member.linkname, target_path)
                continue

            if member.islnk():
                raise RuntimeError(
                    f"hard links are not allowed in workspace archives: {member.name}"
                )
            raise RuntimeError(f"unsupported archive member type: {member.name}")
    return {
        "destination": destination,
        "entry_count": entry_count,
        "bytes_written": bytes_written,
    }
def _run_command(command: str, timeout_seconds: int) -> dict[str, Any]:
started = time.monotonic()
try:
@ -64,9 +193,18 @@ def main() -> None:
conn, _ = server.accept()
with conn:
request = _read_request(conn)
command = str(request.get("command", ""))
timeout_seconds = int(request.get("timeout_seconds", 30))
response = _run_command(command, timeout_seconds)
action = str(request.get("action", "exec"))
if action == "extract_archive":
archive_size = int(request.get("archive_size", 0))
if archive_size < 0:
raise RuntimeError("archive_size must not be negative")
destination = str(request.get("destination", "/workspace"))
payload = _read_exact(conn, archive_size)
response = _extract_archive(payload, destination)
else:
command = str(request.get("command", ""))
timeout_seconds = int(request.get("timeout_seconds", 30))
response = _run_command(command, timeout_seconds)
conn.sendall((json.dumps(response) + "\n").encode("utf-8"))

View file

@ -5,7 +5,7 @@
"firecracker": "1.12.1",
"jailer": "1.12.1",
"kernel": "5.10.210",
"guest_agent": "0.1.0-dev",
"guest_agent": "0.2.0-dev",
"base_distro": "debian-bookworm-20250210"
},
"capabilities": {