Add seeded task workspace creation

Until now, persistent tasks always started with an empty workspace, which blocked the first useful host-to-task workflow in the task roadmap. This change lets task creation start from a host directory or tar archive without changing the one-shot VM surfaces.

Expose source_path on task create across the CLI, SDK, and MCP, add safe archive upload and extraction support for guest and host-compat backends, persist workspace_seed metadata, and patch the per-task rootfs with the bundled guest agent before boot so seeded guest tasks work without republishing environments. Also switch post-`--` command reconstruction to shlex.join() so documented sh -lc task examples preserve argument boundaries.

Validation:
- uv lock
- UV_CACHE_DIR=.uv-cache uv run pytest --no-cov tests/test_vm_guest.py tests/test_vm_manager.py tests/test_cli.py tests/test_api.py tests/test_server.py tests/test_public_contract.py
- UV_CACHE_DIR=.uv-cache make check
- UV_CACHE_DIR=.uv-cache make dist-check
- real guest-backed smoke: task create --source-path, task exec -- cat note.txt, task delete
This commit is contained in:
Thales Maciel 2026-03-11 21:45:38 -03:00
parent 58df176148
commit aa886b346e
25 changed files with 1076 additions and 75 deletions

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import argparse
import json
import shlex
import sys
from textwrap import dedent
from typing import Any
@ -155,6 +156,14 @@ def _print_task_summary_human(payload: dict[str, Any], *, action: str) -> None:
print(f"Environment: {str(payload.get('environment', 'unknown'))}")
print(f"State: {str(payload.get('state', 'unknown'))}")
print(f"Workspace: {str(payload.get('workspace_path', '/workspace'))}")
workspace_seed = payload.get("workspace_seed")
if isinstance(workspace_seed, dict):
mode = str(workspace_seed.get("mode", "empty"))
source_path = workspace_seed.get("source_path")
if isinstance(source_path, str) and source_path != "":
print(f"Workspace seed: {mode} from {source_path}")
else:
print(f"Workspace seed: {mode}")
print(f"Execution mode: {str(payload.get('execution_mode', 'pending'))}")
print(
f"Resources: {int(payload.get('vcpu_count', 0))} vCPU / "
@ -446,7 +455,7 @@ def _build_parser() -> argparse.ArgumentParser:
epilog=dedent(
"""
Examples:
pyro task create debian:12
pyro task create debian:12 --source-path ./repo
pyro task exec TASK_ID -- sh -lc 'printf "hello\\n" > note.txt'
pyro task logs TASK_ID
"""
@ -458,7 +467,13 @@ def _build_parser() -> argparse.ArgumentParser:
"create",
help="Create and start a persistent task workspace.",
description="Create a task workspace that stays alive across repeated exec calls.",
epilog="Example:\n pyro task create debian:12",
epilog=dedent(
"""
Examples:
pyro task create debian:12
pyro task create debian:12 --source-path ./repo
"""
),
formatter_class=_HelpFormatter,
)
task_create_parser.add_argument(
@ -497,6 +512,13 @@ def _build_parser() -> argparse.ArgumentParser:
"is unavailable."
),
)
task_create_parser.add_argument(
"--source-path",
help=(
"Optional host directory or .tar/.tar.gz/.tgz archive to seed into `/workspace` "
"before the task is returned."
),
)
task_create_parser.add_argument(
"--json",
action="store_true",
@ -663,7 +685,7 @@ def _require_command(command_args: list[str]) -> str:
command_args = command_args[1:]
if not command_args:
raise ValueError("command is required after `--`")
return " ".join(command_args)
return shlex.join(command_args)
def main() -> None:
@ -764,6 +786,7 @@ def main() -> None:
ttl_seconds=args.ttl_seconds,
network=args.network,
allow_host_compat=args.allow_host_compat,
source_path=args.source_path,
)
if bool(args.json):
_print_json(payload)