diff --git a/CHANGELOG.md b/CHANGELOG.md index c7b46e1..5c6ea7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable user-visible changes to `pyro-mcp` are documented here. +## 3.10.0 + +- Aligned the five guest-backed workspace smoke scenarios with the recipe docs + they advertise, so the smoke pack now follows the documented canonical user + paths instead of mixing in harness-only CLI formatting checks. +- Fixed the repro-plus-fix smoke to use the structured SDK patch flow directly, + removing its dependency on brittle human `[workspace-patch] ...` output. +- Promoted `make smoke-use-cases` in the docs as the trustworthy guest-backed + verification path for the advertised workspace workflows. + ## 3.9.0 - Added `--content-only` to `pyro workspace file read` and diff --git a/README.md b/README.md index ebb6b34..8e59c0a 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ It exposes the same runtime in three public forms: - Stable workspace walkthrough GIF: [docs/assets/workspace-first-run.gif](docs/assets/workspace-first-run.gif) - Terminal walkthrough GIF: [docs/assets/first-run.gif](docs/assets/first-run.gif) - PyPI package: [pypi.org/project/pyro-mcp](https://pypi.org/project/pyro-mcp/) -- What's new in 3.9.0: [CHANGELOG.md#390](CHANGELOG.md#390) +- What's new in 3.10.0: [CHANGELOG.md#3100](CHANGELOG.md#3100) - Host requirements: [docs/host-requirements.md](docs/host-requirements.md) - Integration targets: [docs/integrations.md](docs/integrations.md) - Public contract: [docs/public-contract.md](docs/public-contract.md) @@ -60,7 +60,7 @@ What success looks like: ```bash Platform: linux-x86_64 Runtime: PASS -Catalog version: 3.9.0 +Catalog version: 3.10.0 ... [pull] phase=install environment=debian:12 [pull] phase=ready environment=debian:12 @@ -85,7 +85,8 @@ diffs, exports, and reset. After that stable walkthrough works, continue with the recipe set in [docs/use-cases/README.md](docs/use-cases/README.md). 
It packages the five core workspace stories into documented flows plus real guest-backed smoke targets such as `make smoke-use-cases` and -`make smoke-repro-fix-loop`. +`make smoke-repro-fix-loop`. Treat `make smoke-use-cases` as the +trustworthy guest-backed release-gate path for the advertised workspace workflows. The commands below use plain `pyro ...`. Run the same flow with `uvx --from pyro-mcp pyro ...` for the published package, or `uv run pyro ...` from a source checkout. @@ -225,7 +226,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 3.9.0 +Catalog version: 3.10.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -342,7 +343,7 @@ machine consumption, use `--id-only` for only the identifier or `--json` for the workspace payload. Use `--seed-path` when you want the workspace to start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty workspace. Use `pyro workspace sync push` when you want to import -later host-side changes into a started workspace. Sync is non-atomic in `3.9.0`; if it fails +later host-side changes into a started workspace. Sync is non-atomic in `3.10.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the live `/workspace` tree to its immutable create-time baseline, and `pyro workspace export` to copy one changed file or directory back to the host. 
Use diff --git a/docs/first-run.md b/docs/first-run.md index 9260bd6..ea54aa7 100644 --- a/docs/first-run.md +++ b/docs/first-run.md @@ -22,7 +22,7 @@ Networking: tun=yes ip_forward=yes ```bash $ uvx --from pyro-mcp pyro env list -Catalog version: 3.9.0 +Catalog version: 3.10.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -126,7 +126,8 @@ snapshots, secrets, network policy, or disk tools. Once that stable workspace flow works, continue with the five recipe docs in [use-cases/README.md](use-cases/README.md) or run the real guest-backed smoke packs directly with -`make smoke-use-cases`. +`make smoke-use-cases`. Treat that smoke pack as the trustworthy guest-backed +verification path for the advertised workspace workflows. When you need repeated commands in one sandbox, switch to `pyro workspace ...`: @@ -153,8 +154,7 @@ $ uvx --from pyro-mcp pyro workspace file read WORKSPACE_ID src/note.txt hello from synced workspace [workspace-file-read] workspace_id=... path=/workspace/src/note.txt size_bytes=... truncated=False execution_mode=guest_vsock -$ uvx --from pyro-mcp pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)" -[workspace-patch] workspace_id=... total=... added=... modified=... deleted=... execution_mode=guest_vsock +$ uvx --from pyro-mcp pyro workspace patch apply WORKSPACE_ID --patch-file fix.patch $ uvx --from pyro-mcp pyro workspace exec WORKSPACE_ID -- cat src/note.txt hello from synced workspace @@ -259,7 +259,7 @@ State: started Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive instead of an empty `/workspace`. 
Use `pyro workspace sync push` when you need to import later host-side changes into a started -workspace. Sync is non-atomic in `3.9.0`; if it fails partway through, prefer `pyro workspace reset` +workspace. Sync is non-atomic in `3.10.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the current `/workspace` tree to its immutable create-time baseline, `pyro workspace snapshot *` to create named checkpoints, and `pyro workspace export` to copy one changed file or directory back to the diff --git a/docs/install.md b/docs/install.md index 22e457c..c00bb31 100644 --- a/docs/install.md +++ b/docs/install.md @@ -85,7 +85,7 @@ uvx --from pyro-mcp pyro env list Expected output: ```bash -Catalog version: 3.9.0 +Catalog version: 3.10.0 debian:12 [installed|not installed] Debian 12 environment with Git preinstalled for common agent workflows. debian:12-base [installed|not installed] Minimal Debian 12 environment for shell and core Unix tooling. debian:12-build [installed|not installed] Debian 12 environment with Git and common build tools preinstalled. @@ -171,6 +171,8 @@ When that stable workspace path is working, continue with the recipe index at [use-cases/README.md](use-cases/README.md). It groups the five core workspace stories and the real smoke targets behind them, starting with `make smoke-use-cases` or one of the per-scenario targets such as `make smoke-repro-fix-loop`. +Treat `make smoke-use-cases` as the trustworthy guest-backed verification path for the advertised +workspace workflows. ## 6. Optional demo proof point @@ -294,7 +296,7 @@ the identifier programmatically, use `--id-only` for only the identifier or `--j workspace payload. Use `--seed-path` when the workspace should start from a host directory or a local `.tar` / `.tar.gz` / `.tgz` archive. Use `pyro workspace sync push` for later host-side changes to a started workspace. 
Sync -is non-atomic in `3.9.0`; if it fails partway through, prefer `pyro workspace reset` to recover +is non-atomic in `3.10.0`; if it fails partway through, prefer `pyro workspace reset` to recover from `baseline` or one named snapshot. Use `pyro workspace diff` to compare the current workspace tree to its immutable create-time baseline, `pyro workspace snapshot *` to capture named checkpoints, and `pyro workspace export` to copy one changed file or directory back to the host. Use diff --git a/docs/roadmap/llm-chat-ergonomics.md b/docs/roadmap/llm-chat-ergonomics.md index 78f59c6..c028071 100644 --- a/docs/roadmap/llm-chat-ergonomics.md +++ b/docs/roadmap/llm-chat-ergonomics.md @@ -6,7 +6,7 @@ goal: make the core agent-workspace use cases feel trivial from a chat-driven LLM interface. -Current baseline is `3.9.0`: +Current baseline is `3.10.0`: - the stable workspace contract exists across CLI, SDK, and MCP - one-shot `pyro run` still exists as the narrow entrypoint @@ -35,12 +35,8 @@ More concretely, the model should not need to: The remaining UX friction for a technically strong new user is now narrower: -- the recommended chat-host onramp is now explicit, but human-mode file reads - still need final transcript polish for copy-paste and chat logs -- the five use-case smokes now exist, but the advertised smoke pack is only as - trustworthy as its weakest scenario and exact recipe fidelity -- generic MCP guidance is strong, but Codex and OpenCode still ask the user to - translate the generic config into host-specific setup steps +- the generic MCP guidance is strong, but Codex and OpenCode still ask the user + to translate the generic config into host-specific setup steps - `workspace-core` is clearly the recommended profile, but `pyro mcp serve` and `create_server()` still default to `workspace-full` for `3.x` compatibility @@ -66,7 +62,7 @@ The remaining UX friction for a technically strong new user is now narrower: 6. 
[`3.7.0` Handoff Shortcuts And File Input Sources](llm-chat-ergonomics/3.7.0-handoff-shortcuts-and-file-input-sources.md) - Done 7. [`3.8.0` Chat-Host Onramp And Recommended Defaults](llm-chat-ergonomics/3.8.0-chat-host-onramp-and-recommended-defaults.md) - Done 8. [`3.9.0` Content-Only Reads And Human Output Polish](llm-chat-ergonomics/3.9.0-content-only-reads-and-human-output-polish.md) - Done -9. [`3.10.0` Use-Case Smoke Trust And Recipe Fidelity](llm-chat-ergonomics/3.10.0-use-case-smoke-trust-and-recipe-fidelity.md) +9. [`3.10.0` Use-Case Smoke Trust And Recipe Fidelity](llm-chat-ergonomics/3.10.0-use-case-smoke-trust-and-recipe-fidelity.md) - Done 10. [`3.11.0` Host-Specific MCP Onramps](llm-chat-ergonomics/3.11.0-host-specific-mcp-onramps.md) 11. [`4.0.0` Workspace-Core Default Profile](llm-chat-ergonomics/4.0.0-workspace-core-default-profile.md) @@ -92,13 +88,11 @@ Completed so far: docs pass while keeping `workspace-full` as the 3.x compatibility default. - `3.9.0` added content-only workspace file and disk reads plus cleaner default human-mode transcript separation for files that do not end with a trailing newline. +- `3.10.0` aligned the five guest-backed use-case smokes with their recipe docs and promoted + `make smoke-use-cases` as the trustworthy verification path for the advertised workspace flows. Planned next: -- `3.10.0` makes the use-case recipe set fully trustworthy by requiring - `make smoke-use-cases` to pass cleanly, aligning recipe docs with what the - smoke harness actually proves, and removing brittle assertions against - human-mode output when structured results are already available. - `3.11.0` adds exact host-specific onramps for Claude, Codex, and OpenCode so a new chat-host user can copy one known-good config or command instead of translating the generic MCP example by hand. 
diff --git a/docs/roadmap/llm-chat-ergonomics/3.10.0-use-case-smoke-trust-and-recipe-fidelity.md b/docs/roadmap/llm-chat-ergonomics/3.10.0-use-case-smoke-trust-and-recipe-fidelity.md index 6fd61b6..7d50ac6 100644 --- a/docs/roadmap/llm-chat-ergonomics/3.10.0-use-case-smoke-trust-and-recipe-fidelity.md +++ b/docs/roadmap/llm-chat-ergonomics/3.10.0-use-case-smoke-trust-and-recipe-fidelity.md @@ -1,6 +1,6 @@ # `3.10.0` Use-Case Smoke Trust And Recipe Fidelity -Status: Planned +Status: Done ## Goal diff --git a/docs/use-cases/README.md b/docs/use-cases/README.md index 0ce0035..15911d6 100644 --- a/docs/use-cases/README.md +++ b/docs/use-cases/README.md @@ -28,4 +28,5 @@ uv run python scripts/workspace_use_case_smoke.py --scenario all --environment d That runner generates its own host fixtures, creates real guest-backed workspaces, verifies the intended flow, exports one concrete result when relevant, and cleans -up on both success and failure. +up on both success and failure. Treat `make smoke-use-cases` as the trustworthy +guest-backed verification path for the advertised workspace workflows. diff --git a/pyproject.toml b/pyproject.toml index 044f601..bc66549 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyro-mcp" -version = "3.9.0" +version = "3.10.0" description = "Stable Firecracker workspaces, one-shot sandboxes, and MCP tools for coding agents." 
readme = "README.md" license = { file = "LICENSE" } diff --git a/src/pyro_mcp/vm_environments.py b/src/pyro_mcp/vm_environments.py index 365158b..a89e86e 100644 --- a/src/pyro_mcp/vm_environments.py +++ b/src/pyro_mcp/vm_environments.py @@ -19,7 +19,7 @@ from typing import Any from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths DEFAULT_ENVIRONMENT_VERSION = "1.0.0" -DEFAULT_CATALOG_VERSION = "3.9.0" +DEFAULT_CATALOG_VERSION = "3.10.0" OCI_MANIFEST_ACCEPT = ", ".join( ( "application/vnd.oci.image.index.v1+json", diff --git a/src/pyro_mcp/workspace_use_case_smokes.py b/src/pyro_mcp/workspace_use_case_smokes.py index 5536f37..01c1338 100644 --- a/src/pyro_mcp/workspace_use_case_smokes.py +++ b/src/pyro_mcp/workspace_use_case_smokes.py @@ -3,8 +3,6 @@ from __future__ import annotations import argparse -import subprocess -import sys import tempfile import time from dataclasses import dataclass @@ -109,17 +107,6 @@ def _log(message: str) -> None: print(f"[smoke] {message}", flush=True) -def _run_pyro_cli(*args: str, cwd: Path) -> str: - completed = subprocess.run( - [sys.executable, "-m", "pyro_mcp.cli", *args], - cwd=str(cwd), - check=True, - capture_output=True, - text=True, - ) - return completed.stdout - - def _create_workspace( pyro: Pyro, *, @@ -246,16 +233,11 @@ def _scenario_repro_fix_loop(pyro: Pyro, *, root: Path, environment: str) -> Non assert str(initial_read["content"]) == "broken\n", initial_read failing = pyro.exec_workspace(workspace_id, command="sh check.sh") assert int(failing["exit_code"]) != 0, failing - patch_output = _run_pyro_cli( - "workspace", - "patch", - "apply", + patch_result = pyro.apply_workspace_patch( workspace_id, - "--patch-file", - str(patch_path), - cwd=root, + patch=patch_path.read_text(encoding="utf-8"), ) - assert "[workspace-patch] workspace_id=" in patch_output, patch_output + assert bool(patch_result["changed"]) is True, patch_result passing = pyro.exec_workspace(workspace_id, command="sh check.sh") assert 
int(passing["exit_code"]) == 0, passing assert str(passing["stdout"]) == "fixed\n", passing diff --git a/tests/test_workspace_use_case_smokes.py b/tests/test_workspace_use_case_smokes.py index 9e0c178..1a5a836 100644 --- a/tests/test_workspace_use_case_smokes.py +++ b/tests/test_workspace_use_case_smokes.py @@ -54,6 +54,7 @@ class _FakePyro: self._workspace_counter = 0 self._shell_counter = 0 self._clock = 0.0 + self.patch_apply_count = 0 def _tick(self) -> float: self._clock += 1.0 @@ -336,6 +337,7 @@ class _FakePyro: original = target.read_text(encoding="utf-8") updated = original.replace("broken\n", "fixed\n") target.write_text(updated, encoding="utf-8") + self.patch_apply_count += 1 workspace.last_activity_at = self._tick() return {"workspace_id": workspace_id, "changed": updated != original, "patch": patch} @@ -452,6 +454,8 @@ def test_use_case_docs_and_targets_stay_aligned() -> None: repo_root = _repo_root() index_text = (repo_root / "docs" / "use-cases" / "README.md").read_text(encoding="utf-8") makefile_text = (repo_root / "Makefile").read_text(encoding="utf-8") + assert "trustworthy" in index_text + assert "guest-backed verification path" in index_text for recipe in WORKSPACE_USE_CASE_RECIPES: assert (repo_root / recipe.doc_path).is_file(), recipe.doc_path recipe_text = (repo_root / recipe.doc_path).read_text(encoding="utf-8") @@ -478,22 +482,11 @@ def test_run_all_use_case_scenarios_with_fake_pyro( fake_pyro = _FakePyro(tmp_path / "fake-pyro") monkeypatch.setattr(smoke_module, "Pyro", lambda: fake_pyro) monkeypatch.setattr(time_module, "sleep", lambda _seconds: None) - monkeypatch.setattr( - smoke_module, - "_run_pyro_cli", - lambda *args, cwd: ( - fake_pyro.write_workspace_file( - args[3], - "message.txt", - text="fixed\n", - ), - f"[workspace-patch] workspace_id={args[3]} total=1\n", - )[1], - ) smoke_module.run_workspace_use_case_scenario("all") assert fake_pyro._workspaces == {} + assert fake_pyro.patch_apply_count == 1 def 
test_run_workspace_use_case_scenario_rejects_unknown() -> None: @@ -505,18 +498,6 @@ def test_main_runs_selected_scenario(monkeypatch: pytest.MonkeyPatch, tmp_path: fake_pyro = _FakePyro(tmp_path / "fake-pyro-main") monkeypatch.setattr(smoke_module, "Pyro", lambda: fake_pyro) monkeypatch.setattr(time_module, "sleep", lambda _seconds: None) - monkeypatch.setattr( - smoke_module, - "_run_pyro_cli", - lambda *args, cwd: ( - fake_pyro.write_workspace_file( - args[3], - "message.txt", - text="fixed\n", - ), - f"[workspace-patch] workspace_id={args[3]} total=1\n", - )[1], - ) monkeypatch.setattr( "sys.argv", [ @@ -531,3 +512,17 @@ def test_main_runs_selected_scenario(monkeypatch: pytest.MonkeyPatch, tmp_path: smoke_module.main() assert fake_pyro._workspaces == {} + assert fake_pyro.patch_apply_count == 1 + + +def test_repro_fix_scenario_uses_structured_patch_flow( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + fake_pyro = _FakePyro(tmp_path / "fake-pyro-repro-fix") + monkeypatch.setattr(smoke_module, "Pyro", lambda: fake_pyro) + monkeypatch.setattr(time_module, "sleep", lambda _seconds: None) + + smoke_module.run_workspace_use_case_scenario("repro-fix-loop") + + assert fake_pyro.patch_apply_count == 1 diff --git a/uv.lock b/uv.lock index d688e08..6bdffd0 100644 --- a/uv.lock +++ b/uv.lock @@ -715,7 +715,7 @@ crypto = [ [[package]] name = "pyro-mcp" -version = "3.9.0" +version = "3.10.0" source = { editable = "." } dependencies = [ { name = "mcp" },