Add persistent task workspace alpha

Start the first workspace milestone toward the task-oriented product without changing the existing one-shot vm_run/pyro run contract. Add a disk-backed task registry in the manager, auto-started task workspaces rooted at /workspace, repeated exec that leaves the task and its workspace in place between commands (no cleanup after each exec), and persisted command journals, all exposed through task create/exec/status/logs/delete across the CLI, Python SDK, and MCP server. Update the public contract, docs, examples, and version/catalog metadata for 2.1.0, and cover the new surface with manager, CLI, SDK, and MCP tests. Validation: UV_CACHE_DIR=.uv-cache make check and UV_CACHE_DIR=.uv-cache make dist-check.
2026-03-11 20:10:10 -03:00 · 2026-03-11 20:10:10 -03:00 · 58df176148
commit 58df176148
parent 6e16e74fd5
19 changed files with 1730 additions and 48 deletions
--- a/tests/test_api.py
+++ b/tests/test_api.py
@ -48,6 +48,7 @@ def test_pyro_create_server_registers_vm_run(tmp_path: Path) -> None:
    tool_names = asyncio.run(_run())
    assert "vm_run" in tool_names
    assert "vm_create" in tool_names
+    assert "task_create" in tool_names


 def test_pyro_vm_run_tool_executes(tmp_path: Path) -> None:
@ -102,3 +103,25 @@ def test_pyro_create_vm_defaults_sizing_and_host_compat(tmp_path: Path) -> None:
    assert created["vcpu_count"] == 1
    assert created["mem_mib"] == 1024
    assert created["allow_host_compat"] is True
+
+
+def test_pyro_task_methods_delegate_to_manager(tmp_path: Path) -> None:
+    pyro = Pyro(
+        manager=VmManager(
+            backend_name="mock",
+            base_dir=tmp_path / "vms",
+            network_manager=TapNetworkManager(enabled=False),
+        )
+    )
+
+    created = pyro.create_task(environment="debian:12-base", allow_host_compat=True)
+    task_id = str(created["task_id"])
+    executed = pyro.exec_task(task_id, command="printf 'ok\\n'")
+    status = pyro.status_task(task_id)
+    logs = pyro.logs_task(task_id)
+    deleted = pyro.delete_task(task_id)
+
+    assert executed["stdout"] == "ok\n"
+    assert status["command_count"] == 1
+    assert logs["count"] == 1
+    assert deleted["deleted"] is True
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -59,6 +59,14 @@ def test_cli_subcommand_help_includes_examples_and_guidance() -> None:
    assert "Expose pyro tools over stdio for an MCP client." in mcp_help
    assert "Use this from an MCP client config after the CLI evaluation path works." in mcp_help

+    task_help = _subparser_choice(parser, "task").format_help()
+    assert "pyro task create debian:12" in task_help
+    assert "pyro task exec TASK_ID" in task_help
+
+    task_exec_help = _subparser_choice(_subparser_choice(parser, "task"), "exec").format_help()
+    assert "persistent `/workspace`" in task_exec_help
+    assert "pyro task exec TASK_ID -- cat note.txt" in task_exec_help
+

 def test_cli_run_prints_json(
    monkeypatch: pytest.MonkeyPatch,
@ -318,6 +326,243 @@ def test_cli_requires_run_command() -> None:
        cli._require_command([])


+def test_cli_task_create_prints_json(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    class StubPyro:
+        def create_task(self, **kwargs: Any) -> dict[str, Any]:
+            assert kwargs["environment"] == "debian:12"
+            return {"task_id": "task-123", "state": "started"}
+
+    class StubParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="create",
+                environment="debian:12",
+                vcpu_count=1,
+                mem_mib=1024,
+                ttl_seconds=600,
+                network=False,
+                allow_host_compat=False,
+                json=True,
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: StubParser())
+    monkeypatch.setattr(cli, "Pyro", StubPyro)
+    cli.main()
+    output = json.loads(capsys.readouterr().out)
+    assert output["task_id"] == "task-123"
+
+
+def test_cli_task_create_prints_human(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    class StubPyro:
+        def create_task(self, **kwargs: Any) -> dict[str, Any]:
+            del kwargs
+            return {
+                "task_id": "task-123",
+                "environment": "debian:12",
+                "state": "started",
+                "workspace_path": "/workspace",
+                "execution_mode": "guest_vsock",
+                "vcpu_count": 1,
+                "mem_mib": 1024,
+                "command_count": 0,
+                "last_command": None,
+            }
+
+    class StubParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="create",
+                environment="debian:12",
+                vcpu_count=1,
+                mem_mib=1024,
+                ttl_seconds=600,
+                network=False,
+                allow_host_compat=False,
+                json=False,
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: StubParser())
+    monkeypatch.setattr(cli, "Pyro", StubPyro)
+    cli.main()
+    output = capsys.readouterr().out
+    assert "Task: task-123" in output
+    assert "Workspace: /workspace" in output
+
+
+def test_cli_task_exec_prints_human_output(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    class StubPyro:
+        def exec_task(self, task_id: str, *, command: str, timeout_seconds: int) -> dict[str, Any]:
+            assert task_id == "task-123"
+            assert command == "cat note.txt"
+            assert timeout_seconds == 30
+            return {
+                "task_id": task_id,
+                "sequence": 2,
+                "cwd": "/workspace",
+                "execution_mode": "guest_vsock",
+                "exit_code": 0,
+                "duration_ms": 4,
+                "stdout": "hello\n",
+                "stderr": "",
+            }
+
+    class StubParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="exec",
+                task_id="task-123",
+                timeout_seconds=30,
+                json=False,
+                command_args=["--", "cat", "note.txt"],
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: StubParser())
+    monkeypatch.setattr(cli, "Pyro", StubPyro)
+    cli.main()
+    captured = capsys.readouterr()
+    assert captured.out == "hello\n"
+    assert "[task-exec] task_id=task-123 sequence=2 cwd=/workspace" in captured.err
+
+
+def test_cli_task_logs_and_delete_print_human(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    class StubPyro:
+        def logs_task(self, task_id: str) -> dict[str, Any]:
+            assert task_id == "task-123"
+            return {
+                "task_id": task_id,
+                "count": 1,
+                "entries": [
+                    {
+                        "sequence": 1,
+                        "exit_code": 0,
+                        "duration_ms": 2,
+                        "cwd": "/workspace",
+                        "command": "printf 'ok\\n'",
+                        "stdout": "ok\n",
+                        "stderr": "",
+                    }
+                ],
+            }
+
+        def delete_task(self, task_id: str) -> dict[str, Any]:
+            assert task_id == "task-123"
+            return {"task_id": task_id, "deleted": True}
+
+    class LogsParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="logs",
+                task_id="task-123",
+                json=False,
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: LogsParser())
+    monkeypatch.setattr(cli, "Pyro", StubPyro)
+    cli.main()
+
+    class DeleteParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="delete",
+                task_id="task-123",
+                json=False,
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: DeleteParser())
+    cli.main()
+
+    output = capsys.readouterr().out
+    assert "#1 exit_code=0 duration_ms=2 cwd=/workspace" in output
+    assert "Deleted task: task-123" in output
+
+
+def test_cli_task_status_and_delete_print_json(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    class StubPyro:
+        def status_task(self, task_id: str) -> dict[str, Any]:
+            assert task_id == "task-123"
+            return {"task_id": task_id, "state": "started"}
+
+        def delete_task(self, task_id: str) -> dict[str, Any]:
+            assert task_id == "task-123"
+            return {"task_id": task_id, "deleted": True}
+
+    class StatusParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="status",
+                task_id="task-123",
+                json=True,
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: StatusParser())
+    monkeypatch.setattr(cli, "Pyro", StubPyro)
+    cli.main()
+    status = json.loads(capsys.readouterr().out)
+    assert status["state"] == "started"
+
+    class DeleteParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="delete",
+                task_id="task-123",
+                json=True,
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: DeleteParser())
+    cli.main()
+    deleted = json.loads(capsys.readouterr().out)
+    assert deleted["deleted"] is True
+
+
+def test_cli_task_exec_json_error_exits_nonzero(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    class StubPyro:
+        def exec_task(self, task_id: str, *, command: str, timeout_seconds: int) -> dict[str, Any]:
+            del task_id, command, timeout_seconds
+            raise RuntimeError("task is unavailable")
+
+    class StubParser:
+        def parse_args(self) -> argparse.Namespace:
+            return argparse.Namespace(
+                command="task",
+                task_command="exec",
+                task_id="task-123",
+                timeout_seconds=30,
+                json=True,
+                command_args=["--", "true"],
+            )
+
+    monkeypatch.setattr(cli, "_build_parser", lambda: StubParser())
+    monkeypatch.setattr(cli, "Pyro", StubPyro)
+
+    with pytest.raises(SystemExit, match="1"):
+        cli.main()
+
+    payload = json.loads(capsys.readouterr().out)
+    assert payload["ok"] is False
+
+
 def test_print_env_helpers_render_human_output(capsys: pytest.CaptureFixture[str]) -> None:
    cli._print_env_list_human(
        {
--- a/tests/test_public_contract.py
+++ b/tests/test_public_contract.py
@ -17,6 +17,7 @@ from pyro_mcp.contract import (
    PUBLIC_CLI_DEMO_SUBCOMMANDS,
    PUBLIC_CLI_ENV_SUBCOMMANDS,
    PUBLIC_CLI_RUN_FLAGS,
+    PUBLIC_CLI_TASK_SUBCOMMANDS,
    PUBLIC_MCP_TOOLS,
    PUBLIC_SDK_METHODS,
 )
@ -63,6 +64,10 @@ def test_public_cli_help_lists_commands_and_run_flags() -> None:
    for subcommand_name in PUBLIC_CLI_ENV_SUBCOMMANDS:
        assert subcommand_name in env_help_text

+    task_help_text = _subparser_choice(parser, "task").format_help()
+    for subcommand_name in PUBLIC_CLI_TASK_SUBCOMMANDS:
+        assert subcommand_name in task_help_text
+
    demo_help_text = _subparser_choice(parser, "demo").format_help()
    for subcommand_name in PUBLIC_CLI_DEMO_SUBCOMMANDS:
        assert subcommand_name in demo_help_text
--- a/tests/test_server.py
+++ b/tests/test_server.py
@ -31,6 +31,8 @@ def test_create_server_registers_vm_tools(tmp_path: Path) -> None:
    assert "vm_network_info" in tool_names
    assert "vm_run" in tool_names
    assert "vm_status" in tool_names
+    assert "task_create" in tool_names
+    assert "task_logs" in tool_names


 def test_vm_run_round_trip(tmp_path: Path) -> None:
@ -161,3 +163,50 @@ def test_server_main_runs_stdio_transport(monkeypatch: pytest.MonkeyPatch) -> No
    monkeypatch.setattr(server_module, "create_server", lambda: StubServer())
    server_module.main()
    assert called == {"transport": "stdio"}
+
+
+def test_task_tools_round_trip(tmp_path: Path) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+
+    def _extract_structured(raw_result: object) -> dict[str, Any]:
+        if not isinstance(raw_result, tuple) or len(raw_result) != 2:
+            raise TypeError("unexpected call_tool result shape")
+        _, structured = raw_result
+        if not isinstance(structured, dict):
+            raise TypeError("expected structured dictionary result")
+        return cast(dict[str, Any], structured)
+
+    async def _run() -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any]]:
+        server = create_server(manager=manager)
+        created = _extract_structured(
+            await server.call_tool(
+                "task_create",
+                {
+                    "environment": "debian:12-base",
+                    "allow_host_compat": True,
+                },
+            )
+        )
+        task_id = str(created["task_id"])
+        executed = _extract_structured(
+            await server.call_tool(
+                "task_exec",
+                {
+                    "task_id": task_id,
+                    "command": "printf 'ok\\n'",
+                },
+            )
+        )
+        logs = _extract_structured(await server.call_tool("task_logs", {"task_id": task_id}))
+        deleted = _extract_structured(await server.call_tool("task_delete", {"task_id": task_id}))
+        return created, executed, logs, deleted
+
+    created, executed, logs, deleted = asyncio.run(_run())
+    assert created["state"] == "started"
+    assert executed["stdout"] == "ok\n"
+    assert logs["count"] == 1
+    assert deleted["deleted"] is True
--- a/tests/test_vm_manager.py
+++ b/tests/test_vm_manager.py
@ -1,14 +1,17 @@
 from __future__ import annotations

+import json
+import subprocess
+import time
 from pathlib import Path
 from typing import Any

 import pytest

 import pyro_mcp.vm_manager as vm_manager_module
-from pyro_mcp.runtime import resolve_runtime_paths
+from pyro_mcp.runtime import RuntimeCapabilities, resolve_runtime_paths
 from pyro_mcp.vm_manager import VmManager
-from pyro_mcp.vm_network import TapNetworkManager
+from pyro_mcp.vm_network import NetworkConfig, TapNetworkManager


 def test_vm_manager_lifecycle_and_auto_cleanup(tmp_path: Path) -> None:
@ -262,6 +265,95 @@ def test_vm_manager_run_vm(tmp_path: Path) -> None:
    assert str(result["stdout"]) == "ok\n"


+def test_task_lifecycle_and_logs(tmp_path: Path) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+
+    created = manager.create_task(
+        environment="debian:12-base",
+        allow_host_compat=True,
+    )
+    task_id = str(created["task_id"])
+    assert created["state"] == "started"
+    assert created["workspace_path"] == "/workspace"
+
+    first = manager.exec_task(
+        task_id,
+        command="printf 'hello\\n' > note.txt",
+        timeout_seconds=30,
+    )
+    second = manager.exec_task(task_id, command="cat note.txt", timeout_seconds=30)
+
+    assert first["exit_code"] == 0
+    assert second["stdout"] == "hello\n"
+
+    status = manager.status_task(task_id)
+    assert status["command_count"] == 2
+    assert status["last_command"] is not None
+
+    logs = manager.logs_task(task_id)
+    assert logs["count"] == 2
+    entries = logs["entries"]
+    assert isinstance(entries, list)
+    assert entries[1]["stdout"] == "hello\n"
+
+    deleted = manager.delete_task(task_id)
+    assert deleted["deleted"] is True
+    with pytest.raises(ValueError, match="does not exist"):
+        manager.status_task(task_id)
+
+
+def test_task_rehydrates_across_manager_processes(tmp_path: Path) -> None:
+    base_dir = tmp_path / "vms"
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=base_dir,
+        network_manager=TapNetworkManager(enabled=False),
+    )
+    task_id = str(
+        manager.create_task(
+            environment="debian:12-base",
+            allow_host_compat=True,
+        )["task_id"]
+    )
+
+    other = VmManager(
+        backend_name="mock",
+        base_dir=base_dir,
+        network_manager=TapNetworkManager(enabled=False),
+    )
+    executed = other.exec_task(task_id, command="printf 'ok\\n'", timeout_seconds=30)
+    assert executed["exit_code"] == 0
+    assert executed["stdout"] == "ok\n"
+
+    logs = other.logs_task(task_id)
+    assert logs["count"] == 1
+
+
+def test_task_requires_started_state(tmp_path: Path) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+    task_id = str(
+        manager.create_task(
+            environment="debian:12-base",
+            allow_host_compat=True,
+        )["task_id"]
+    )
+    task_dir = tmp_path / "vms" / "tasks" / task_id / "task.json"
+    payload = json.loads(task_dir.read_text(encoding="utf-8"))
+    payload["state"] = "stopped"
+    task_dir.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
+
+    with pytest.raises(RuntimeError, match="must be in 'started' state"):
+        manager.exec_task(task_id, command="true", timeout_seconds=30)
+
+
 def test_vm_manager_firecracker_backend_path(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
@ -334,3 +426,193 @@ def test_vm_manager_uses_canonical_default_cache_dir(
    )

    assert manager._environment_store.cache_dir == tmp_path / "cache"  # noqa: SLF001
+
+
+def test_vm_manager_helper_round_trips() -> None:
+    network = NetworkConfig(
+        vm_id="abc123",
+        tap_name="tap0",
+        guest_ip="172.29.1.2",
+        gateway_ip="172.29.1.1",
+        subnet_cidr="172.29.1.0/24",
+        mac_address="06:00:aa:bb:cc:dd",
+        dns_servers=("1.1.1.1", "8.8.8.8"),
+    )
+
+    assert vm_manager_module._optional_int(None) is None  # noqa: SLF001
+    assert vm_manager_module._optional_int(True) == 1  # noqa: SLF001
+    assert vm_manager_module._optional_int(7) == 7  # noqa: SLF001
+    assert vm_manager_module._optional_int(7.2) == 7  # noqa: SLF001
+    assert vm_manager_module._optional_int("9") == 9  # noqa: SLF001
+    with pytest.raises(TypeError, match="integer-compatible"):
+        vm_manager_module._optional_int(object())  # noqa: SLF001
+
+    assert vm_manager_module._optional_str(None) is None  # noqa: SLF001
+    assert vm_manager_module._optional_str(1) == "1"  # noqa: SLF001
+    assert vm_manager_module._optional_dict(None) is None  # noqa: SLF001
+    assert vm_manager_module._optional_dict({"x": 1}) == {"x": 1}  # noqa: SLF001
+    with pytest.raises(TypeError, match="dictionary payload"):
+        vm_manager_module._optional_dict("bad")  # noqa: SLF001
+    assert vm_manager_module._string_dict({"x": 1}) == {"x": "1"}  # noqa: SLF001
+    assert vm_manager_module._string_dict("bad") == {}  # noqa: SLF001
+
+    serialized = vm_manager_module._serialize_network(network)  # noqa: SLF001
+    assert serialized is not None
+    restored = vm_manager_module._deserialize_network(serialized)  # noqa: SLF001
+    assert restored == network
+    assert vm_manager_module._deserialize_network(None) is None  # noqa: SLF001
+    with pytest.raises(TypeError, match="dictionary payload"):
+        vm_manager_module._deserialize_network("bad")  # noqa: SLF001
+
+    assert vm_manager_module._wrap_guest_command("echo hi") == "echo hi"  # noqa: SLF001
+    wrapped = vm_manager_module._wrap_guest_command("echo hi", cwd="/workspace")  # noqa: SLF001
+    assert "cd /workspace" in wrapped
+    assert vm_manager_module._pid_is_running(None) is False  # noqa: SLF001
+
+
+def test_copy_rootfs_falls_back_to_copy2(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    source = tmp_path / "rootfs.ext4"
+    source.write_text("payload", encoding="utf-8")
+    dest = tmp_path / "dest" / "rootfs.ext4"
+
+    def _raise_oserror(*args: Any, **kwargs: Any) -> Any:
+        del args, kwargs
+        raise OSError("no cp")
+
+    monkeypatch.setattr(subprocess, "run", _raise_oserror)
+
+    clone_mode = vm_manager_module._copy_rootfs(source, dest)  # noqa: SLF001
+    assert clone_mode == "copy2"
+    assert dest.read_text(encoding="utf-8") == "payload"
+
+
+def test_task_create_cleans_up_on_start_failure(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+
+    def _boom(instance: Any) -> None:
+        del instance
+        raise RuntimeError("boom")
+
+    monkeypatch.setattr(manager._backend, "start", _boom)  # noqa: SLF001
+
+    with pytest.raises(RuntimeError, match="boom"):
+        manager.create_task(environment="debian:12-base", allow_host_compat=True)
+
+    assert list((tmp_path / "vms" / "tasks").iterdir()) == []
+
+
+def test_exec_instance_wraps_guest_workspace_command(tmp_path: Path) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+    manager._runtime_capabilities = RuntimeCapabilities(  # noqa: SLF001
+        supports_vm_boot=True,
+        supports_guest_exec=True,
+        supports_guest_network=False,
+        reason=None,
+    )
+    captured: dict[str, Any] = {}
+
+    class StubBackend:
+        def exec(
+            self,
+            instance: Any,
+            command: str,
+            timeout_seconds: int,
+            *,
+            workdir: Path | None = None,
+        ) -> vm_manager_module.VmExecResult:
+            del instance, timeout_seconds
+            captured["command"] = command
+            captured["workdir"] = workdir
+            return vm_manager_module.VmExecResult(
+                stdout="",
+                stderr="",
+                exit_code=0,
+                duration_ms=1,
+            )
+
+    manager._backend = StubBackend()  # type: ignore[assignment]  # noqa: SLF001
+    instance = vm_manager_module.VmInstance(  # noqa: SLF001
+        vm_id="vm-123",
+        environment="debian:12-base",
+        vcpu_count=1,
+        mem_mib=512,
+        ttl_seconds=600,
+        created_at=time.time(),
+        expires_at=time.time() + 600,
+        workdir=tmp_path / "runtime",
+        state="started",
+    )
+    result, execution_mode = manager._exec_instance(  # noqa: SLF001
+        instance,
+        command="echo hi",
+        timeout_seconds=30,
+        guest_cwd="/workspace",
+    )
+    assert result.exit_code == 0
+    assert execution_mode == "unknown"
+    assert "cd /workspace" in str(captured["command"])
+    assert captured["workdir"] is None
+
+
+def test_status_task_marks_dead_backing_process_stopped(tmp_path: Path) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+    task_id = str(
+        manager.create_task(
+            environment="debian:12-base",
+            allow_host_compat=True,
+        )["task_id"]
+    )
+    task_path = tmp_path / "vms" / "tasks" / task_id / "task.json"
+    payload = json.loads(task_path.read_text(encoding="utf-8"))
+    payload["metadata"]["execution_mode"] = "guest_vsock"
+    payload["firecracker_pid"] = 999999
+    task_path.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
+
+    status = manager.status_task(task_id)
+    assert status["state"] == "stopped"
+    updated_payload = json.loads(task_path.read_text(encoding="utf-8"))
+    assert "backing guest process" in str(updated_payload.get("last_error", ""))
+
+
+def test_reap_expired_tasks_removes_invalid_and_expired_records(tmp_path: Path) -> None:
+    manager = VmManager(
+        backend_name="mock",
+        base_dir=tmp_path / "vms",
+        network_manager=TapNetworkManager(enabled=False),
+    )
+    invalid_dir = tmp_path / "vms" / "tasks" / "invalid"
+    invalid_dir.mkdir(parents=True)
+    (invalid_dir / "task.json").write_text("[]", encoding="utf-8")
+
+    task_id = str(
+        manager.create_task(
+            environment="debian:12-base",
+            allow_host_compat=True,
+        )["task_id"]
+    )
+    task_path = tmp_path / "vms" / "tasks" / task_id / "task.json"
+    payload = json.loads(task_path.read_text(encoding="utf-8"))
+    payload["expires_at"] = 0.0
+    task_path.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
+
+    with manager._lock:  # noqa: SLF001
+        manager._reap_expired_tasks_locked(time.time())  # noqa: SLF001
+
+    assert not invalid_dir.exists()
+    assert not (tmp_path / "vms" / "tasks" / task_id).exists()