Add persistent workspace shell sessions

Let agents inhabit a workspace across separate calls instead of only submitting one-shot execs. Add workspace shell open/read/write/signal/close across the CLI, Python SDK, and MCP server, with persisted shell records, a local PTY-backed mock implementation, and guest-agent support for real Firecracker workspaces. Mark the 2.5.0 roadmap milestone done, refresh docs/examples and the release metadata, and verify with uv lock, UV_CACHE_DIR=.uv-cache make check, and UV_CACHE_DIR=.uv-cache make dist-check.
2026-03-12 02:31:57 -03:00 · 2026-03-12 02:31:57 -03:00 · 3f8293ad24
commit 3f8293ad24
parent 2de31306b6
28 changed files with 3265 additions and 81 deletions
--- a/tests/test_server.py
+++ b/tests/test_server.py
@ -1,6 +1,7 @@
 from __future__ import annotations

 import asyncio
+import time
 from pathlib import Path
 from typing import Any, cast

@ -34,6 +35,11 @@ def test_create_server_registers_vm_tools(tmp_path: Path) -> None:
    assert "workspace_create" in tool_names
    assert "workspace_logs" in tool_names
    assert "workspace_sync_push" in tool_names
+    assert "shell_open" in tool_names
+    assert "shell_read" in tool_names
+    assert "shell_write" in tool_names
+    assert "shell_signal" in tool_names
+    assert "shell_close" in tool_names


 def test_vm_run_round_trip(tmp_path: Path) -> None:
@ -190,6 +196,11 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None:
        dict[str, Any],
        dict[str, Any],
        dict[str, Any],
+        dict[str, Any],
+        dict[str, Any],
+        dict[str, Any],
+        dict[str, Any],
+        dict[str, Any],
    ]:
        server = create_server(manager=manager)
        created = _extract_structured(
@ -225,18 +236,88 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None:
                },
            )
        )
+        opened = _extract_structured(
+            await server.call_tool("shell_open", {"workspace_id": workspace_id})
+        )
+        shell_id = str(opened["shell_id"])
+        written = _extract_structured(
+            await server.call_tool(
+                "shell_write",
+                {
+                    "workspace_id": workspace_id,
+                    "shell_id": shell_id,
+                    "input": "pwd",
+                },
+            )
+        )
+        read = _extract_structured(
+            await server.call_tool(
+                "shell_read",
+                {
+                    "workspace_id": workspace_id,
+                    "shell_id": shell_id,
+                },
+            )
+        )
+        deadline = time.time() + 5
+        while "/workspace" not in str(read["output"]) and time.time() < deadline:
+            read = _extract_structured(
+                await server.call_tool(
+                    "shell_read",
+                    {
+                        "workspace_id": workspace_id,
+                        "shell_id": shell_id,
+                        "cursor": 0,
+                    },
+                )
+            )
+            await asyncio.sleep(0.05)
+        signaled = _extract_structured(
+            await server.call_tool(
+                "shell_signal",
+                {
+                    "workspace_id": workspace_id,
+                    "shell_id": shell_id,
+                },
+            )
+        )
+        closed = _extract_structured(
+            await server.call_tool(
+                "shell_close",
+                {
+                    "workspace_id": workspace_id,
+                    "shell_id": shell_id,
+                },
+            )
+        )
        logs = _extract_structured(
            await server.call_tool("workspace_logs", {"workspace_id": workspace_id})
        )
        deleted = _extract_structured(
            await server.call_tool("workspace_delete", {"workspace_id": workspace_id})
        )
-        return created, synced, executed, logs, deleted
+        return created, synced, executed, opened, written, read, signaled, closed, logs, deleted

-    created, synced, executed, logs, deleted = asyncio.run(_run())
+    (
+        created,
+        synced,
+        executed,
+        opened,
+        written,
+        read,
+        signaled,
+        closed,
+        logs,
+        deleted,
+    ) = asyncio.run(_run())
    assert created["state"] == "started"
    assert created["workspace_seed"]["mode"] == "directory"
    assert synced["workspace_sync"]["destination"] == "/workspace/subdir"
    assert executed["stdout"] == "more\n"
+    assert opened["state"] == "running"
+    assert written["input_length"] == 3
+    assert "/workspace" in read["output"]
+    assert signaled["signal"] == "INT"
+    assert closed["closed"] is True
    assert logs["count"] == 1
    assert deleted["deleted"] is True