Add persistent workspace shell sessions

Let agents inhabit a workspace across separate calls instead of only submitting one-shot execs.

Add workspace shell open/read/write/signal/close across the CLI, Python SDK, and MCP server, with persisted shell records, a local PTY-backed mock implementation, and guest-agent support for real Firecracker workspaces.

Mark the 2.5.0 roadmap milestone as done, refresh the docs, examples, and release metadata, and verify with `uv lock`, `UV_CACHE_DIR=.uv-cache make check`, and `UV_CACHE_DIR=.uv-cache make dist-check`.
This commit is contained in:
Thales Maciel 2026-03-12 02:31:57 -03:00
parent 2de31306b6
commit 3f8293ad24
28 changed files with 3265 additions and 81 deletions

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import asyncio
import time
from pathlib import Path
from typing import Any, cast
@ -34,6 +35,11 @@ def test_create_server_registers_vm_tools(tmp_path: Path) -> None:
assert "workspace_create" in tool_names
assert "workspace_logs" in tool_names
assert "workspace_sync_push" in tool_names
assert "shell_open" in tool_names
assert "shell_read" in tool_names
assert "shell_write" in tool_names
assert "shell_signal" in tool_names
assert "shell_close" in tool_names
def test_vm_run_round_trip(tmp_path: Path) -> None:
@ -190,6 +196,11 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None:
dict[str, Any],
dict[str, Any],
dict[str, Any],
dict[str, Any],
dict[str, Any],
dict[str, Any],
dict[str, Any],
dict[str, Any],
]:
server = create_server(manager=manager)
created = _extract_structured(
@ -225,18 +236,88 @@ def test_workspace_tools_round_trip(tmp_path: Path) -> None:
},
)
)
opened = _extract_structured(
await server.call_tool("shell_open", {"workspace_id": workspace_id})
)
shell_id = str(opened["shell_id"])
written = _extract_structured(
await server.call_tool(
"shell_write",
{
"workspace_id": workspace_id,
"shell_id": shell_id,
"input": "pwd",
},
)
)
read = _extract_structured(
await server.call_tool(
"shell_read",
{
"workspace_id": workspace_id,
"shell_id": shell_id,
},
)
)
deadline = time.time() + 5
while "/workspace" not in str(read["output"]) and time.time() < deadline:
read = _extract_structured(
await server.call_tool(
"shell_read",
{
"workspace_id": workspace_id,
"shell_id": shell_id,
"cursor": 0,
},
)
)
await asyncio.sleep(0.05)
signaled = _extract_structured(
await server.call_tool(
"shell_signal",
{
"workspace_id": workspace_id,
"shell_id": shell_id,
},
)
)
closed = _extract_structured(
await server.call_tool(
"shell_close",
{
"workspace_id": workspace_id,
"shell_id": shell_id,
},
)
)
logs = _extract_structured(
await server.call_tool("workspace_logs", {"workspace_id": workspace_id})
)
deleted = _extract_structured(
await server.call_tool("workspace_delete", {"workspace_id": workspace_id})
)
return created, synced, executed, logs, deleted
return created, synced, executed, opened, written, read, signaled, closed, logs, deleted
created, synced, executed, logs, deleted = asyncio.run(_run())
(
created,
synced,
executed,
opened,
written,
read,
signaled,
closed,
logs,
deleted,
) = asyncio.run(_run())
assert created["state"] == "started"
assert created["workspace_seed"]["mode"] == "directory"
assert synced["workspace_sync"]["destination"] == "/workspace/subdir"
assert executed["stdout"] == "more\n"
assert opened["state"] == "running"
assert written["input_length"] == 3
assert "/workspace" in read["output"]
assert signaled["signal"] == "INT"
assert closed["closed"] is True
assert logs["count"] == 1
assert deleted["deleted"] is True