Add workspace service lifecycle with typed readiness

Make persistent workspaces capable of running long-lived background processes instead of forcing everything through one-shot exec calls.

Add workspace service start/list/status/logs/stop across the CLI, Python SDK, and MCP server, with multiple named services per workspace, typed readiness probes (file, tcp, http, and command), and aggregate service counts on workspace status. Keep service state and logs outside /workspace so diff and export semantics stay workspace-scoped, and extend the guest agent plus backends to persist service records and logs across separate calls.

Update the 2.7.0 docs, examples, changelog, and roadmap milestone to reflect the shipped surface.

Validation: uv lock; UV_CACHE_DIR=.uv-cache make check; UV_CACHE_DIR=.uv-cache make dist-check; real guest-backed Firecracker smoke for workspace create, two service starts, list/status/logs, diff unaffected, stop, and delete.
This commit is contained in:
Thales Maciel 2026-03-12 05:36:28 -03:00
parent 84a7e18d4d
commit f504f0a331
28 changed files with 4098 additions and 124 deletions

View file

@ -262,6 +262,105 @@ def test_vsock_exec_client_shell_round_trip(monkeypatch: pytest.MonkeyPatch) ->
assert open_request["shell_id"] == "shell-1"
def test_vsock_exec_client_service_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
    """Exercise start/status/logs/stop on ``VsockExecClient`` against canned replies.

    Four ``StubSocket`` instances are preloaded with JSON payloads and handed
    out in order by the injected socket factory, one per socket requested.
    """
    # raising=False lets the attribute be set even when the socket module
    # on this host does not define AF_VSOCK.
    monkeypatch.setattr(socket, "AF_VSOCK", 40, raising=False)

    probe = {"type": "file", "path": "/workspace/.ready"}
    running_record = {
        "service_name": "app",
        "command": "echo ok",
        "cwd": "/workspace",
        "state": "running",
        "started_at": 1.0,
        "ready_at": 2.0,
        "ended_at": None,
        "exit_code": None,
        "pid": 42,
        "readiness": probe,
        "stop_reason": None,
    }
    # The logs reply is the running record plus the log fields appended.
    logs_record = {
        **running_record,
        "stdout": "ok\n",
        "stderr": "",
        "tail_lines": 200,
        "truncated": False,
    }
    # The stop reply overrides the lifecycle fields in place (key order kept).
    stopped_record = {
        **running_record,
        "state": "stopped",
        "ended_at": 3.0,
        "exit_code": 0,
        "stop_reason": "sigterm",
    }

    # Reply order mirrors the call order below: start, status, logs, stop.
    canned_replies = (running_record, running_record, logs_record, stopped_record)
    stubs = [
        StubSocket(json.dumps(record).encode("utf-8")) for record in canned_replies
    ]
    # Separate queue so ``stubs`` stays intact for inspecting sent bytes later.
    pending = list(stubs)

    def socket_factory(family: int, sock_type: int) -> StubSocket:
        assert family == socket.AF_VSOCK
        assert sock_type == socket.SOCK_STREAM
        return pending.pop(0)

    client = VsockExecClient(socket_factory=socket_factory)

    started = client.start_service(
        1234,
        5005,
        service_name="app",
        command="echo ok",
        cwd="/workspace",
        readiness=dict(probe),
        ready_timeout_seconds=30,
        ready_interval_ms=500,
    )
    assert started["service_name"] == "app"

    status = client.status_service(1234, 5005, service_name="app")
    assert status["state"] == "running"

    logs = client.logs_service(1234, 5005, service_name="app", tail_lines=200)
    assert logs["stdout"] == "ok\n"

    stopped = client.stop_service(1234, 5005, service_name="app")
    assert stopped["state"] == "stopped"

    # Only the first request's wire format is checked: the start call.
    raw_start = stubs[0].sent.decode("utf-8").strip()
    start_request = json.loads(raw_start)
    assert start_request["action"] == "start_service"
    assert start_request["service_name"] == "app"
def test_vsock_exec_client_raises_agent_error(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(socket, "AF_VSOCK", 40, raising=False)
stub = StubSocket(b'{"error":"shell is unavailable"}')