Add workspace service lifecycle with typed readiness

Make persistent workspaces capable of running long-lived background processes instead of forcing everything through one-shot exec calls.

Add workspace service start/list/status/logs/stop across the CLI, Python SDK, and MCP server, with multiple named services per workspace, typed readiness probes (file, tcp, http, and command), and aggregate service counts on workspace status. Keep service state and logs outside /workspace so diff and export semantics stay workspace-scoped, and extend the guest agent plus backends to persist service records and logs across separate calls.

Update the 2.7.0 docs, examples, changelog, and roadmap milestone to reflect the shipped surface.

Validation: uv lock; UV_CACHE_DIR=.uv-cache make check; UV_CACHE_DIR=.uv-cache make dist-check; real guest-backed Firecracker smoke for workspace create, two service starts, list/status/logs, diff unaffected, stop, and delete.
This commit is contained in:
Thales Maciel 2026-03-12 05:36:28 -03:00
parent 84a7e18d4d
commit f504f0a331
28 changed files with 4098 additions and 124 deletions

View file

@ -207,6 +207,50 @@ class Pyro:
def close_shell(self, workspace_id: str, shell_id: str) -> dict[str, Any]:
    """Close one workspace shell by delegating to the manager."""
    closed = self._manager.close_shell(workspace_id, shell_id)
    return closed
def start_service(
self,
workspace_id: str,
service_name: str,
*,
command: str,
cwd: str = "/workspace",
readiness: dict[str, Any] | None = None,
ready_timeout_seconds: int = 30,
ready_interval_ms: int = 500,
) -> dict[str, Any]:
return self._manager.start_service(
workspace_id,
service_name,
command=command,
cwd=cwd,
readiness=readiness,
ready_timeout_seconds=ready_timeout_seconds,
ready_interval_ms=ready_interval_ms,
)
def list_services(self, workspace_id: str) -> dict[str, Any]:
    """Return the manager's service listing for one workspace."""
    listing = self._manager.list_services(workspace_id)
    return listing
def status_service(self, workspace_id: str, service_name: str) -> dict[str, Any]:
    """Return the manager's status payload for one named service."""
    status = self._manager.status_service(workspace_id, service_name)
    return status
def logs_service(
    self,
    workspace_id: str,
    service_name: str,
    *,
    tail_lines: int = 200,
    all: bool = False,
) -> dict[str, Any]:
    """Fetch service logs through the manager.

    When ``all`` is truthy the manager receives ``tail_lines=None``; otherwise
    the requested ``tail_lines`` value is forwarded unchanged.
    """
    effective_tail: int | None = tail_lines
    if all:
        effective_tail = None
    return self._manager.logs_service(
        workspace_id,
        service_name,
        tail_lines=effective_tail,
    )
def stop_service(self, workspace_id: str, service_name: str) -> dict[str, Any]:
    """Stop one named service by delegating to the manager."""
    stopped = self._manager.stop_service(workspace_id, service_name)
    return stopped
def delete_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Delete one workspace by delegating to the manager."""
    deleted = self._manager.delete_workspace(workspace_id)
    return deleted
@ -458,6 +502,69 @@ class Pyro:
"""Close a persistent workspace shell."""
return self.close_shell(workspace_id, shell_id)
@server.tool()
async def service_start(
    workspace_id: str,
    service_name: str,
    command: str,
    cwd: str = "/workspace",
    ready_file: str | None = None,
    ready_tcp: str | None = None,
    ready_http: str | None = None,
    ready_command: str | None = None,
    ready_timeout_seconds: int = 30,
    ready_interval_ms: int = 500,
) -> dict[str, Any]:
    """Start a named long-running service inside a workspace."""
    # Each flat ready_* argument maps onto one typed readiness payload.
    probe_candidates = (
        ("file", "path", ready_file),
        ("tcp", "address", ready_tcp),
        ("http", "url", ready_http),
        ("command", "command", ready_command),
    )
    selected: list[dict[str, Any]] = [
        {"type": probe_type, field: value}
        for probe_type, field, value in probe_candidates
        if value is not None
    ]
    # The CLI rejects more than one readiness probe; do the same here instead
    # of silently keeping the first one and dropping the rest.
    if len(selected) > 1:
        raise ValueError(
            "choose at most one of ready_file, ready_tcp, ready_http, or ready_command"
        )
    readiness: dict[str, Any] | None = selected[0] if selected else None
    return self.start_service(
        workspace_id,
        service_name,
        command=command,
        cwd=cwd,
        readiness=readiness,
        ready_timeout_seconds=ready_timeout_seconds,
        ready_interval_ms=ready_interval_ms,
    )
@server.tool()
async def service_list(workspace_id: str) -> dict[str, Any]:
    """List named services in one workspace."""
    # Thin tool wrapper: forwards to the SDK method on the enclosing instance.
    listing = self.list_services(workspace_id)
    return listing
@server.tool()
async def service_status(workspace_id: str, service_name: str) -> dict[str, Any]:
    """Inspect one named workspace service."""
    # Thin tool wrapper: forwards to the SDK method on the enclosing instance.
    status = self.status_service(workspace_id, service_name)
    return status
@server.tool()
async def service_logs(
    workspace_id: str,
    service_name: str,
    tail_lines: int = 200,
    all: bool = False,
) -> dict[str, Any]:
    """Read persisted stdout/stderr for one workspace service."""
    # Both tailing knobs are passed through; the SDK method decides how
    # ``all`` overrides ``tail_lines``.
    log_options: dict[str, Any] = {"tail_lines": tail_lines, "all": all}
    return self.logs_service(workspace_id, service_name, **log_options)
@server.tool()
async def service_stop(workspace_id: str, service_name: str) -> dict[str, Any]:
    """Stop one running service in a workspace."""
    # Thin tool wrapper: forwards to the SDK method on the enclosing instance.
    stopped = self.stop_service(workspace_id, service_name)
    return stopped
@server.tool()
async def workspace_delete(workspace_id: str) -> dict[str, Any]:
"""Delete a persistent workspace and its backing sandbox."""

View file

@ -17,6 +17,9 @@ from pyro_mcp.runtime import DEFAULT_PLATFORM, doctor_report
from pyro_mcp.vm_environments import DEFAULT_CATALOG_VERSION
from pyro_mcp.vm_manager import (
DEFAULT_MEM_MIB,
DEFAULT_SERVICE_LOG_TAIL_LINES,
DEFAULT_SERVICE_READY_INTERVAL_MS,
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS,
DEFAULT_VCPU_COUNT,
WORKSPACE_GUEST_PATH,
WORKSPACE_SHELL_SIGNAL_NAMES,
@ -171,6 +174,11 @@ def _print_workspace_summary_human(payload: dict[str, Any], *, action: str) -> N
f"{int(payload.get('mem_mib', 0))} MiB"
)
print(f"Command count: {int(payload.get('command_count', 0))}")
print(
"Services: "
f"{int(payload.get('running_service_count', 0))}/"
f"{int(payload.get('service_count', 0))} running"
)
last_command = payload.get("last_command")
if isinstance(last_command, dict):
print(
@ -304,6 +312,42 @@ def _print_workspace_shell_read_human(payload: dict[str, Any]) -> None:
)
def _print_workspace_service_summary_human(payload: dict[str, Any], *, prefix: str) -> None:
    """Write a one-line ``key=value`` service summary to stderr.

    Missing keys fall back to ``unknown`` (or the workspace guest path for
    ``cwd``). The line is tagged with ``[prefix]`` and flushed immediately.
    """
    field_defaults = (
        ("workspace_id", "unknown"),
        ("service_name", "unknown"),
        ("state", "unknown"),
        ("cwd", WORKSPACE_GUEST_PATH),
        ("execution_mode", "unknown"),
    )
    rendered = " ".join(
        f"{key}={str(payload.get(key, fallback))}" for key, fallback in field_defaults
    )
    print(f"[{prefix}] {rendered}", file=sys.stderr, flush=True)
def _print_workspace_service_list_human(payload: dict[str, Any]) -> None:
    """Print one line per service, or a placeholder when there are none.

    Entries in ``payload["services"]`` that are not dictionaries are skipped.
    """
    entries = payload.get("services")
    has_entries = isinstance(entries, list) and bool(entries)
    if not has_entries:
        print("No workspace services found.")
        return
    for entry in entries:
        if isinstance(entry, dict):
            name = str(entry.get("service_name", "unknown"))
            state = str(entry.get("state", "unknown"))
            cwd = str(entry.get("cwd", WORKSPACE_GUEST_PATH))
            print(f"{name} [{state}] cwd={cwd}")
def _print_workspace_service_logs_human(payload: dict[str, Any]) -> None:
    """Replay captured service output, then print the service summary line.

    ``stdout`` goes to this process's stdout and ``stderr`` to its stderr;
    missing keys are treated as empty strings.
    """
    captured_out = str(payload.get("stdout", ""))
    captured_err = str(payload.get("stderr", ""))
    for text, target in ((captured_out, sys.stdout), (captured_err, sys.stderr)):
        _write_stream(text, stream=target)
    _print_workspace_service_summary_human(payload, prefix="workspace-service-logs")
class _HelpFormatter(
argparse.RawDescriptionHelpFormatter,
argparse.ArgumentDefaultsHelpFormatter,
@ -339,6 +383,8 @@ def _build_parser() -> argparse.ArgumentParser:
pyro workspace diff WORKSPACE_ID
pyro workspace export WORKSPACE_ID note.txt --output ./note.txt
pyro workspace shell open WORKSPACE_ID
pyro workspace service start WORKSPACE_ID app --ready-file .ready -- \
sh -lc 'touch .ready && while true; do sleep 60; done'
Use `pyro mcp serve` only after the CLI validation path works.
"""
@ -549,6 +595,8 @@ def _build_parser() -> argparse.ArgumentParser:
pyro workspace diff WORKSPACE_ID
pyro workspace export WORKSPACE_ID src/note.txt --output ./note.txt
pyro workspace shell open WORKSPACE_ID
pyro workspace service start WORKSPACE_ID app --ready-file .ready -- \
sh -lc 'touch .ready && while true; do sleep 60; done'
pyro workspace logs WORKSPACE_ID
"""
),
@ -570,6 +618,8 @@ def _build_parser() -> argparse.ArgumentParser:
pyro workspace create debian:12 --seed-path ./repo
pyro workspace sync push WORKSPACE_ID ./changes
pyro workspace diff WORKSPACE_ID
pyro workspace service start WORKSPACE_ID app --ready-file .ready -- \
sh -lc 'touch .ready && while true; do sleep 60; done'
"""
),
formatter_class=_HelpFormatter,
@ -943,6 +993,160 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_service_parser = workspace_subparsers.add_parser(
"service",
help="Manage long-running services inside a workspace.",
description=(
"Start, inspect, and stop named long-running services inside one started workspace."
),
epilog=dedent(
"""
Examples:
pyro workspace service start WORKSPACE_ID app --ready-file .ready -- \
sh -lc 'touch .ready && while true; do sleep 60; done'
pyro workspace service list WORKSPACE_ID
pyro workspace service status WORKSPACE_ID app
pyro workspace service logs WORKSPACE_ID app --tail-lines 50
pyro workspace service stop WORKSPACE_ID app
Use `--ready-file` by default in the curated Debian environments. `--ready-command`
remains available as an escape hatch.
"""
),
formatter_class=_HelpFormatter,
)
workspace_service_subparsers = workspace_service_parser.add_subparsers(
dest="workspace_service_command",
required=True,
metavar="SERVICE",
)
workspace_service_start_parser = workspace_service_subparsers.add_parser(
"start",
help="Start one named long-running service.",
description="Start a named service inside a started workspace with optional readiness.",
epilog=dedent(
"""
Examples:
pyro workspace service start WORKSPACE_ID app --ready-file .ready -- \
sh -lc 'touch .ready && while true; do sleep 60; done'
pyro workspace service start WORKSPACE_ID app --ready-command 'test -f .ready' -- \
sh -lc 'touch .ready && while true; do sleep 60; done'
"""
),
formatter_class=_HelpFormatter,
)
workspace_service_start_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_service_start_parser.add_argument("service_name", metavar="SERVICE_NAME")
workspace_service_start_parser.add_argument(
"--cwd",
default=WORKSPACE_GUEST_PATH,
help="Service working directory. Relative values resolve inside `/workspace`.",
)
workspace_service_start_parser.add_argument(
"--ready-file",
help="Mark the service ready once this workspace path exists.",
)
workspace_service_start_parser.add_argument(
"--ready-tcp",
help="Mark the service ready once this HOST:PORT accepts guest-local TCP connections.",
)
workspace_service_start_parser.add_argument(
"--ready-http",
help="Mark the service ready once this guest-local URL returns 2xx or 3xx.",
)
workspace_service_start_parser.add_argument(
"--ready-command",
help="Escape hatch readiness probe command. Use typed readiness when possible.",
)
workspace_service_start_parser.add_argument(
"--ready-timeout-seconds",
type=int,
default=DEFAULT_SERVICE_READY_TIMEOUT_SECONDS,
help="Maximum time to wait for readiness before failing the service start.",
)
workspace_service_start_parser.add_argument(
"--ready-interval-ms",
type=int,
default=DEFAULT_SERVICE_READY_INTERVAL_MS,
help="Polling interval between readiness checks.",
)
workspace_service_start_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_service_start_parser.add_argument(
"command_args",
nargs="*",
metavar="ARG",
help="Service command and arguments. Prefix them with `--`.",
)
workspace_service_list_parser = workspace_service_subparsers.add_parser(
"list",
help="List named services in one workspace.",
description="List named services and their current states for one workspace.",
epilog="Example:\n pyro workspace service list WORKSPACE_ID",
formatter_class=_HelpFormatter,
)
workspace_service_list_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_service_list_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_service_status_parser = workspace_service_subparsers.add_parser(
"status",
help="Inspect one service.",
description="Show state and readiness metadata for one named workspace service.",
epilog="Example:\n pyro workspace service status WORKSPACE_ID app",
formatter_class=_HelpFormatter,
)
workspace_service_status_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_service_status_parser.add_argument("service_name", metavar="SERVICE_NAME")
workspace_service_status_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_service_logs_parser = workspace_service_subparsers.add_parser(
"logs",
help="Read persisted service stdout and stderr.",
description="Read service stdout and stderr without using `workspace logs`.",
epilog="Example:\n pyro workspace service logs WORKSPACE_ID app --tail-lines 50",
formatter_class=_HelpFormatter,
)
workspace_service_logs_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_service_logs_parser.add_argument("service_name", metavar="SERVICE_NAME")
workspace_service_logs_parser.add_argument(
"--tail-lines",
type=int,
default=DEFAULT_SERVICE_LOG_TAIL_LINES,
help="Maximum number of trailing lines to return from each service log stream.",
)
workspace_service_logs_parser.add_argument(
"--all",
action="store_true",
help="Return full stdout and stderr instead of tailing them.",
)
workspace_service_logs_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_service_stop_parser = workspace_service_subparsers.add_parser(
"stop",
help="Stop one running service.",
description="Stop one named workspace service with TERM then KILL fallback.",
epilog="Example:\n pyro workspace service stop WORKSPACE_ID app",
formatter_class=_HelpFormatter,
)
workspace_service_stop_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_service_stop_parser.add_argument("service_name", metavar="SERVICE_NAME")
workspace_service_stop_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_status_parser = workspace_subparsers.add_parser(
"status",
help="Inspect one workspace.",
@ -1372,6 +1576,128 @@ def main() -> None:
else:
_print_workspace_shell_summary_human(payload, prefix="workspace-shell-close")
return
if args.workspace_command == "service":
if args.workspace_service_command == "start":
readiness_count = sum(
value is not None
for value in (
args.ready_file,
args.ready_tcp,
args.ready_http,
args.ready_command,
)
)
if readiness_count > 1:
error = (
"choose at most one of --ready-file, --ready-tcp, "
"--ready-http, or --ready-command"
)
if bool(args.json):
_print_json({"ok": False, "error": error})
else:
print(f"[error] {error}", file=sys.stderr, flush=True)
raise SystemExit(1)
readiness: dict[str, Any] | None = None
if args.ready_file is not None:
readiness = {"type": "file", "path": args.ready_file}
elif args.ready_tcp is not None:
readiness = {"type": "tcp", "address": args.ready_tcp}
elif args.ready_http is not None:
readiness = {"type": "http", "url": args.ready_http}
elif args.ready_command is not None:
readiness = {"type": "command", "command": args.ready_command}
command = _require_command(args.command_args)
try:
payload = pyro.start_service(
args.workspace_id,
args.service_name,
command=command,
cwd=args.cwd,
readiness=readiness,
ready_timeout_seconds=args.ready_timeout_seconds,
ready_interval_ms=args.ready_interval_ms,
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_service_summary_human(
payload,
prefix="workspace-service-start",
)
return
if args.workspace_service_command == "list":
try:
payload = pyro.list_services(args.workspace_id)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_service_list_human(payload)
return
if args.workspace_service_command == "status":
try:
payload = pyro.status_service(args.workspace_id, args.service_name)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_service_summary_human(
payload,
prefix="workspace-service-status",
)
return
if args.workspace_service_command == "logs":
try:
payload = pyro.logs_service(
args.workspace_id,
args.service_name,
tail_lines=args.tail_lines,
all=bool(args.all),
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_service_logs_human(payload)
return
if args.workspace_service_command == "stop":
try:
payload = pyro.stop_service(args.workspace_id, args.service_name)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_service_summary_human(
payload,
prefix="workspace-service-stop",
)
return
if args.workspace_command == "status":
payload = pyro.status_workspace(args.workspace_id)
if bool(args.json):

View file

@ -12,10 +12,12 @@ PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = (
"exec",
"export",
"logs",
"service",
"shell",
"status",
"sync",
)
# Subcommand names available under `pyro workspace service`, kept in alphabetical order.
PUBLIC_CLI_WORKSPACE_SERVICE_SUBCOMMANDS = ("list", "logs", "start", "status", "stop")
PUBLIC_CLI_WORKSPACE_SHELL_SUBCOMMANDS = ("close", "open", "read", "signal", "write")
PUBLIC_CLI_WORKSPACE_SYNC_SUBCOMMANDS = ("push",)
PUBLIC_CLI_WORKSPACE_CREATE_FLAGS = (
@ -29,6 +31,20 @@ PUBLIC_CLI_WORKSPACE_CREATE_FLAGS = (
)
PUBLIC_CLI_WORKSPACE_DIFF_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS = ("--output", "--json")
# Option flags for each `pyro workspace service <subcommand>` parser.
# NOTE(review): these appear to be the public-contract listing that the CLI
# parser is expected to match — confirm against the contract tests.
PUBLIC_CLI_WORKSPACE_SERVICE_LIST_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SERVICE_LOGS_FLAGS = ("--tail-lines", "--all", "--json")
# `start` carries the working directory, the four typed readiness probes,
# and the readiness timing knobs.
PUBLIC_CLI_WORKSPACE_SERVICE_START_FLAGS = (
"--cwd",
"--ready-file",
"--ready-tcp",
"--ready-http",
"--ready-command",
"--ready-timeout-seconds",
"--ready-interval-ms",
"--json",
)
PUBLIC_CLI_WORKSPACE_SERVICE_STATUS_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SERVICE_STOP_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SHELL_OPEN_FLAGS = ("--cwd", "--cols", "--rows", "--json")
PUBLIC_CLI_WORKSPACE_SHELL_READ_FLAGS = ("--cursor", "--max-chars", "--json")
PUBLIC_CLI_WORKSPACE_SHELL_WRITE_FLAGS = ("--input", "--no-newline", "--json")
@ -58,6 +74,8 @@ PUBLIC_SDK_METHODS = (
"export_workspace",
"inspect_environment",
"list_environments",
"list_services",
"logs_service",
"logs_workspace",
"network_info_vm",
"open_shell",
@ -68,14 +86,22 @@ PUBLIC_SDK_METHODS = (
"reap_expired",
"run_in_vm",
"signal_shell",
"start_service",
"start_vm",
"status_service",
"status_vm",
"status_workspace",
"stop_service",
"stop_vm",
"write_shell",
)
PUBLIC_MCP_TOOLS = (
"service_list",
"service_logs",
"service_start",
"service_status",
"service_stop",
"shell_close",
"shell_open",
"shell_read",

View file

@ -8,7 +8,8 @@ import fcntl
import io
import json
import os
import pty
import re
import shlex
import signal
import socket
import struct
@ -18,6 +19,8 @@ import tempfile
import termios
import threading
import time
import urllib.error
import urllib.request
from pathlib import Path, PurePosixPath
from typing import Any
@ -25,6 +28,8 @@ PORT = 5005
BUFFER_SIZE = 65536
WORKSPACE_ROOT = PurePosixPath("/workspace")
SHELL_ROOT = Path("/run/pyro-shells")
SERVICE_ROOT = Path("/run/pyro-services")
SERVICE_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$")
SHELL_SIGNAL_MAP = {
"HUP": signal.SIGHUP,
"INT": signal.SIGINT,
@ -105,6 +110,35 @@ def _normalize_shell_cwd(cwd: str) -> tuple[str, Path]:
return str(normalized), host_path
def _normalize_service_name(service_name: str) -> str:
    """Strip surrounding whitespace from a service name and validate it.

    Raises:
        RuntimeError: if the stripped name is empty, or does not fully match
            ``SERVICE_NAME_RE``.
    """
    candidate = service_name.strip()
    if not candidate:
        raise RuntimeError("service_name is required")
    if not SERVICE_NAME_RE.fullmatch(candidate):
        raise RuntimeError("service_name is invalid")
    return candidate
def _service_stdout_path(service_name: str) -> Path:
    """Return the file under ``SERVICE_ROOT`` that holds the service's stdout."""
    return SERVICE_ROOT.joinpath(f"{service_name}.stdout")
def _service_stderr_path(service_name: str) -> Path:
    """Return the file under ``SERVICE_ROOT`` that holds the service's stderr."""
    return SERVICE_ROOT.joinpath(f"{service_name}.stderr")
def _service_status_path(service_name: str) -> Path:
    """Return the file under ``SERVICE_ROOT`` where the runner records the exit status."""
    return SERVICE_ROOT.joinpath(f"{service_name}.status")
def _service_runner_path(service_name: str) -> Path:
    """Return the path of the generated runner script for a service."""
    return SERVICE_ROOT.joinpath(f"{service_name}.runner.sh")
def _service_metadata_path(service_name: str) -> Path:
    """Return the path of the JSON record persisted for a service."""
    return SERVICE_ROOT.joinpath(f"{service_name}.json")
def _validate_symlink_target(member_path: PurePosixPath, link_target: str) -> None:
target = link_target.strip()
if target == "":
@ -286,7 +320,7 @@ class GuestShellSession:
self._log_path = SHELL_ROOT / f"{shell_id}.log"
self._master_fd: int | None = None
master_fd, slave_fd = pty.openpty()
master_fd, slave_fd = os.openpty()
try:
_set_pty_size(slave_fd, rows, cols)
env = os.environ.copy()
@ -512,6 +546,268 @@ def _remove_shell(shell_id: str) -> GuestShellSession:
raise RuntimeError(f"shell {shell_id!r} does not exist") from exc
def _read_service_metadata(service_name: str) -> dict[str, Any]:
    """Load the persisted JSON record for a service.

    Raises:
        RuntimeError: if no record file exists for the service, or the file
            decodes to something other than a JSON object.
    """
    record_path = _service_metadata_path(service_name)
    if not record_path.exists():
        raise RuntimeError(f"service {service_name!r} does not exist")
    raw_record = record_path.read_text(encoding="utf-8")
    record = json.loads(raw_record)
    if isinstance(record, dict):
        return record
    raise RuntimeError(f"service record for {service_name!r} is invalid")
def _write_service_metadata(service_name: str, payload: dict[str, Any]) -> None:
    """Persist a service record as indented, key-sorted JSON."""
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    target = _service_metadata_path(service_name)
    target.write_text(serialized, encoding="utf-8")
def _service_exit_code(service_name: str) -> int | None:
    """Return the exit code recorded in the service's status file.

    Returns ``None`` when the status file is missing or empty.
    """
    recorded = _service_status_path(service_name)
    if recorded.exists():
        text = recorded.read_text(encoding="utf-8", errors="ignore").strip()
        if text:
            return int(text)
    return None
def _service_pid_running(pid: int | None) -> bool:
if pid is None:
return False
try:
os.kill(pid, 0)
except ProcessLookupError:
return False
except PermissionError:
return True
return True
def _tail_service_text(path: Path, *, tail_lines: int | None) -> tuple[str, bool]:
if not path.exists():
return "", False
text = path.read_text(encoding="utf-8", errors="replace")
if tail_lines is None:
return text, False
lines = text.splitlines(keepends=True)
if len(lines) <= tail_lines:
return text, False
return "".join(lines[-tail_lines:]), True
def _stop_service_process(pid: int) -> tuple[bool, bool]:
    """Terminate the process group ``pid``, escalating to SIGKILL if needed.

    SIGTERM is sent first and the process is polled for up to five seconds.
    If it is still alive, SIGKILL is sent and it is polled for up to five
    more seconds.

    Returns:
        ``(signalled, killed)``: ``signalled`` is ``False`` only when the
        process group was already gone before SIGTERM; ``killed`` is ``True``
        when SIGKILL was actually delivered.
    """

    def _gone_within(seconds: float) -> bool:
        # Poll every 100 ms until the pid disappears or the window closes.
        cutoff = time.monotonic() + seconds
        while time.monotonic() < cutoff:
            if not _service_pid_running(pid):
                return True
            time.sleep(0.1)
        return False

    try:
        os.killpg(pid, signal.SIGTERM)
    except ProcessLookupError:
        return False, False
    if _gone_within(5):
        return True, False
    try:
        os.killpg(pid, signal.SIGKILL)
    except ProcessLookupError:
        # The group vanished between the last poll and the SIGKILL.
        return True, False
    _gone_within(5)
    return True, True
def _refresh_service_payload(service_name: str, payload: dict[str, Any]) -> dict[str, Any]:
    """Reconcile a service record with the actual liveness of its process.

    A record whose state is not ``running``, or whose pid is still alive, is
    returned unchanged. Otherwise a copy marked ``exited`` is filled in with
    an end time and the recorded exit code, persisted, and returned.
    """
    marked_running = str(payload.get("state", "stopped")) == "running"
    if not marked_running:
        return payload
    raw_pid = payload.get("pid")
    if _service_pid_running(int(raw_pid) if raw_pid is not None else None):
        return payload
    updated = {**payload, "state": "exited"}
    # Keep an existing end timestamp if one was already recorded.
    updated["ended_at"] = updated.get("ended_at") or time.time()
    updated["exit_code"] = _service_exit_code(service_name)
    _write_service_metadata(service_name, updated)
    return updated
def _run_readiness_probe(readiness: dict[str, Any] | None, *, cwd: Path) -> bool:
if readiness is None:
return True
readiness_type = str(readiness["type"])
if readiness_type == "file":
_, ready_path = _normalize_destination(str(readiness["path"]))
return ready_path.exists()
if readiness_type == "tcp":
host, raw_port = str(readiness["address"]).rsplit(":", 1)
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.settimeout(1)
try:
sock.connect((host, int(raw_port)))
except OSError:
return False
return True
if readiness_type == "http":
request = urllib.request.Request(str(readiness["url"]), method="GET")
try:
with urllib.request.urlopen(request, timeout=2) as response: # noqa: S310
return 200 <= int(response.status) < 400
except (urllib.error.URLError, TimeoutError, ValueError):
return False
if readiness_type == "command":
proc = subprocess.run( # noqa: S603
["/bin/sh", "-lc", str(readiness["command"])],
cwd=str(cwd),
text=True,
capture_output=True,
timeout=10,
check=False,
)
return proc.returncode == 0
raise RuntimeError(f"unsupported readiness type: {readiness_type}")
def _start_service(
    *,
    service_name: str,
    command: str,
    cwd_text: str,
    readiness: dict[str, Any] | None,
    ready_timeout_seconds: int,
    ready_interval_ms: int,
) -> dict[str, Any]:
    """Launch a named background service and wait for it to become ready.

    The command runs through a generated runner script that appends stdout
    and stderr to per-service log files and records the exit status in a
    status file. The service record is persisted before and after the
    readiness wait.

    Args:
        service_name: Service identifier; validated and stripped.
        command: Shell command line passed to ``/bin/sh -lc``.
        cwd_text: Working directory, normalized via ``_normalize_shell_cwd``.
        readiness: Optional readiness probe description.
        ready_timeout_seconds: How long to wait for readiness before the
            service is stopped and marked failed.
        ready_interval_ms: Delay between readiness checks (at least 1 ms).

    Returns:
        The persisted service record. ``state`` is ``running`` on success and
        ``failed`` when the process exited early or readiness timed out.

    Raises:
        RuntimeError: if a service with this name is already running.
    """
    normalized_service_name = _normalize_service_name(service_name)
    normalized_cwd, cwd_path = _normalize_shell_cwd(cwd_text)
    existing = None
    metadata_path = _service_metadata_path(normalized_service_name)
    if metadata_path.exists():
        existing = _refresh_service_payload(
            normalized_service_name,
            _read_service_metadata(normalized_service_name),
        )
    if existing is not None and str(existing.get("state", "stopped")) == "running":
        raise RuntimeError(f"service {normalized_service_name!r} is already running")
    SERVICE_ROOT.mkdir(parents=True, exist_ok=True)
    stdout_path = _service_stdout_path(normalized_service_name)
    stderr_path = _service_stderr_path(normalized_service_name)
    status_path = _service_status_path(normalized_service_name)
    runner_path = _service_runner_path(normalized_service_name)
    # Start from empty logs and no stale exit status from a previous run.
    stdout_path.write_text("", encoding="utf-8")
    stderr_path.write_text("", encoding="utf-8")
    status_path.unlink(missing_ok=True)
    runner_path.write_text(
        "\n".join(
            [
                "#!/bin/sh",
                "set +e",
                f"cd {shlex.quote(str(cwd_path))}",
                (
                    f"/bin/sh -lc {shlex.quote(command)}"
                    f" >> {shlex.quote(str(stdout_path))}"
                    f" 2>> {shlex.quote(str(stderr_path))}"
                ),
                "status=$?",
                f"printf '%s' \"$status\" > {shlex.quote(str(status_path))}",
                "exit \"$status\"",
            ]
        )
        + "\n",
        encoding="utf-8",
    )
    runner_path.chmod(0o700)
    # A new session makes the runner its own process-group leader, so the
    # recorded pid can later be used as the group id for ``os.killpg``.
    process = subprocess.Popen(  # noqa: S603
        [str(runner_path)],
        cwd=str(cwd_path),
        text=True,
        start_new_session=True,
    )
    payload: dict[str, Any] = {
        "service_name": normalized_service_name,
        "command": command,
        "cwd": normalized_cwd,
        "state": "running",
        "started_at": time.time(),
        "readiness": readiness,
        "ready_at": None,
        "ended_at": None,
        "exit_code": None,
        "pid": process.pid,
        "stop_reason": None,
    }
    _write_service_metadata(normalized_service_name, payload)
    deadline = time.monotonic() + ready_timeout_seconds
    while True:
        # Reap the runner if it has already exited. An unreaped child stays a
        # zombie, which a signal-0 liveness probe still reports as alive, so
        # an early exit would otherwise go unnoticed until the timeout.
        process.poll()
        payload = _refresh_service_payload(normalized_service_name, payload)
        if str(payload.get("state", "stopped")) != "running":
            payload["state"] = "failed"
            payload["stop_reason"] = "process_exited_before_ready"
            payload["ended_at"] = payload.get("ended_at") or time.time()
            _write_service_metadata(normalized_service_name, payload)
            return payload
        if _run_readiness_probe(readiness, cwd=cwd_path):
            payload["ready_at"] = time.time()
            _write_service_metadata(normalized_service_name, payload)
            return payload
        if time.monotonic() >= deadline:
            _stop_service_process(process.pid)
            # Collect the terminated runner so the refresh below sees it gone.
            process.poll()
            payload = _refresh_service_payload(normalized_service_name, payload)
            payload["state"] = "failed"
            payload["stop_reason"] = "readiness_timeout"
            payload["ended_at"] = payload.get("ended_at") or time.time()
            _write_service_metadata(normalized_service_name, payload)
            return payload
        time.sleep(max(ready_interval_ms, 1) / 1000)
def _status_service(service_name: str) -> dict[str, Any]:
    """Return the current record for a service, refreshed against its process."""
    name = _normalize_service_name(service_name)
    stored_record = _read_service_metadata(name)
    return _refresh_service_payload(name, stored_record)
def _logs_service(service_name: str, *, tail_lines: int | None) -> dict[str, Any]:
    """Return the service status record extended with its captured output.

    Adds ``stdout``, ``stderr``, the requested ``tail_lines`` and a
    ``truncated`` flag that is set when either stream was shortened.
    """
    name = _normalize_service_name(service_name)
    record = _status_service(name)
    out_text, out_cut = _tail_service_text(_service_stdout_path(name), tail_lines=tail_lines)
    err_text, err_cut = _tail_service_text(_service_stderr_path(name), tail_lines=tail_lines)
    record["stdout"] = out_text
    record["stderr"] = err_text
    record["tail_lines"] = tail_lines
    record["truncated"] = out_cut or err_cut
    return record
def _stop_service(service_name: str) -> dict[str, Any]:
    """Stop a running service and persist its final record.

    A service with no recorded pid, or one that is not in the ``running``
    state, is returned as-is. Otherwise the process group is terminated and
    the record is marked ``stopped`` with a ``sigterm``/``sigkill`` reason.
    """
    name = _normalize_service_name(service_name)
    record = _status_service(name)
    pid = record.get("pid")
    if pid is None:
        return record
    if str(record.get("state", "stopped")) != "running":
        return record
    _, killed = _stop_service_process(int(pid))
    # Re-read so the final record reflects the refreshed on-disk state.
    record = _status_service(name)
    record["state"] = "stopped"
    record["stop_reason"] = "sigkill" if killed else "sigterm"
    record["ended_at"] = record.get("ended_at") or time.time()
    _write_service_metadata(name, record)
    return record
def _dispatch(request: dict[str, Any], conn: socket.socket) -> dict[str, Any]:
action = str(request.get("action", "exec"))
if action == "extract_archive":
@ -564,6 +860,31 @@ def _dispatch(request: dict[str, Any], conn: socket.socket) -> dict[str, Any]:
if shell_id == "":
raise RuntimeError("shell_id is required")
return _remove_shell(shell_id).close()
if action == "start_service":
service_name = str(request.get("service_name", "")).strip()
command = str(request.get("command", ""))
cwd_text = str(request.get("cwd", "/workspace"))
readiness = request.get("readiness")
readiness_payload = dict(readiness) if isinstance(readiness, dict) else None
return _start_service(
service_name=service_name,
command=command,
cwd_text=cwd_text,
readiness=readiness_payload,
ready_timeout_seconds=int(request.get("ready_timeout_seconds", 30)),
ready_interval_ms=int(request.get("ready_interval_ms", 500)),
)
if action == "status_service":
service_name = str(request.get("service_name", "")).strip()
return _status_service(service_name)
if action == "logs_service":
service_name = str(request.get("service_name", "")).strip()
tail_lines = request.get("tail_lines")
normalized_tail_lines = None if tail_lines is None else int(tail_lines)
return _logs_service(service_name, tail_lines=normalized_tail_lines)
if action == "stop_service":
service_name = str(request.get("service_name", "")).strip()
return _stop_service(service_name)
command = str(request.get("command", ""))
timeout_seconds = int(request.get("timeout_seconds", 30))
return _run_command(command, timeout_seconds)
@ -571,6 +892,7 @@ def _dispatch(request: dict[str, Any], conn: socket.socket) -> dict[str, Any]:
def main() -> None:
SHELL_ROOT.mkdir(parents=True, exist_ok=True)
SERVICE_ROOT.mkdir(parents=True, exist_ok=True)
family = getattr(socket, "AF_VSOCK", None)
if family is None:
raise SystemExit("AF_VSOCK is unavailable")

View file

@ -25,7 +25,7 @@
"guest": {
"agent": {
"path": "guest/pyro_guest_agent.py",
"sha256": "4118589ccd8f4ac8200d9cedf25d13ff515d77c28094bbbdb208310247688b40"
"sha256": "58dd2e09d05538228540d8c667b1acb42c2e6c579f7883b70d483072570f2499"
}
},
"platform": "linux-x86_64",

View file

@ -19,7 +19,7 @@ from typing import Any
from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths
DEFAULT_ENVIRONMENT_VERSION = "1.0.0"
DEFAULT_CATALOG_VERSION = "2.6.0"
DEFAULT_CATALOG_VERSION = "2.7.0"
OCI_MANIFEST_ACCEPT = ", ".join(
(
"application/vnd.oci.image.index.v1+json",

View file

@ -325,6 +325,102 @@ class VsockExecClient:
self._shell_summary_from_payload(payload)
return payload
def start_service(
self,
guest_cid: int,
port: int,
*,
service_name: str,
command: str,
cwd: str,
readiness: dict[str, Any] | None,
ready_timeout_seconds: int,
ready_interval_ms: int,
timeout_seconds: int = 60,
uds_path: str | None = None,
) -> dict[str, Any]:
return self._request_json(
guest_cid,
port,
{
"action": "start_service",
"service_name": service_name,
"command": command,
"cwd": cwd,
"readiness": readiness,
"ready_timeout_seconds": ready_timeout_seconds,
"ready_interval_ms": ready_interval_ms,
},
timeout_seconds=timeout_seconds,
uds_path=uds_path,
error_message="guest service start response must be a JSON object",
)
def status_service(
self,
guest_cid: int,
port: int,
*,
service_name: str,
timeout_seconds: int = 30,
uds_path: str | None = None,
) -> dict[str, Any]:
return self._request_json(
guest_cid,
port,
{
"action": "status_service",
"service_name": service_name,
},
timeout_seconds=timeout_seconds,
uds_path=uds_path,
error_message="guest service status response must be a JSON object",
)
def logs_service(
self,
guest_cid: int,
port: int,
*,
service_name: str,
tail_lines: int | None,
timeout_seconds: int = 30,
uds_path: str | None = None,
) -> dict[str, Any]:
return self._request_json(
guest_cid,
port,
{
"action": "logs_service",
"service_name": service_name,
"tail_lines": tail_lines,
},
timeout_seconds=timeout_seconds,
uds_path=uds_path,
error_message="guest service logs response must be a JSON object",
)
def stop_service(
self,
guest_cid: int,
port: int,
*,
service_name: str,
timeout_seconds: int = 30,
uds_path: str | None = None,
) -> dict[str, Any]:
return self._request_json(
guest_cid,
port,
{
"action": "stop_service",
"service_name": service_name,
},
timeout_seconds=timeout_seconds,
uds_path=uds_path,
error_message="guest service stop response must be a JSON object",
)
def _request_json(
self,
guest_cid: int,

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,6 @@ from __future__ import annotations
import codecs
import fcntl
import os
import pty
import shlex
import signal
import struct
@ -14,7 +13,7 @@ import termios
import threading
import time
from pathlib import Path
from typing import Literal
from typing import IO, Literal
ShellState = Literal["running", "stopped"]
@ -59,41 +58,60 @@ class LocalShellSession:
self._lock = threading.RLock()
self._output = ""
self._master_fd: int | None = None
self._input_pipe: IO[bytes] | None = None
self._output_pipe: IO[bytes] | None = None
self._reader: threading.Thread | None = None
self._waiter: threading.Thread | None = None
self._decoder = codecs.getincrementaldecoder("utf-8")("replace")
env = os.environ.copy()
env.update(
{
"TERM": env.get("TERM", "xterm-256color"),
"PS1": "pyro$ ",
"PROMPT_COMMAND": "",
}
)
master_fd, slave_fd = pty.openpty()
process: subprocess.Popen[bytes]
try:
_set_pty_size(slave_fd, rows, cols)
env = os.environ.copy()
env.update(
{
"TERM": env.get("TERM", "xterm-256color"),
"PS1": "pyro$ ",
"PROMPT_COMMAND": "",
}
)
master_fd, slave_fd = os.openpty()
except OSError:
process = subprocess.Popen( # noqa: S603
["/bin/bash", "--noprofile", "--norc", "-i"],
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
["/bin/bash", "--noprofile", "--norc"],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
cwd=str(cwd),
env=env,
text=False,
close_fds=True,
preexec_fn=os.setsid,
)
except Exception:
os.close(master_fd)
raise
finally:
os.close(slave_fd)
self._input_pipe = process.stdin
self._output_pipe = process.stdout
else:
try:
_set_pty_size(slave_fd, rows, cols)
process = subprocess.Popen( # noqa: S603
["/bin/bash", "--noprofile", "--norc", "-i"],
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
cwd=str(cwd),
env=env,
text=False,
close_fds=True,
preexec_fn=os.setsid,
)
except Exception:
os.close(master_fd)
raise
finally:
os.close(slave_fd)
self._master_fd = master_fd
self._process = process
self.pid = process.pid
self._master_fd = master_fd
self._reader = threading.Thread(target=self._reader_loop, daemon=True)
self._waiter = threading.Thread(target=self._waiter_loop, daemon=True)
self._reader.start()
@ -136,11 +154,16 @@ class LocalShellSession:
if self.state != "running":
raise RuntimeError(f"shell {self.shell_id} is not running")
master_fd = self._master_fd
if master_fd is None:
raise RuntimeError(f"shell {self.shell_id} transport is unavailable")
input_pipe = self._input_pipe
payload = text + ("\n" if append_newline else "")
try:
os.write(master_fd, payload.encode("utf-8"))
if master_fd is not None:
os.write(master_fd, payload.encode("utf-8"))
else:
if input_pipe is None:
raise RuntimeError(f"shell {self.shell_id} transport is unavailable")
input_pipe.write(payload.encode("utf-8"))
input_pipe.flush()
except OSError as exc:
self._refresh_process_state()
raise RuntimeError(f"failed to write to shell {self.shell_id}: {exc}") from exc
@ -195,11 +218,17 @@ class LocalShellSession:
def _reader_loop(self) -> None:
master_fd = self._master_fd
if master_fd is None:
output_pipe = self._output_pipe
if master_fd is None and output_pipe is None:
return
while True:
try:
chunk = os.read(master_fd, 65536)
if master_fd is not None:
chunk = os.read(master_fd, 65536)
else:
if output_pipe is None:
break
chunk = os.read(output_pipe.fileno(), 65536)
except OSError:
break
if chunk == b"":
@ -234,6 +263,14 @@ class LocalShellSession:
with self._lock:
master_fd = self._master_fd
self._master_fd = None
input_pipe = self._input_pipe
self._input_pipe = None
output_pipe = self._output_pipe
self._output_pipe = None
if input_pipe is not None:
input_pipe.close()
if output_pipe is not None:
output_pipe.close()
if master_fd is None:
return
try: