Add model-native workspace file operations

Remove shell-escaped file mutation from the stable workspace flow by adding explicit file and patch tools across the CLI, SDK, and MCP surfaces.

This adds workspace file list/read/write plus unified text patch application, backed by new guest and manager file primitives that stay scoped to started workspaces and /workspace only. Patch application is preflighted on the host, file writes stay text-only and bounded, and the existing diff/export/reset semantics remain intact.

The milestone also updates the 3.2.0 roadmap, public contract, docs, examples, and versioning, and includes focused coverage for the new helper module and dispatch paths.

Validation:
- uv lock
- UV_CACHE_DIR=.uv-cache make check
- UV_CACHE_DIR=.uv-cache make dist-check
- a real guest-backed smoke test covering workspace file read, patch apply, exec, export, and delete
This commit is contained in:
Thales Maciel 2026-03-12 22:03:25 -03:00
parent dbb71a3174
commit ab02ae46c7
27 changed files with 3068 additions and 17 deletions

View file

@ -40,6 +40,25 @@ from pyro_mcp.workspace_disk import (
read_workspace_disk_file,
scrub_workspace_runtime_paths,
)
from pyro_mcp.workspace_files import (
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES as DEFAULT_WORKSPACE_FILE_READ_LIMIT,
)
from pyro_mcp.workspace_files import (
WORKSPACE_FILE_MAX_BYTES as WORKSPACE_FILE_MAX_LIMIT,
)
from pyro_mcp.workspace_files import (
WORKSPACE_PATCH_MAX_BYTES as WORKSPACE_PATCH_MAX_LIMIT,
)
from pyro_mcp.workspace_files import (
WorkspaceTextPatch,
apply_unified_text_patch,
delete_workspace_path,
list_workspace_files,
normalize_workspace_path,
parse_unified_text_patch,
read_workspace_file,
write_workspace_file,
)
from pyro_mcp.workspace_ports import DEFAULT_PUBLISHED_PORT_HOST
from pyro_mcp.workspace_shells import (
create_local_shell,
@ -79,6 +98,9 @@ DEFAULT_SHELL_COLS = 120
DEFAULT_SHELL_ROWS = 30
DEFAULT_SHELL_MAX_CHARS = 65536
DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES = 65536
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES = DEFAULT_WORKSPACE_FILE_READ_LIMIT
WORKSPACE_FILE_MAX_BYTES = WORKSPACE_FILE_MAX_LIMIT
WORKSPACE_PATCH_MAX_BYTES = WORKSPACE_PATCH_MAX_LIMIT
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS = 30
DEFAULT_SERVICE_READY_INTERVAL_MS = 500
DEFAULT_SERVICE_LOG_TAIL_LINES = 200
@ -818,6 +840,49 @@ def _normalize_workspace_disk_path(path: str) -> str:
return normalized
def _normalize_workspace_file_path(path: str) -> str:
return normalize_workspace_path(path)
def _validate_workspace_file_read_max_bytes(max_bytes: int) -> int:
if max_bytes <= 0:
raise ValueError("max_bytes must be positive")
if max_bytes > WORKSPACE_FILE_MAX_BYTES:
raise ValueError(
f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes"
)
return max_bytes
def _validate_workspace_text_payload(text: str, *, field_name: str) -> str:
encoded = text.encode("utf-8")
if len(encoded) > WORKSPACE_FILE_MAX_BYTES:
raise ValueError(
f"{field_name} must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8"
)
return text
def _validate_workspace_patch_text(patch: str) -> str:
if patch.strip() == "":
raise ValueError("patch must not be empty")
encoded = patch.encode("utf-8")
if len(encoded) > WORKSPACE_PATCH_MAX_BYTES:
raise ValueError(
f"patch must be at most {WORKSPACE_PATCH_MAX_BYTES} bytes when encoded as UTF-8"
)
return patch
def _decode_workspace_patch_text(path: str, content_bytes: bytes) -> str:
try:
return content_bytes.decode("utf-8")
except UnicodeDecodeError as exc:
raise RuntimeError(
f"workspace patch only supports UTF-8 text files: {path}"
) from exc
def _normalize_archive_member_name(name: str) -> PurePosixPath:
candidate = name.strip()
if candidate == "":
@ -2077,6 +2142,41 @@ class VmBackend:
) -> dict[str, Any]:
raise NotImplementedError
def list_workspace_entries( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
recursive: bool,
) -> dict[str, Any]:
raise NotImplementedError
def read_workspace_file( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
max_bytes: int,
) -> dict[str, Any]:
raise NotImplementedError
def write_workspace_file( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
text: str,
) -> dict[str, Any]:
raise NotImplementedError
def delete_workspace_path( # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
) -> dict[str, Any]:
raise NotImplementedError
def open_shell( # pragma: no cover
self,
instance: VmInstance,
@ -2256,6 +2356,79 @@ class MockBackend(VmBackend):
"execution_mode": "host_compat",
}
def list_workspace_entries(
self,
instance: VmInstance,
*,
workspace_path: str,
recursive: bool,
) -> dict[str, Any]:
listing = list_workspace_files(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
recursive=recursive,
)
return {
"path": listing.path,
"artifact_type": listing.artifact_type,
"entries": [entry.to_payload() for entry in listing.entries],
"execution_mode": "host_compat",
}
def read_workspace_file(
self,
instance: VmInstance,
*,
workspace_path: str,
max_bytes: int,
) -> dict[str, Any]:
file_result = read_workspace_file(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
max_bytes=max_bytes,
)
return {
"path": file_result.path,
"size_bytes": file_result.size_bytes,
"content_bytes": file_result.content_bytes,
"execution_mode": "host_compat",
}
def write_workspace_file(
self,
instance: VmInstance,
*,
workspace_path: str,
text: str,
) -> dict[str, Any]:
result = write_workspace_file(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
text=text,
)
return {
"path": result.path,
"size_bytes": result.size_bytes,
"bytes_written": result.bytes_written,
"execution_mode": "host_compat",
}
def delete_workspace_path(
self,
instance: VmInstance,
*,
workspace_path: str,
) -> dict[str, Any]:
result = delete_workspace_path(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
)
return {
"path": result.path,
"deleted": result.deleted,
"execution_mode": "host_compat",
}
def open_shell(
self,
instance: VmInstance,
@ -2776,6 +2949,134 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
"execution_mode": "host_compat",
}
def list_workspace_entries(
self,
instance: VmInstance,
*,
workspace_path: str,
recursive: bool,
) -> dict[str, Any]:
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.list_workspace_entries(
guest_cid,
port,
workspace_path=workspace_path,
recursive=recursive,
uds_path=uds_path,
)
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
instance.metadata["execution_mode"] = "host_compat"
listing = list_workspace_files(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
recursive=recursive,
)
return {
"path": listing.path,
"artifact_type": listing.artifact_type,
"entries": [entry.to_payload() for entry in listing.entries],
"execution_mode": "host_compat",
}
def read_workspace_file(
self,
instance: VmInstance,
*,
workspace_path: str,
max_bytes: int,
) -> dict[str, Any]:
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.read_workspace_file(
guest_cid,
port,
workspace_path=workspace_path,
max_bytes=max_bytes,
uds_path=uds_path,
)
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
instance.metadata["execution_mode"] = "host_compat"
file_result = read_workspace_file(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
max_bytes=max_bytes,
)
return {
"path": file_result.path,
"size_bytes": file_result.size_bytes,
"content_bytes": file_result.content_bytes,
"execution_mode": "host_compat",
}
def write_workspace_file(
self,
instance: VmInstance,
*,
workspace_path: str,
text: str,
) -> dict[str, Any]:
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.write_workspace_file(
guest_cid,
port,
workspace_path=workspace_path,
text=text,
uds_path=uds_path,
)
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
instance.metadata["execution_mode"] = "host_compat"
result = write_workspace_file(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
text=text,
)
return {
"path": result.path,
"size_bytes": result.size_bytes,
"bytes_written": result.bytes_written,
"execution_mode": "host_compat",
}
def delete_workspace_path(
self,
instance: VmInstance,
*,
workspace_path: str,
) -> dict[str, Any]:
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.delete_workspace_path(
guest_cid,
port,
workspace_path=workspace_path,
uds_path=uds_path,
)
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
instance.metadata["execution_mode"] = "host_compat"
result = delete_workspace_path(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
)
return {
"path": result.path,
"deleted": result.deleted,
"execution_mode": "host_compat",
}
def open_shell(
self,
instance: VmInstance,
@ -3585,6 +3886,235 @@ class VmManager:
diff_payload["workspace_id"] = workspace_id
return diff_payload
def list_workspace_files(
self,
workspace_id: str,
*,
path: str = WORKSPACE_GUEST_PATH,
recursive: bool = False,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_file_path(path)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="workspace_file_list",
)
listing = self._backend.list_workspace_entries(
instance,
workspace_path=normalized_path,
recursive=recursive,
)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"path": str(listing["path"]),
"recursive": recursive,
"entries": cast(list[dict[str, Any]], list(listing.get("entries", []))),
"execution_mode": str(
listing.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
),
}
def read_workspace_file(
    self,
    workspace_id: str,
    path: str,
    *,
    max_bytes: int = DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES,
) -> dict[str, Any]:
    """Read up to *max_bytes* of a /workspace file as UTF-8 text.

    The backend is always asked for the full WORKSPACE_FILE_MAX_BYTES so the
    host performs the caller-visible slicing deterministically. The returned
    ``truncated`` flag reports whether the file holds more bytes than the
    caller's limit.

    Raises:
        ValueError: if ``max_bytes`` is non-positive or above the cap.
    """
    normalized_path = _normalize_workspace_file_path(path)
    normalized_max_bytes = _validate_workspace_file_read_max_bytes(max_bytes)
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_file_read",
        )
    payload = self._backend.read_workspace_file(
        instance,
        workspace_path=normalized_path,
        max_bytes=WORKSPACE_FILE_MAX_BYTES,
    )
    raw_bytes = cast(bytes, payload["content_bytes"])
    size_bytes = int(payload["size_bytes"])
    # errors="replace" keeps a slice that splits a multi-byte sequence from
    # raising; the caller sees a replacement character at the cut point.
    content = raw_bytes[:normalized_max_bytes].decode("utf-8", errors="replace")
    # Fix: consult the reported file size as well. If the backend capped the
    # returned bytes at WORKSPACE_FILE_MAX_BYTES and the caller requested
    # exactly that limit, len(raw_bytes) alone would wrongly report
    # truncated=False for a larger file.
    truncated = size_bytes > normalized_max_bytes or len(raw_bytes) > normalized_max_bytes
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        workspace.state = instance.state
        workspace.firecracker_pid = instance.firecracker_pid
        workspace.last_error = instance.last_error
        workspace.metadata = dict(instance.metadata)
        self._save_workspace_locked(workspace)
    return {
        "workspace_id": workspace_id,
        "path": str(payload["path"]),
        "size_bytes": size_bytes,
        "max_bytes": normalized_max_bytes,
        "content": content,
        "truncated": truncated,
        "execution_mode": str(
            payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
        ),
    }
def write_workspace_file(
self,
workspace_id: str,
path: str,
*,
text: str,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_file_path(path)
normalized_text = _validate_workspace_text_payload(text, field_name="text")
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="workspace_file_write",
)
payload = self._backend.write_workspace_file(
instance,
workspace_path=normalized_path,
text=normalized_text,
)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"path": str(payload["path"]),
"size_bytes": int(payload["size_bytes"]),
"bytes_written": int(payload["bytes_written"]),
"execution_mode": str(
payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
),
}
def apply_workspace_patch(
    self,
    workspace_id: str,
    *,
    patch: str,
) -> dict[str, Any]:
    """Apply a unified text patch to files under /workspace.

    The patch is validated and fully preflighted on the host (every target
    read and patched in memory) before any write or delete is sent to the
    backend, so a malformed patch leaves the workspace untouched.

    Raises:
        ValueError: empty/oversized patch or duplicate file entries.
        RuntimeError: target is not a regular file, is not UTF-8 text, or
            its existence contradicts the patch status.
    """
    patch_text = _validate_workspace_patch_text(patch)
    parsed_patches = parse_unified_text_patch(patch_text)
    # Index file patches by path, rejecting duplicates up front.
    patch_by_path: dict[str, WorkspaceTextPatch] = {}
    for text_patch in parsed_patches:
        if text_patch.path in patch_by_path:
            raise ValueError(f"patch contains duplicate file entries for {text_patch.path}")
        patch_by_path[text_patch.path] = text_patch
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="workspace_patch_apply",
        )
    # Preflight phase: compute every resulting file text before mutating.
    planned_writes: dict[str, str] = {}
    planned_deletes: list[str] = []
    summary = {
        "total": 0,
        "added": 0,
        "modified": 0,
        "deleted": 0,
    }
    entries: list[dict[str, str]] = []
    for path_text in sorted(patch_by_path):
        file_patch = patch_by_path[path_text]
        listing: dict[str, Any] | None = None
        current_text: str | None = None
        exists = True
        try:
            listing = self._backend.list_workspace_entries(
                instance,
                workspace_path=file_patch.path,
                recursive=False,
            )
        except RuntimeError as exc:
            # A missing path is expected for "added" entries; anything else
            # is a real backend failure and propagates.
            if "does not exist" in str(exc):
                exists = False
            else:
                raise
        if exists:
            if listing is None:
                raise RuntimeError(
                    f"workspace patch could not inspect current path: {file_patch.path}"
                )
            artifact_type = str(listing["artifact_type"])
            if artifact_type != "file":
                raise RuntimeError(
                    f"workspace patch only supports regular files: {file_patch.path}"
                )
            # Read the full current content (host-capped) for in-memory patching.
            current_payload = self._backend.read_workspace_file(
                instance,
                workspace_path=file_patch.path,
                max_bytes=WORKSPACE_FILE_MAX_BYTES,
            )
            current_text = _decode_workspace_patch_text(
                file_patch.path,
                cast(bytes, current_payload["content_bytes"]),
            )
        # The patch status must agree with on-disk existence.
        if file_patch.status == "added" and exists:
            raise RuntimeError(
                f"workspace patch cannot add an existing path: {file_patch.path}"
            )
        if file_patch.status in {"modified", "deleted"} and not exists:
            raise RuntimeError(
                f"workspace patch cannot modify a missing path: {file_patch.path}"
            )
        # None result means the file is removed by the patch.
        after_text = apply_unified_text_patch(
            path=file_patch.path,
            patch=file_patch,
            before_text=current_text,
        )
        if after_text is None:
            planned_deletes.append(file_patch.path)
        else:
            planned_writes[file_patch.path] = after_text
        summary["total"] += 1
        summary[file_patch.status] += 1
        entries.append({"path": file_patch.path, "status": file_patch.status})
    # Apply phase: preflight succeeded, so perform writes then deletes.
    for path_text in sorted(planned_writes):
        self._backend.write_workspace_file(
            instance,
            workspace_path=path_text,
            text=planned_writes[path_text],
        )
    for path_text in sorted(planned_deletes):
        self._backend.delete_workspace_path(
            instance,
            workspace_path=path_text,
        )
    # Persist any instance state the backend updated during the operation.
    with self._lock:
        workspace = self._load_workspace_locked(workspace_id)
        workspace.state = instance.state
        workspace.firecracker_pid = instance.firecracker_pid
        workspace.last_error = instance.last_error
        workspace.metadata = dict(instance.metadata)
        self._save_workspace_locked(workspace)
    return {
        "workspace_id": workspace_id,
        "changed": bool(entries),
        "summary": summary,
        "entries": entries,
        "patch": patch_text,
        "execution_mode": instance.metadata.get("execution_mode", "pending"),
    }
def create_snapshot(
self,
workspace_id: str,