Add model-native workspace file operations

Remove shell-escaped file mutation from the stable workspace flow by adding explicit file and patch tools across the CLI, SDK, and MCP surfaces.

This adds workspace file list/read/write plus unified text patch application, backed by new guest and manager file primitives that stay scoped to started workspaces and /workspace only. Patch application is preflighted on the host, file writes stay text-only and bounded, and the existing diff/export/reset semantics remain intact.

The milestone also updates the 3.2.0 roadmap, public contract, docs, examples, and versioning, and includes focused coverage for the new helper module and dispatch paths.

Validation:
- uv lock
- UV_CACHE_DIR=.uv-cache make check
- UV_CACHE_DIR=.uv-cache make dist-check
- real guest-backed smoke for workspace file read, patch apply, exec, export, and delete
This commit is contained in:
Thales Maciel 2026-03-12 22:03:25 -03:00
parent dbb71a3174
commit ab02ae46c7
27 changed files with 3068 additions and 17 deletions

View file

@ -157,6 +157,56 @@ class Pyro:
def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
    """Return the diff of `/workspace` against its create-time baseline."""
    manager = self._manager
    return manager.diff_workspace(workspace_id)
def list_workspace_files(
    self,
    workspace_id: str,
    *,
    path: str = "/workspace",
    recursive: bool = False,
) -> dict[str, Any]:
    """List files, directories, and symlinks under one started workspace path."""
    manager = self._manager
    return manager.list_workspace_files(workspace_id, path=path, recursive=recursive)
def read_workspace_file(
    self,
    workspace_id: str,
    path: str,
    *,
    max_bytes: int = 65536,
) -> dict[str, Any]:
    """Read one regular text file from a started workspace, bounded by `max_bytes`."""
    manager = self._manager
    return manager.read_workspace_file(workspace_id, path, max_bytes=max_bytes)
def write_workspace_file(
    self,
    workspace_id: str,
    path: str,
    *,
    text: str,
) -> dict[str, Any]:
    """Create or replace one UTF-8 text file under `/workspace`."""
    manager = self._manager
    return manager.write_workspace_file(workspace_id, path, text=text)
def apply_workspace_patch(
    self,
    workspace_id: str,
    *,
    patch: str,
) -> dict[str, Any]:
    """Apply one unified text patch inside a started workspace."""
    manager = self._manager
    return manager.apply_workspace_patch(workspace_id, patch=patch)
def export_workspace_disk(
self,
workspace_id: str,
@ -529,6 +579,56 @@ class Pyro:
"""Compare `/workspace` to the immutable create-time baseline."""
return self.diff_workspace(workspace_id)
# MCP tool adapter: registered on the embedded server; the docstring below is
# surfaced verbatim to MCP clients as the tool description, so keep it stable.
@server.tool()
async def workspace_file_list(
workspace_id: str,
path: str = "/workspace",
recursive: bool = False,
) -> dict[str, Any]:
"""List metadata for files and directories under one live workspace path."""
# Thin delegation to the SDK method, which forwards to the manager.
return self.list_workspace_files(
workspace_id,
path=path,
recursive=recursive,
)
# MCP tool adapter; the docstring is the client-facing tool description.
@server.tool()
async def workspace_file_read(
workspace_id: str,
path: str,
max_bytes: int = 65536,
) -> dict[str, Any]:
"""Read one regular text file from a live workspace path."""
# Delegates to the SDK method; `max_bytes` bounds the decoded response.
return self.read_workspace_file(
workspace_id,
path,
max_bytes=max_bytes,
)
# MCP tool adapter; the docstring is the client-facing tool description.
@server.tool()
async def workspace_file_write(
workspace_id: str,
path: str,
text: str,
) -> dict[str, Any]:
"""Create or replace one regular text file under `/workspace`."""
# Delegates to the SDK method; text-only writes, no binary payloads.
return self.write_workspace_file(
workspace_id,
path,
text=text,
)
# MCP tool adapter; the docstring is the client-facing tool description.
@server.tool()
async def workspace_patch_apply(
workspace_id: str,
patch: str,
) -> dict[str, Any]:
"""Apply a unified text patch inside one live workspace."""
# Delegates to the SDK method, which forwards the patch text to the manager.
return self.apply_workspace_patch(
workspace_id,
patch=patch,
)
@server.tool()
async def workspace_disk_export(
workspace_id: str,

View file

@ -22,6 +22,7 @@ from pyro_mcp.vm_manager import (
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS,
DEFAULT_VCPU_COUNT,
DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES,
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES,
WORKSPACE_GUEST_PATH,
WORKSPACE_SHELL_SIGNAL_NAMES,
)
@ -322,6 +323,72 @@ def _print_workspace_diff_human(payload: dict[str, Any]) -> None:
print(patch, end="" if patch.endswith("\n") else "\n")
def _print_workspace_file_list_human(payload: dict[str, Any]) -> None:
    """Render a workspace file listing payload for terminal users."""
    root = str(payload.get("path", WORKSPACE_GUEST_PATH))
    recursive_label = "yes" if bool(payload.get("recursive")) else "no"
    print(f"Workspace path: {root} (recursive={recursive_label})")
    entries = payload.get("entries")
    if not isinstance(entries, list) or not entries:
        print("No workspace entries found.")
        return
    for entry in entries:
        # Defensive: skip malformed (non-dict) entries instead of crashing.
        if not isinstance(entry, dict):
            continue
        pieces = [
            str(entry.get("path", "unknown")),
            f"[{str(entry.get('artifact_type', 'unknown'))}]",
            f"size={int(entry.get('size_bytes', 0))}",
        ]
        link_target = entry.get("link_target")
        if isinstance(link_target, str) and link_target != "":
            pieces.append(f"-> {link_target}")
        print(" ".join(pieces))
def _print_workspace_file_read_human(payload: dict[str, Any]) -> None:
    """Stream file content to stdout and a summary line to stderr."""
    _write_stream(str(payload.get("content", "")), stream=sys.stdout)
    truncated_label = "yes" if bool(payload.get("truncated", False)) else "no"
    summary = (
        "[workspace-file-read] "
        f"workspace_id={str(payload.get('workspace_id', 'unknown'))} "
        f"path={str(payload.get('path', 'unknown'))} "
        f"size_bytes={int(payload.get('size_bytes', 0))} "
        f"truncated={truncated_label}"
    )
    # Summary goes to stderr so stdout stays clean for piping file content.
    print(summary, file=sys.stderr, flush=True)
def _print_workspace_file_write_human(payload: dict[str, Any]) -> None:
print(
"[workspace-file-write] "
f"workspace_id={str(payload.get('workspace_id', 'unknown'))} "
f"path={str(payload.get('path', 'unknown'))} "
f"bytes_written={int(payload.get('bytes_written', 0))} "
f"execution_mode={str(payload.get('execution_mode', 'unknown'))}"
)
def _print_workspace_patch_human(payload: dict[str, Any]) -> None:
summary = payload.get("summary")
if isinstance(summary, dict):
print(
"[workspace-patch] "
f"workspace_id={str(payload.get('workspace_id', 'unknown'))} "
f"total={int(summary.get('total', 0))} "
f"added={int(summary.get('added', 0))} "
f"modified={int(summary.get('modified', 0))} "
f"deleted={int(summary.get('deleted', 0))} "
f"execution_mode={str(payload.get('execution_mode', 'unknown'))}"
)
return
print(
"[workspace-patch] "
f"workspace_id={str(payload.get('workspace_id', 'unknown'))} "
f"execution_mode={str(payload.get('execution_mode', 'unknown'))}"
)
def _print_workspace_logs_human(payload: dict[str, Any]) -> None:
entries = payload.get("entries")
if not isinstance(entries, list) or not entries:
@ -733,6 +800,8 @@ def _build_parser() -> argparse.ArgumentParser:
Examples:
pyro workspace create debian:12 --seed-path ./repo
pyro workspace sync push WORKSPACE_ID ./repo --dest src
pyro workspace file read WORKSPACE_ID src/app.py
pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)"
pyro workspace exec WORKSPACE_ID -- sh -lc 'printf "hello\\n" > note.txt'
pyro workspace stop WORKSPACE_ID
pyro workspace disk list WORKSPACE_ID
@ -996,6 +1065,145 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_file_parser = workspace_subparsers.add_parser(
"file",
help="List, read, and write workspace files without shell quoting.",
description=(
"Use workspace file operations for model-native tree inspection and text edits "
"inside one started workspace."
),
epilog=dedent(
"""
Examples:
pyro workspace file list WORKSPACE_ID
pyro workspace file read WORKSPACE_ID src/app.py
pyro workspace file write WORKSPACE_ID src/app.py --text 'print("hi")'
"""
),
formatter_class=_HelpFormatter,
)
workspace_file_subparsers = workspace_file_parser.add_subparsers(
dest="workspace_file_command",
required=True,
metavar="FILE",
)
workspace_file_list_parser = workspace_file_subparsers.add_parser(
"list",
help="List metadata for one live workspace path.",
description="List files, directories, and symlinks under one started workspace path.",
epilog="Example:\n pyro workspace file list WORKSPACE_ID src --recursive",
formatter_class=_HelpFormatter,
)
workspace_file_list_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_file_list_parser.add_argument(
"path",
nargs="?",
default=WORKSPACE_GUEST_PATH,
metavar="PATH",
help="Workspace path to inspect. Relative values resolve inside `/workspace`.",
)
workspace_file_list_parser.add_argument(
"--recursive",
action="store_true",
help="Walk directories recursively.",
)
workspace_file_list_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_file_read_parser = workspace_file_subparsers.add_parser(
"read",
help="Read one regular text file from a started workspace.",
description=(
"Read one regular text file under `/workspace`. This is bounded and does not "
"follow symlinks."
),
epilog="Example:\n pyro workspace file read WORKSPACE_ID src/app.py",
formatter_class=_HelpFormatter,
)
workspace_file_read_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_file_read_parser.add_argument("path", metavar="PATH")
workspace_file_read_parser.add_argument(
"--max-bytes",
type=int,
default=DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES,
help="Maximum number of bytes to return in the decoded text response.",
)
workspace_file_read_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_file_write_parser = workspace_file_subparsers.add_parser(
"write",
help="Create or replace one regular text file in a started workspace.",
description=(
"Write one UTF-8 text file under `/workspace`. Missing parent directories are "
"created automatically."
),
epilog=(
"Example:\n"
" pyro workspace file write WORKSPACE_ID src/app.py --text 'print(\"hi\")'"
),
formatter_class=_HelpFormatter,
)
workspace_file_write_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_file_write_parser.add_argument("path", metavar="PATH")
workspace_file_write_parser.add_argument(
"--text",
required=True,
help="UTF-8 text content to write into the target file.",
)
workspace_file_write_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_patch_parser = workspace_subparsers.add_parser(
"patch",
help="Apply unified text patches inside a started workspace.",
description=(
"Apply add/modify/delete unified text patches under `/workspace` without shell "
"editing tricks."
),
epilog=dedent(
"""
Example:
pyro workspace patch apply WORKSPACE_ID --patch "$(cat fix.patch)"
Patch application is preflighted but not fully transactional. If an apply fails
partway through, prefer `pyro workspace reset WORKSPACE_ID`.
"""
),
formatter_class=_HelpFormatter,
)
workspace_patch_subparsers = workspace_patch_parser.add_subparsers(
dest="workspace_patch_command",
required=True,
metavar="PATCH",
)
workspace_patch_apply_parser = workspace_patch_subparsers.add_parser(
"apply",
help="Apply one unified text patch to a started workspace.",
description=(
"Apply one unified text patch for add, modify, and delete operations under "
"`/workspace`."
),
epilog="Example:\n pyro workspace patch apply WORKSPACE_ID --patch \"$(cat fix.patch)\"",
formatter_class=_HelpFormatter,
)
workspace_patch_apply_parser.add_argument("workspace_id", metavar="WORKSPACE_ID")
workspace_patch_apply_parser.add_argument(
"--patch",
required=True,
help="Unified text patch to apply under `/workspace`.",
)
workspace_patch_apply_parser.add_argument(
"--json",
action="store_true",
help="Print structured JSON instead of human-readable output.",
)
workspace_snapshot_parser = workspace_subparsers.add_parser(
"snapshot",
help="Create, list, and delete workspace snapshots.",
@ -2005,6 +2213,78 @@ def main() -> None:
raise SystemExit(1) from exc
_print_workspace_diff_human(payload)
return
if args.workspace_command == "file":
if args.workspace_file_command == "list":
try:
payload = pyro.list_workspace_files(
args.workspace_id,
path=args.path,
recursive=bool(args.recursive),
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_file_list_human(payload)
return
if args.workspace_file_command == "read":
try:
payload = pyro.read_workspace_file(
args.workspace_id,
args.path,
max_bytes=args.max_bytes,
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_file_read_human(payload)
return
if args.workspace_file_command == "write":
try:
payload = pyro.write_workspace_file(
args.workspace_id,
args.path,
text=args.text,
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_file_write_human(payload)
return
if args.workspace_command == "patch" and args.workspace_patch_command == "apply":
try:
payload = pyro.apply_workspace_patch(
args.workspace_id,
patch=args.patch,
)
except Exception as exc: # noqa: BLE001
if bool(args.json):
_print_json({"ok": False, "error": str(exc)})
else:
print(f"[error] {exc}", file=sys.stderr, flush=True)
raise SystemExit(1) from exc
if bool(args.json):
_print_json(payload)
else:
_print_workspace_patch_human(payload)
return
if args.workspace_command == "snapshot":
if args.workspace_snapshot_command == "create":
try:

View file

@ -12,7 +12,9 @@ PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = (
"diff",
"exec",
"export",
"file",
"logs",
"patch",
"reset",
"service",
"shell",
@ -23,6 +25,8 @@ PUBLIC_CLI_WORKSPACE_SUBCOMMANDS = (
"sync",
)
PUBLIC_CLI_WORKSPACE_DISK_SUBCOMMANDS = ("export", "list", "read")
PUBLIC_CLI_WORKSPACE_FILE_SUBCOMMANDS = ("list", "read", "write")
PUBLIC_CLI_WORKSPACE_PATCH_SUBCOMMANDS = ("apply",)
PUBLIC_CLI_WORKSPACE_SERVICE_SUBCOMMANDS = ("list", "logs", "start", "status", "stop")
PUBLIC_CLI_WORKSPACE_SHELL_SUBCOMMANDS = ("close", "open", "read", "signal", "write")
PUBLIC_CLI_WORKSPACE_SNAPSHOT_SUBCOMMANDS = ("create", "delete", "list")
@ -44,6 +48,10 @@ PUBLIC_CLI_WORKSPACE_DISK_READ_FLAGS = ("--max-bytes", "--json")
PUBLIC_CLI_WORKSPACE_EXEC_FLAGS = ("--timeout-seconds", "--secret-env", "--json")
PUBLIC_CLI_WORKSPACE_DIFF_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_EXPORT_FLAGS = ("--output", "--json")
PUBLIC_CLI_WORKSPACE_FILE_LIST_FLAGS = ("--recursive", "--json")
PUBLIC_CLI_WORKSPACE_FILE_READ_FLAGS = ("--max-bytes", "--json")
PUBLIC_CLI_WORKSPACE_FILE_WRITE_FLAGS = ("--text", "--json")
PUBLIC_CLI_WORKSPACE_PATCH_APPLY_FLAGS = ("--patch", "--json")
PUBLIC_CLI_WORKSPACE_RESET_FLAGS = ("--snapshot", "--json")
PUBLIC_CLI_WORKSPACE_SERVICE_LIST_FLAGS = ("--json",)
PUBLIC_CLI_WORKSPACE_SERVICE_LOGS_FLAGS = ("--tail-lines", "--all", "--json")
@ -90,6 +98,7 @@ PUBLIC_CLI_RUN_FLAGS = (
)
PUBLIC_SDK_METHODS = (
"apply_workspace_patch",
"close_shell",
"create_server",
"create_snapshot",
@ -108,6 +117,7 @@ PUBLIC_SDK_METHODS = (
"list_services",
"list_snapshots",
"list_workspace_disk",
"list_workspace_files",
"logs_service",
"logs_workspace",
"network_info_vm",
@ -117,6 +127,7 @@ PUBLIC_SDK_METHODS = (
"push_workspace_sync",
"read_shell",
"read_workspace_disk",
"read_workspace_file",
"reap_expired",
"reset_workspace",
"run_in_vm",
@ -131,6 +142,7 @@ PUBLIC_SDK_METHODS = (
"stop_vm",
"stop_workspace",
"write_shell",
"write_workspace_file",
)
PUBLIC_MCP_TOOLS = (
@ -165,7 +177,11 @@ PUBLIC_MCP_TOOLS = (
"workspace_diff",
"workspace_exec",
"workspace_export",
"workspace_file_list",
"workspace_file_read",
"workspace_file_write",
"workspace_logs",
"workspace_patch_apply",
"workspace_reset",
"workspace_start",
"workspace_status",

View file

@ -3,6 +3,7 @@
from __future__ import annotations
import base64
import codecs
import fcntl
import io
@ -31,6 +32,7 @@ WORKSPACE_ROOT = PurePosixPath("/workspace")
SHELL_ROOT = Path("/run/pyro-shells")
SERVICE_ROOT = Path("/run/pyro-services")
SECRET_ROOT = Path("/run/pyro-secrets")
WORKSPACE_FILE_MAX_BYTES = 1024 * 1024
SERVICE_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$")
SHELL_SIGNAL_MAP = {
"HUP": signal.SIGHUP,
@ -328,6 +330,153 @@ def _prepare_export_archive(path: str) -> dict[str, Any]:
raise
def _workspace_entry(path_text: str, host_path: Path) -> dict[str, Any]:
try:
stat_result = os.lstat(host_path)
except FileNotFoundError as exc:
raise RuntimeError(f"workspace path does not exist: {path_text}") from exc
if host_path.is_symlink():
return {
"path": path_text,
"artifact_type": "symlink",
"size_bytes": stat_result.st_size,
"link_target": os.readlink(host_path),
}
if host_path.is_dir():
return {
"path": path_text,
"artifact_type": "directory",
"size_bytes": 0,
"link_target": None,
}
if host_path.is_file():
return {
"path": path_text,
"artifact_type": "file",
"size_bytes": stat_result.st_size,
"link_target": None,
}
raise RuntimeError(f"unsupported workspace path type: {path_text}")
def _join_workspace_path(base: str, child_name: str) -> str:
base_path = PurePosixPath(base)
return str(base_path / child_name) if str(base_path) != "/" else f"/{child_name}"
def _list_workspace(path: str, *, recursive: bool) -> dict[str, Any]:
    """Describe one workspace path, optionally walking directories.

    Directories are walked depth-first in pre-order with children sorted by
    workspace path, so listings are deterministic.
    """
    normalized_path, host_path = _normalize_destination(path)
    root_entry = _workspace_entry(str(normalized_path), host_path)
    if root_entry["artifact_type"] != "directory":
        # Non-directories come back as a single-entry listing of themselves.
        return {
            "path": str(normalized_path),
            "artifact_type": root_entry["artifact_type"],
            "entries": [root_entry],
        }
    collected: list[dict[str, Any]] = []

    def visit(parent_text: str, parent_host: Path) -> None:
        child_pairs: list[tuple[dict[str, Any], Path]] = []
        with os.scandir(parent_host) as children:
            for child in children:
                child_host = Path(child.path)
                child_entry = _workspace_entry(
                    _join_workspace_path(parent_text, child.name),
                    child_host,
                )
                child_pairs.append((child_entry, child_host))
        child_pairs.sort(key=lambda pair: str(pair[0]["path"]))
        for child_entry, child_host in child_pairs:
            collected.append(child_entry)
            # Only real directories recurse; symlinked dirs are classified
            # as symlinks by _workspace_entry and therefore never followed.
            if recursive and child_entry["artifact_type"] == "directory":
                visit(str(child_entry["path"]), child_host)

    visit(str(normalized_path), host_path)
    return {
        "path": str(normalized_path),
        "artifact_type": "directory",
        "entries": collected,
    }
def _read_workspace_file(path: str, *, max_bytes: int) -> dict[str, Any]:
    """Read one regular workspace file and return its bytes base64-encoded.

    Raises RuntimeError for a non-positive or oversized cap, a non-file
    target, or a file larger than the requested cap.
    """
    if max_bytes <= 0:
        raise RuntimeError("max_bytes must be positive")
    if max_bytes > WORKSPACE_FILE_MAX_BYTES:
        raise RuntimeError(f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes")
    normalized_path, host_path = _normalize_destination(path)
    kind = _workspace_entry(str(normalized_path), host_path)["artifact_type"]
    if kind != "file":
        raise RuntimeError("workspace file read only supports regular files")
    raw_bytes = host_path.read_bytes()
    if len(raw_bytes) > max_bytes:
        raise RuntimeError(
            f"workspace file exceeds the maximum supported size of {max_bytes} bytes"
        )
    encoded = base64.b64encode(raw_bytes).decode("ascii")
    return {
        "path": str(normalized_path),
        "size_bytes": len(raw_bytes),
        "content_b64": encoded,
    }
def _ensure_no_symlink_parents_for_write(root: Path, target_path: Path, path_text: str) -> None:
relative_path = target_path.relative_to(root)
current = root
for part in relative_path.parts[:-1]:
current = current / part
if current.is_symlink():
raise RuntimeError(
f"workspace path would traverse through a symlinked parent: {path_text}"
)
def _write_workspace_file(path: str, *, text: str) -> dict[str, Any]:
    """Atomically create or replace one UTF-8 text file under `/workspace`.

    The content is staged into a temporary file in the target directory and
    then moved into place with `os.replace`, so readers never observe a
    partially written file. Raises RuntimeError when the encoded text exceeds
    WORKSPACE_FILE_MAX_BYTES, when a parent is a symlink, or when the target
    exists but is not a regular file.
    """
    raw_bytes = text.encode("utf-8")
    if len(raw_bytes) > WORKSPACE_FILE_MAX_BYTES:
        raise RuntimeError(
            f"text must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8"
        )
    normalized_path, host_path = _normalize_destination(path)
    _ensure_no_symlink_parents_for_write(Path("/workspace"), host_path, str(normalized_path))
    if host_path.exists() or host_path.is_symlink():
        # An existing target may only be a regular file; symlinks and
        # directories are rejected via the entry classification.
        entry = _workspace_entry(str(normalized_path), host_path)
        if entry["artifact_type"] != "file":
            raise RuntimeError("workspace file write only supports regular file targets")
    host_path.parent.mkdir(parents=True, exist_ok=True)
    temp_path: Path | None = None
    try:
        with tempfile.NamedTemporaryFile(
            prefix=".pyro-workspace-write-",
            dir=host_path.parent,
            delete=False,
        ) as handle:
            temp_path = Path(handle.name)
            handle.write(raw_bytes)
        os.replace(temp_path, host_path)
    except BaseException:
        # Bug fix: the delete=False staging file used to be leaked into the
        # workspace when the write or rename failed partway through.
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
        raise
    return {
        "path": str(normalized_path),
        "size_bytes": len(raw_bytes),
        "bytes_written": len(raw_bytes),
    }
def _delete_workspace_path(path: str) -> dict[str, Any]:
    """Remove one non-directory entry from the workspace tree."""
    normalized_path, host_path = _normalize_destination(path)
    kind = _workspace_entry(str(normalized_path), host_path)["artifact_type"]
    if kind == "directory":
        raise RuntimeError("workspace file delete does not support directories")
    # unlink removes files and symlinks alike; missing paths already raised
    # inside _workspace_entry.
    host_path.unlink(missing_ok=False)
    return {"path": str(normalized_path), "deleted": True}
def _run_command(
command: str,
timeout_seconds: int,
@ -931,6 +1080,23 @@ def _dispatch(request: dict[str, Any], conn: socket.socket) -> dict[str, Any]:
raise RuntimeError("archive_size must not be negative")
payload = _read_exact(conn, archive_size)
return _install_secrets_archive(payload)
if action == "list_workspace":
return _list_workspace(
str(request.get("path", "/workspace")),
recursive=bool(request.get("recursive", False)),
)
if action == "read_workspace_file":
return _read_workspace_file(
str(request.get("path", "/workspace")),
max_bytes=int(request.get("max_bytes", WORKSPACE_FILE_MAX_BYTES)),
)
if action == "write_workspace_file":
return _write_workspace_file(
str(request.get("path", "/workspace")),
text=str(request.get("text", "")),
)
if action == "delete_workspace_path":
return _delete_workspace_path(str(request.get("path", "/workspace")))
if action == "open_shell":
shell_id = str(request.get("shell_id", "")).strip()
if shell_id == "":

View file

@ -25,7 +25,7 @@
"guest": {
"agent": {
"path": "guest/pyro_guest_agent.py",
"sha256": "76a0bd05b523bb952ab9eaf5a3f2e0cbf1fc458d1e44894e2c0d206b05896328"
"sha256": "81fe2523a40f9e88ee38601292b25919059be7faa049c9d02e9466453319c7dd"
},
"init": {
"path": "guest/pyro-init",

View file

@ -19,7 +19,7 @@ from typing import Any
from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths
DEFAULT_ENVIRONMENT_VERSION = "1.0.0"
DEFAULT_CATALOG_VERSION = "3.1.0"
DEFAULT_CATALOG_VERSION = "3.2.0"
OCI_MANIFEST_ACCEPT = ", ".join(
(
"application/vnd.oci.image.index.v1+json",

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import base64
import json
import socket
from dataclasses import dataclass
@ -47,6 +48,13 @@ class GuestArchiveExportResponse:
bytes_written: int
# Decoded result of a guest `read_workspace_file` call: the client converts
# the wire-level `content_b64` field into raw bytes before handing it back.
@dataclass(frozen=True)
class GuestWorkspaceFileReadResponse:
# Normalized absolute path of the file inside the guest workspace.
path: str
# Size of the file in bytes, as reported by the guest.
size_bytes: int
# Raw file content, base64-decoded from the guest response.
content_bytes: bytes
@dataclass(frozen=True)
class GuestShellSummary:
shell_id: str
@ -218,6 +226,102 @@ class VsockExecClient:
bytes_written=int(payload.get("bytes_written", 0)),
)
def list_workspace_entries(
    self,
    guest_cid: int,
    port: int,
    *,
    workspace_path: str,
    recursive: bool,
    timeout_seconds: int = 30,
    uds_path: str | None = None,
) -> dict[str, Any]:
    """Ask the guest agent for a listing of one workspace path."""
    request = {
        "action": "list_workspace",
        "path": workspace_path,
        "recursive": recursive,
    }
    return self._request_json(
        guest_cid,
        port,
        request,
        timeout_seconds=timeout_seconds,
        uds_path=uds_path,
        error_message="guest workspace file list response must be a JSON object",
    )
def read_workspace_file(
    self,
    guest_cid: int,
    port: int,
    *,
    workspace_path: str,
    max_bytes: int,
    timeout_seconds: int = 30,
    uds_path: str | None = None,
) -> dict[str, Any]:
    """Fetch one workspace file from the guest and decode its base64 payload.

    Replaces the wire-level `content_b64` field with decoded `content_bytes`.
    Raises RuntimeError when the guest response lacks `content_b64` or when
    the field is not valid ASCII base64.
    """
    payload = self._request_json(
        guest_cid,
        port,
        {
            "action": "read_workspace_file",
            "path": workspace_path,
            "max_bytes": max_bytes,
        },
        timeout_seconds=timeout_seconds,
        uds_path=uds_path,
        error_message="guest workspace file read response must be a JSON object",
    )
    raw_content = payload.get("content_b64", "")
    if not isinstance(raw_content, str):
        raise RuntimeError("guest workspace file read response is missing content_b64")
    try:
        content_bytes = base64.b64decode(raw_content.encode("ascii"), validate=True)
    except (UnicodeEncodeError, ValueError) as exc:
        # Bug fix: a malformed guest response used to surface as a raw
        # binascii.Error/UnicodeEncodeError instead of the module's
        # RuntimeError convention. (binascii.Error subclasses ValueError.)
        raise RuntimeError(
            "guest workspace file read response contains invalid base64 content"
        ) from exc
    payload["content_bytes"] = content_bytes
    payload.pop("content_b64", None)
    return payload
def write_workspace_file(
    self,
    guest_cid: int,
    port: int,
    *,
    workspace_path: str,
    text: str,
    timeout_seconds: int = 30,
    uds_path: str | None = None,
) -> dict[str, Any]:
    """Ask the guest agent to create or replace one workspace text file."""
    request = {
        "action": "write_workspace_file",
        "path": workspace_path,
        "text": text,
    }
    return self._request_json(
        guest_cid,
        port,
        request,
        timeout_seconds=timeout_seconds,
        uds_path=uds_path,
        error_message="guest workspace file write response must be a JSON object",
    )
def delete_workspace_path(
    self,
    guest_cid: int,
    port: int,
    *,
    workspace_path: str,
    timeout_seconds: int = 30,
    uds_path: str | None = None,
) -> dict[str, Any]:
    """Ask the guest agent to remove one non-directory workspace entry."""
    request = {
        "action": "delete_workspace_path",
        "path": workspace_path,
    }
    return self._request_json(
        guest_cid,
        port,
        request,
        timeout_seconds=timeout_seconds,
        uds_path=uds_path,
        error_message="guest workspace path delete response must be a JSON object",
    )
def open_shell(
self,
guest_cid: int,

View file

@ -40,6 +40,25 @@ from pyro_mcp.workspace_disk import (
read_workspace_disk_file,
scrub_workspace_runtime_paths,
)
from pyro_mcp.workspace_files import (
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES as DEFAULT_WORKSPACE_FILE_READ_LIMIT,
)
from pyro_mcp.workspace_files import (
WORKSPACE_FILE_MAX_BYTES as WORKSPACE_FILE_MAX_LIMIT,
)
from pyro_mcp.workspace_files import (
WORKSPACE_PATCH_MAX_BYTES as WORKSPACE_PATCH_MAX_LIMIT,
)
from pyro_mcp.workspace_files import (
WorkspaceTextPatch,
apply_unified_text_patch,
delete_workspace_path,
list_workspace_files,
normalize_workspace_path,
parse_unified_text_patch,
read_workspace_file,
write_workspace_file,
)
from pyro_mcp.workspace_ports import DEFAULT_PUBLISHED_PORT_HOST
from pyro_mcp.workspace_shells import (
create_local_shell,
@ -79,6 +98,9 @@ DEFAULT_SHELL_COLS = 120
DEFAULT_SHELL_ROWS = 30
DEFAULT_SHELL_MAX_CHARS = 65536
DEFAULT_WORKSPACE_DISK_READ_MAX_BYTES = 65536
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES = DEFAULT_WORKSPACE_FILE_READ_LIMIT
WORKSPACE_FILE_MAX_BYTES = WORKSPACE_FILE_MAX_LIMIT
WORKSPACE_PATCH_MAX_BYTES = WORKSPACE_PATCH_MAX_LIMIT
DEFAULT_SERVICE_READY_TIMEOUT_SECONDS = 30
DEFAULT_SERVICE_READY_INTERVAL_MS = 500
DEFAULT_SERVICE_LOG_TAIL_LINES = 200
@ -818,6 +840,49 @@ def _normalize_workspace_disk_path(path: str) -> str:
return normalized
# Thin alias over the shared helper so the manager module keeps one local
# entry point for workspace file path normalization.
def _normalize_workspace_file_path(path: str) -> str:
return normalize_workspace_path(path)
def _validate_workspace_file_read_max_bytes(max_bytes: int) -> int:
    """Validate a read cap: it must be positive and within the file limit."""
    # The two conditions are mutually exclusive, so checking the upper bound
    # first raises the same error the original ordering would for any input.
    if max_bytes > WORKSPACE_FILE_MAX_BYTES:
        raise ValueError(f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes")
    if max_bytes <= 0:
        raise ValueError("max_bytes must be positive")
    return max_bytes
def _validate_workspace_text_payload(text: str, *, field_name: str) -> str:
    """Reject text whose UTF-8 encoding exceeds the workspace file limit."""
    if len(text.encode("utf-8")) > WORKSPACE_FILE_MAX_BYTES:
        raise ValueError(
            f"{field_name} must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8"
        )
    return text
def _validate_workspace_patch_text(patch: str) -> str:
    """Reject empty patches and patches beyond the configured size cap."""
    if not patch.strip():
        raise ValueError("patch must not be empty")
    if len(patch.encode("utf-8")) > WORKSPACE_PATCH_MAX_BYTES:
        raise ValueError(
            f"patch must be at most {WORKSPACE_PATCH_MAX_BYTES} bytes when encoded as UTF-8"
        )
    return patch
def _decode_workspace_patch_text(path: str, content_bytes: bytes) -> str:
try:
return content_bytes.decode("utf-8")
except UnicodeDecodeError as exc:
raise RuntimeError(
f"workspace patch only supports UTF-8 text files: {path}"
) from exc
def _normalize_archive_member_name(name: str) -> PurePosixPath:
candidate = name.strip()
if candidate == "":
@ -2077,6 +2142,41 @@ class VmBackend:
) -> dict[str, Any]:
raise NotImplementedError
# Backend contract: list entries under one (already normalized) workspace path.
def list_workspace_entries(  # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
recursive: bool,
) -> dict[str, Any]:
raise NotImplementedError
# Backend contract: read one regular text file, bounded by max_bytes.
def read_workspace_file(  # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
max_bytes: int,
) -> dict[str, Any]:
raise NotImplementedError
# Backend contract: create or replace one UTF-8 text file.
def write_workspace_file(  # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
text: str,
) -> dict[str, Any]:
raise NotImplementedError
# Backend contract: remove one non-directory workspace entry.
def delete_workspace_path(  # pragma: no cover
self,
instance: VmInstance,
*,
workspace_path: str,
) -> dict[str, Any]:
raise NotImplementedError
def open_shell( # pragma: no cover
self,
instance: VmInstance,
@ -2256,6 +2356,79 @@ class MockBackend(VmBackend):
"execution_mode": "host_compat",
}
def list_workspace_entries(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    recursive: bool,
) -> dict[str, Any]:
    """List workspace entries straight from the host-side workspace dir."""
    host_root = _instance_workspace_host_dir(instance)
    listing = list_workspace_files(host_root, workspace_path=workspace_path, recursive=recursive)
    entry_payloads = [entry.to_payload() for entry in listing.entries]
    return {
        "path": listing.path,
        "artifact_type": listing.artifact_type,
        "entries": entry_payloads,
        "execution_mode": "host_compat",
    }
def read_workspace_file(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    max_bytes: int,
) -> dict[str, Any]:
    """Read one workspace file straight from the host-side workspace dir."""
    host_root = _instance_workspace_host_dir(instance)
    file_result = read_workspace_file(host_root, workspace_path=workspace_path, max_bytes=max_bytes)
    return {
        "path": file_result.path,
        "size_bytes": file_result.size_bytes,
        "content_bytes": file_result.content_bytes,
        "execution_mode": "host_compat",
    }
def write_workspace_file(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
    text: str,
) -> dict[str, Any]:
    """Write one workspace text file straight into the host-side workspace dir."""
    host_root = _instance_workspace_host_dir(instance)
    write_result = write_workspace_file(host_root, workspace_path=workspace_path, text=text)
    return {
        "path": write_result.path,
        "size_bytes": write_result.size_bytes,
        "bytes_written": write_result.bytes_written,
        "execution_mode": "host_compat",
    }
def delete_workspace_path(
    self,
    instance: VmInstance,
    *,
    workspace_path: str,
) -> dict[str, Any]:
    """Delete one workspace entry straight from the host-side workspace dir."""
    host_root = _instance_workspace_host_dir(instance)
    delete_result = delete_workspace_path(host_root, workspace_path=workspace_path)
    return {
        "path": delete_result.path,
        "deleted": delete_result.deleted,
        "execution_mode": "host_compat",
    }
def open_shell(
self,
instance: VmInstance,
@ -2776,6 +2949,134 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
"execution_mode": "host_compat",
}
def list_workspace_entries(
self,
instance: VmInstance,
*,
workspace_path: str,
recursive: bool,
) -> dict[str, Any]:
"""List workspace entries via the guest agent, or host-side as fallback."""
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.list_workspace_entries(
guest_cid,
port,
workspace_path=workspace_path,
recursive=recursive,
uds_path=uds_path,
)
# Stamp the instance's current execution mode onto the guest payload.
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
# Host fallback: record the degraded mode before touching the host dir
# so callers persisting instance.metadata see the switch.
instance.metadata["execution_mode"] = "host_compat"
listing = list_workspace_files(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
recursive=recursive,
)
return {
"path": listing.path,
"artifact_type": listing.artifact_type,
"entries": [entry.to_payload() for entry in listing.entries],
"execution_mode": "host_compat",
}
def read_workspace_file(
self,
instance: VmInstance,
*,
workspace_path: str,
max_bytes: int,
) -> dict[str, Any]:
"""Read one workspace file via the guest agent, or host-side as fallback."""
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.read_workspace_file(
guest_cid,
port,
workspace_path=workspace_path,
max_bytes=max_bytes,
uds_path=uds_path,
)
# Stamp the instance's current execution mode onto the guest payload.
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
# Host fallback: record the degraded mode before touching the host dir.
instance.metadata["execution_mode"] = "host_compat"
file_result = read_workspace_file(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
max_bytes=max_bytes,
)
return {
"path": file_result.path,
"size_bytes": file_result.size_bytes,
"content_bytes": file_result.content_bytes,
"execution_mode": "host_compat",
}
def write_workspace_file(
self,
instance: VmInstance,
*,
workspace_path: str,
text: str,
) -> dict[str, Any]:
"""Write one workspace text file via the guest agent, or host-side as fallback."""
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.write_workspace_file(
guest_cid,
port,
workspace_path=workspace_path,
text=text,
uds_path=uds_path,
)
# Stamp the instance's current execution mode onto the guest payload.
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
# Host fallback: record the degraded mode before touching the host dir.
instance.metadata["execution_mode"] = "host_compat"
result = write_workspace_file(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
text=text,
)
return {
"path": result.path,
"size_bytes": result.size_bytes,
"bytes_written": result.bytes_written,
"execution_mode": "host_compat",
}
def delete_workspace_path(
self,
instance: VmInstance,
*,
workspace_path: str,
) -> dict[str, Any]:
if self._runtime_capabilities.supports_guest_exec:
guest_cid = int(instance.metadata["guest_cid"])
port = int(instance.metadata["guest_exec_port"])
uds_path = instance.metadata.get("guest_exec_uds_path")
payload = self._guest_exec_client.delete_workspace_path(
guest_cid,
port,
workspace_path=workspace_path,
uds_path=uds_path,
)
payload["execution_mode"] = instance.metadata.get("execution_mode", "pending")
return payload
instance.metadata["execution_mode"] = "host_compat"
result = delete_workspace_path(
_instance_workspace_host_dir(instance),
workspace_path=workspace_path,
)
return {
"path": result.path,
"deleted": result.deleted,
"execution_mode": "host_compat",
}
def open_shell(
self,
instance: VmInstance,
@ -3585,6 +3886,235 @@ class VmManager:
diff_payload["workspace_id"] = workspace_id
return diff_payload
def list_workspace_files(
self,
workspace_id: str,
*,
path: str = WORKSPACE_GUEST_PATH,
recursive: bool = False,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_file_path(path)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="workspace_file_list",
)
listing = self._backend.list_workspace_entries(
instance,
workspace_path=normalized_path,
recursive=recursive,
)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"path": str(listing["path"]),
"recursive": recursive,
"entries": cast(list[dict[str, Any]], list(listing.get("entries", []))),
"execution_mode": str(
listing.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
),
}
def read_workspace_file(
self,
workspace_id: str,
path: str,
*,
max_bytes: int = DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_file_path(path)
normalized_max_bytes = _validate_workspace_file_read_max_bytes(max_bytes)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="workspace_file_read",
)
payload = self._backend.read_workspace_file(
instance,
workspace_path=normalized_path,
max_bytes=WORKSPACE_FILE_MAX_BYTES,
)
raw_bytes = cast(bytes, payload["content_bytes"])
content = raw_bytes[:normalized_max_bytes].decode("utf-8", errors="replace")
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"path": str(payload["path"]),
"size_bytes": int(payload["size_bytes"]),
"max_bytes": normalized_max_bytes,
"content": content,
"truncated": len(raw_bytes) > normalized_max_bytes,
"execution_mode": str(
payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
),
}
def write_workspace_file(
self,
workspace_id: str,
path: str,
*,
text: str,
) -> dict[str, Any]:
normalized_path = _normalize_workspace_file_path(path)
normalized_text = _validate_workspace_text_payload(text, field_name="text")
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
instance = self._workspace_instance_for_live_operation_locked(
workspace,
operation_name="workspace_file_write",
)
payload = self._backend.write_workspace_file(
instance,
workspace_path=normalized_path,
text=normalized_text,
)
with self._lock:
workspace = self._load_workspace_locked(workspace_id)
workspace.state = instance.state
workspace.firecracker_pid = instance.firecracker_pid
workspace.last_error = instance.last_error
workspace.metadata = dict(instance.metadata)
self._save_workspace_locked(workspace)
return {
"workspace_id": workspace_id,
"path": str(payload["path"]),
"size_bytes": int(payload["size_bytes"]),
"bytes_written": int(payload["bytes_written"]),
"execution_mode": str(
payload.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
),
}
    def apply_workspace_patch(
        self,
        workspace_id: str,
        *,
        patch: str,
    ) -> dict[str, Any]:
        """Apply a unified text patch to files inside a started workspace.

        The whole patch is parsed and preflighted on the host (reading each
        touched file through the backend) before any mutation happens, so a
        malformed or stale patch fails without leaving partial changes.
        Writes are applied before deletes, each in sorted path order.

        Raises ValueError for malformed/duplicate patch entries and
        RuntimeError when the patch does not match the current file state.
        """
        patch_text = _validate_workspace_patch_text(patch)
        parsed_patches = parse_unified_text_patch(patch_text)
        # Reject patches that touch the same file twice; apply order would
        # otherwise be ambiguous.
        patch_by_path: dict[str, WorkspaceTextPatch] = {}
        for text_patch in parsed_patches:
            if text_patch.path in patch_by_path:
                raise ValueError(f"patch contains duplicate file entries for {text_patch.path}")
            patch_by_path[text_patch.path] = text_patch
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_patch_apply",
            )
        # Preflight phase: compute every resulting file before mutating.
        planned_writes: dict[str, str] = {}
        planned_deletes: list[str] = []
        summary = {
            "total": 0,
            "added": 0,
            "modified": 0,
            "deleted": 0,
        }
        entries: list[dict[str, str]] = []
        for path_text in sorted(patch_by_path):
            file_patch = patch_by_path[path_text]
            listing: dict[str, Any] | None = None
            current_text: str | None = None
            exists = True
            try:
                listing = self._backend.list_workspace_entries(
                    instance,
                    workspace_path=file_patch.path,
                    recursive=False,
                )
            except RuntimeError as exc:
                # A missing path is expected for "added" patches; anything
                # else is a real backend failure.
                if "does not exist" in str(exc):
                    exists = False
                else:
                    raise
            if exists:
                if listing is None:
                    raise RuntimeError(
                        f"workspace patch could not inspect current path: {file_patch.path}"
                    )
                artifact_type = str(listing["artifact_type"])
                if artifact_type != "file":
                    raise RuntimeError(
                        f"workspace patch only supports regular files: {file_patch.path}"
                    )
                current_payload = self._backend.read_workspace_file(
                    instance,
                    workspace_path=file_patch.path,
                    max_bytes=WORKSPACE_FILE_MAX_BYTES,
                )
                current_text = _decode_workspace_patch_text(
                    file_patch.path,
                    cast(bytes, current_payload["content_bytes"]),
                )
            # Patch status must agree with the file's current existence.
            if file_patch.status == "added" and exists:
                raise RuntimeError(
                    f"workspace patch cannot add an existing path: {file_patch.path}"
                )
            if file_patch.status in {"modified", "deleted"} and not exists:
                raise RuntimeError(
                    f"workspace patch cannot modify a missing path: {file_patch.path}"
                )
            after_text = apply_unified_text_patch(
                path=file_patch.path,
                patch=file_patch,
                before_text=current_text,
            )
            # None means the patch deleted the file.
            if after_text is None:
                planned_deletes.append(file_patch.path)
            else:
                planned_writes[file_patch.path] = after_text
            summary["total"] += 1
            summary[file_patch.status] += 1
            entries.append({"path": file_patch.path, "status": file_patch.status})
        # Apply phase: every file preflighted successfully, now mutate.
        for path_text in sorted(planned_writes):
            self._backend.write_workspace_file(
                instance,
                workspace_path=path_text,
                text=planned_writes[path_text],
            )
        for path_text in sorted(planned_deletes):
            self._backend.delete_workspace_path(
                instance,
                workspace_path=path_text,
            )
        # Persist any instance state mutated by the backend calls.
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "changed": bool(entries),
            "summary": summary,
            "entries": entries,
            "patch": patch_text,
            "execution_mode": instance.metadata.get("execution_mode", "pending"),
        }
def create_snapshot(
self,
workspace_id: str,

View file

@ -0,0 +1,456 @@
"""Live workspace file operations and unified text patch helpers."""
from __future__ import annotations
import os
import re
import tempfile
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from typing import Literal
WORKSPACE_ROOT = PurePosixPath("/workspace")
DEFAULT_WORKSPACE_FILE_READ_MAX_BYTES = 65536
WORKSPACE_FILE_MAX_BYTES = 1024 * 1024
WORKSPACE_PATCH_MAX_BYTES = 1024 * 1024
WorkspaceFileArtifactType = Literal["file", "directory", "symlink"]
WorkspacePatchStatus = Literal["added", "modified", "deleted"]
_PATCH_HUNK_RE = re.compile(
r"^@@ -(?P<old_start>\d+)(?:,(?P<old_count>\d+))? "
r"\+(?P<new_start>\d+)(?:,(?P<new_count>\d+))? @@"
)
@dataclass(frozen=True)
class WorkspaceFileEntry:
    """One listed artifact inside /workspace, described in guest-path terms."""

    # Absolute guest path, always under /workspace.
    path: str
    artifact_type: WorkspaceFileArtifactType
    # lstat size for files/symlinks; 0 for directories.
    size_bytes: int
    # Symlink destination; None for files and directories.
    link_target: str | None = None

    def to_payload(self) -> dict[str, str | int | None]:
        """Return a JSON-serializable dict form of this entry."""
        return {
            "path": self.path,
            "artifact_type": self.artifact_type,
            "size_bytes": self.size_bytes,
            "link_target": self.link_target,
        }
@dataclass(frozen=True)
class WorkspacePathListing:
    """Result of listing a workspace path."""

    # Normalized guest path that was listed.
    path: str
    artifact_type: WorkspaceFileArtifactType
    # Entries found under the path (the target itself for non-directories).
    entries: list[WorkspaceFileEntry]
@dataclass(frozen=True)
class WorkspaceFileReadResult:
    """Raw bytes read from one workspace file."""

    # Normalized guest path of the file.
    path: str
    size_bytes: int
    content_bytes: bytes
@dataclass(frozen=True)
class WorkspaceFileWriteResult:
    """Outcome of writing one workspace file."""

    # Normalized guest path of the file.
    path: str
    # Final size of the file after the write.
    size_bytes: int
    bytes_written: int
@dataclass(frozen=True)
class WorkspaceFileDeleteResult:
    """Outcome of deleting one workspace artifact."""

    # Normalized guest path that was deleted.
    path: str
    deleted: bool
@dataclass(frozen=True)
class WorkspacePatchHunk:
    """One parsed hunk of a unified diff (1-based start lines)."""

    old_start: int
    old_count: int
    new_start: int
    new_count: int
    # Raw hunk body lines, each keeping its ' '/'+'/'-' prefix and newline.
    lines: list[str]
@dataclass(frozen=True)
class WorkspaceTextPatch:
    """All hunks of a unified diff that apply to one workspace file."""

    # Normalized guest path the patch targets.
    path: str
    status: WorkspacePatchStatus
    hunks: list[WorkspacePatchHunk]
def list_workspace_files(
    workspace_dir: Path,
    *,
    workspace_path: str,
    recursive: bool,
) -> WorkspacePathListing:
    """List workspace entries rooted at *workspace_path*.

    A non-directory target yields a listing containing just that entry.
    Directory contents are emitted in path-sorted order; with *recursive*
    the walk descends depth-first into real subdirectories.

    Raises ValueError for paths outside /workspace and RuntimeError for
    missing or unsupported paths.
    """
    normalized_path, host_path = _workspace_host_path(workspace_dir, workspace_path)
    entry = _entry_for_host_path(normalized_path, host_path)
    if entry.artifact_type != "directory":
        return WorkspacePathListing(
            path=entry.path,
            artifact_type=entry.artifact_type,
            entries=[entry],
        )
    entries: list[WorkspaceFileEntry] = []

    def walk(current_path: str, current_host_path: Path) -> None:
        # Collect and sort children first so output order is stable
        # regardless of directory iteration order.
        children: list[WorkspaceFileEntry] = []
        with os.scandir(current_host_path) as iterator:
            for child in iterator:
                child_entry = _entry_for_host_path(
                    _join_workspace_path(current_path, child.name),
                    Path(child.path),
                )
                children.append(child_entry)
        children.sort(key=lambda item: item.path)
        for child_entry in children:
            entries.append(child_entry)
            # Symlinked directories classify as "symlink", so recursion
            # cannot loop through link cycles.
            if recursive and child_entry.artifact_type == "directory":
                walk(child_entry.path, workspace_host_path(workspace_dir, child_entry.path))

    walk(normalized_path, host_path)
    return WorkspacePathListing(path=normalized_path, artifact_type="directory", entries=entries)
def read_workspace_file(
    workspace_dir: Path,
    *,
    workspace_path: str,
    max_bytes: int = WORKSPACE_FILE_MAX_BYTES,
) -> WorkspaceFileReadResult:
    """Read a regular file below /workspace and return its raw bytes.

    Raises ValueError for an invalid *max_bytes* and RuntimeError when the
    target is not a regular file or exceeds *max_bytes*.
    """
    _validate_max_bytes(max_bytes)
    guest_path, host_path = _workspace_host_path(workspace_dir, workspace_path)
    if _entry_for_host_path(guest_path, host_path).artifact_type != "file":
        raise RuntimeError("workspace file read only supports regular files")
    data = host_path.read_bytes()
    if len(data) > max_bytes:
        raise RuntimeError(
            f"workspace file exceeds the maximum supported size of {max_bytes} bytes"
        )
    return WorkspaceFileReadResult(
        path=guest_path,
        size_bytes=len(data),
        content_bytes=data,
    )
def write_workspace_file(
    workspace_dir: Path,
    *,
    workspace_path: str,
    text: str,
) -> WorkspaceFileWriteResult:
    """Atomically write UTF-8 *text* to a regular file below /workspace.

    The payload is written to a sibling temp file and moved into place with
    os.replace so readers never observe a partial file. Symlinked parent
    directories and non-file targets are rejected.

    Raises ValueError when the encoded payload exceeds
    WORKSPACE_FILE_MAX_BYTES and RuntimeError for invalid targets.
    """
    encoded = text.encode("utf-8")
    if len(encoded) > WORKSPACE_FILE_MAX_BYTES:
        raise ValueError(
            f"text must be at most {WORKSPACE_FILE_MAX_BYTES} bytes when encoded as UTF-8"
        )
    normalized_path, host_path = _workspace_host_path(workspace_dir, workspace_path)
    _ensure_no_symlink_parents(workspace_dir, host_path, normalized_path)
    if host_path.exists() or host_path.is_symlink():
        entry = _entry_for_host_path(normalized_path, host_path)
        if entry.artifact_type != "file":
            raise RuntimeError("workspace file write only supports regular file targets")
    host_path.parent.mkdir(parents=True, exist_ok=True)
    handle = tempfile.NamedTemporaryFile(
        prefix=".pyro-workspace-write-",
        dir=host_path.parent,
        delete=False,
    )
    temp_path = Path(handle.name)
    try:
        with handle:
            handle.write(encoded)
        os.replace(temp_path, host_path)
    except BaseException:
        # Fix: do not leave an orphaned temp file behind when the write or
        # the rename fails (the original leaked it on any exception here).
        temp_path.unlink(missing_ok=True)
        raise
    return WorkspaceFileWriteResult(
        path=normalized_path,
        size_bytes=len(encoded),
        bytes_written=len(encoded),
    )
def delete_workspace_path(
    workspace_dir: Path,
    *,
    workspace_path: str,
) -> WorkspaceFileDeleteResult:
    """Delete one non-directory artifact below /workspace.

    Raises RuntimeError for directories or missing paths and propagates
    FileNotFoundError if the file vanishes between stat and unlink.
    """
    guest_path, host_path = _workspace_host_path(workspace_dir, workspace_path)
    if _entry_for_host_path(guest_path, host_path).artifact_type == "directory":
        raise RuntimeError("workspace file delete does not support directories")
    host_path.unlink(missing_ok=False)
    return WorkspaceFileDeleteResult(path=guest_path, deleted=True)
def parse_unified_text_patch(patch_text: str) -> list[WorkspaceTextPatch]:
    """Parse a unified diff into per-file WorkspaceTextPatch records.

    Plain text patches only: ``diff --git`` and ``index`` lines are skipped,
    while mode changes, renames, copies, and binary payloads are rejected.

    Raises ValueError for oversized, empty, or malformed input.
    """
    encoded = patch_text.encode("utf-8")
    if len(encoded) > WORKSPACE_PATCH_MAX_BYTES:
        raise ValueError(
            f"patch must be at most {WORKSPACE_PATCH_MAX_BYTES} bytes when encoded as UTF-8"
        )
    if patch_text.strip() == "":
        raise ValueError("patch must not be empty")
    lines = patch_text.splitlines(keepends=True)
    patches: list[WorkspaceTextPatch] = []
    index = 0
    while index < len(lines):
        line = lines[index]
        # Tolerate git-style preamble lines that carry no content changes.
        if line.startswith("diff --git "):
            index += 1
            continue
        if line.startswith("index "):
            index += 1
            continue
        if _is_unsupported_patch_prelude(line):
            raise ValueError(f"unsupported patch feature: {line.rstrip()}")
        if not line.startswith("--- "):
            # Blank separator lines between file sections are allowed.
            if line.strip() == "":
                index += 1
                continue
            raise ValueError(f"invalid patch header: {line.rstrip()}")
        # "--- <old>" must be followed immediately by "+++ <new>".
        old_path = _parse_patch_label(line[4:].rstrip("\n"))
        index += 1
        if index >= len(lines) or not lines[index].startswith("+++ "):
            raise ValueError("patch is missing '+++' header")
        new_path = _parse_patch_label(lines[index][4:].rstrip("\n"))
        index += 1
        if old_path is not None and new_path is not None and old_path != new_path:
            raise ValueError("rename and copy patches are not supported")
        patch_path = new_path or old_path
        if patch_path is None:
            raise ValueError("patch must target a workspace path")
        # /dev/null on either side encodes creation or deletion.
        if old_path is None:
            status: WorkspacePatchStatus = "added"
        elif new_path is None:
            status = "deleted"
        else:
            status = "modified"
        hunks: list[WorkspacePatchHunk] = []
        while index < len(lines):
            line = lines[index]
            if line.startswith("diff --git ") or line.startswith("--- "):
                break
            if line.startswith("index "):
                index += 1
                continue
            if _is_unsupported_patch_prelude(line):
                raise ValueError(f"unsupported patch feature: {line.rstrip()}")
            header_match = _PATCH_HUNK_RE.match(line.rstrip("\n"))
            if header_match is None:
                raise ValueError(f"invalid patch hunk header: {line.rstrip()}")
            # Unified diff omits a count of exactly 1, so default to "1".
            old_count = int(header_match.group("old_count") or "1")
            new_count = int(header_match.group("new_count") or "1")
            hunk_lines: list[str] = []
            index += 1
            while index < len(lines):
                hunk_line = lines[index]
                if hunk_line.startswith(("diff --git ", "--- ", "@@ ")):
                    break
                if hunk_line.startswith("@@"):
                    break
                # Marker line emitted by diff for files without a trailing
                # newline; it carries no content.
                if hunk_line.startswith("\\ No newline at end of file"):
                    index += 1
                    continue
                if not hunk_line.startswith((" ", "+", "-")):
                    raise ValueError(f"invalid patch hunk line: {hunk_line.rstrip()}")
                hunk_lines.append(hunk_line)
                index += 1
            # Cross-check body line totals against the header counts.
            _validate_hunk_counts(old_count, new_count, hunk_lines)
            hunks.append(
                WorkspacePatchHunk(
                    old_start=int(header_match.group("old_start")),
                    old_count=old_count,
                    new_start=int(header_match.group("new_start")),
                    new_count=new_count,
                    lines=hunk_lines,
                )
            )
        if not hunks:
            raise ValueError(f"patch for {patch_path} has no hunks")
        patches.append(WorkspaceTextPatch(path=patch_path, status=status, hunks=hunks))
    if not patches:
        raise ValueError("patch must contain at least one file change")
    return patches
def apply_unified_text_patch(
    *,
    path: str,
    patch: WorkspaceTextPatch,
    before_text: str | None,
) -> str | None:
    """Apply one parsed file patch to *before_text*.

    Returns the patched text, or None when the patch deletes the file.
    *before_text* is None for "added" patches (empty original).

    Raises RuntimeError when hunk positions or context lines do not match
    the current content, when a delete patch leaves content behind, or
    when the result would exceed WORKSPACE_FILE_MAX_BYTES.
    """
    before_lines = [] if before_text is None else before_text.splitlines(keepends=True)
    output_lines: list[str] = []
    cursor = 0
    for hunk in patch.hunks:
        # old_start == 0 encodes an empty "before" file in unified diff.
        start_index = 0 if hunk.old_start == 0 else hunk.old_start - 1
        if start_index < cursor or start_index > len(before_lines):
            raise RuntimeError(f"patch hunk is out of range for {path}")
        # Copy untouched lines between the previous hunk and this one.
        output_lines.extend(before_lines[cursor:start_index])
        local_index = start_index
        for hunk_line in hunk.lines:
            prefix = hunk_line[:1]
            payload = hunk_line[1:]
            if prefix in {" ", "-"}:
                # Context and removal lines must match the original text
                # exactly; a mismatch means the patch is stale.
                if local_index >= len(before_lines):
                    raise RuntimeError(f"patch context does not match for {path}")
                if before_lines[local_index] != payload:
                    raise RuntimeError(f"patch context does not match for {path}")
                if prefix == " ":
                    output_lines.append(payload)
                local_index += 1
                continue
            if prefix == "+":
                output_lines.append(payload)
                continue
            raise RuntimeError(f"invalid patch line prefix for {path}")
        cursor = local_index
    output_lines.extend(before_lines[cursor:])
    after_text = "".join(output_lines)
    if patch.status == "deleted":
        # A delete patch must consume every line of the original file.
        if after_text != "":
            raise RuntimeError(f"delete patch did not remove all content for {path}")
        return None
    encoded = after_text.encode("utf-8")
    if len(encoded) > WORKSPACE_FILE_MAX_BYTES:
        raise RuntimeError(
            f"patched file {path} exceeds the maximum supported size of "
            f"{WORKSPACE_FILE_MAX_BYTES} bytes"
        )
    return after_text
def workspace_host_path(workspace_dir: Path, workspace_path: str) -> Path:
    """Resolve a guest /workspace path to its backing host path."""
    return _workspace_host_path(workspace_dir, workspace_path)[1]
def _workspace_host_path(workspace_dir: Path, workspace_path: str) -> tuple[str, Path]:
    """Return (normalized guest path, backing host path) for *workspace_path*."""
    normalized = normalize_workspace_path(workspace_path)
    relative = PurePosixPath(normalized).relative_to(WORKSPACE_ROOT)
    if str(relative) in {"", "."}:
        # The workspace root itself maps to the backing directory.
        return normalized, workspace_dir
    return normalized, workspace_dir.joinpath(*relative.parts)
def normalize_workspace_path(path: str) -> str:
    """Return *path* as an absolute, normalized POSIX path under /workspace.

    Relative inputs are anchored at /workspace. Raises ValueError when the
    path is empty, contains a '..' component, or resolves outside of
    /workspace (including the bare filesystem root).
    """
    trimmed = path.strip()
    if not trimmed:
        raise ValueError("workspace path must not be empty")
    candidate = PurePosixPath(trimmed)
    if ".." in candidate.parts:
        raise ValueError("workspace path must stay inside /workspace")
    if not candidate.is_absolute():
        candidate = WORKSPACE_ROOT / candidate
    kept_segments = [segment for segment in candidate.parts if segment not in {"", "."}]
    resolved = PurePosixPath("/") / PurePosixPath(*kept_segments)
    if resolved == PurePosixPath("/"):
        raise ValueError("workspace path must stay inside /workspace")
    root_parts = WORKSPACE_ROOT.parts
    if resolved.parts[: len(root_parts)] != root_parts:
        raise ValueError("workspace path must stay inside /workspace")
    return str(resolved)
def _entry_for_host_path(guest_path: str, host_path: Path) -> WorkspaceFileEntry:
    """Classify *host_path* as a file/directory/symlink entry.

    The symlink check runs first so links are never followed into the
    directory/file branches. Raises RuntimeError for missing paths and for
    unsupported types (FIFOs, sockets, devices).
    """
    try:
        stat_result = os.lstat(host_path)
    except FileNotFoundError as exc:
        raise RuntimeError(f"workspace path does not exist: {guest_path}") from exc
    kind: WorkspaceFileArtifactType
    if os.path.islink(host_path):
        kind = "symlink"
        size = stat_result.st_size
        target = os.readlink(host_path)
    elif host_path.is_dir():
        kind = "directory"
        size = 0
        target = None
    elif host_path.is_file():
        kind = "file"
        size = stat_result.st_size
        target = None
    else:
        raise RuntimeError(f"unsupported workspace path type: {guest_path}")
    return WorkspaceFileEntry(
        path=guest_path,
        artifact_type=kind,
        size_bytes=size,
        link_target=target,
    )
def _join_workspace_path(base: str, child_name: str) -> str:
base_path = PurePosixPath(base)
return str(base_path / child_name) if str(base_path) != "/" else f"/{child_name}"
def _ensure_no_symlink_parents(workspace_dir: Path, target_path: Path, guest_path: str) -> None:
relative_path = target_path.relative_to(workspace_dir)
current = workspace_dir
for part in relative_path.parts[:-1]:
current = current / part
if current.is_symlink():
raise RuntimeError(
f"workspace path would traverse through a symlinked parent: {guest_path}"
)
def _validate_max_bytes(max_bytes: int) -> None:
    """Validate a caller-supplied read limit against the global file cap.

    Raises ValueError when the limit is non-positive or above
    WORKSPACE_FILE_MAX_BYTES.
    """
    if max_bytes <= 0:
        raise ValueError("max_bytes must be positive")
    if max_bytes > WORKSPACE_FILE_MAX_BYTES:
        raise ValueError(f"max_bytes must be at most {WORKSPACE_FILE_MAX_BYTES} bytes")
def _is_unsupported_patch_prelude(line: str) -> bool:
return line.startswith(
(
"old mode ",
"new mode ",
"deleted file mode ",
"new file mode ",
"rename from ",
"rename to ",
"copy from ",
"copy to ",
"similarity index ",
"dissimilarity index ",
"GIT binary patch",
"Binary files ",
)
)
def _parse_patch_label(label: str) -> str | None:
    """Map a '---'/'+++' patch label to a normalized workspace path.

    Returns None for /dev/null (the unified-diff creation/deletion marker).
    A tab-separated trailer (e.g. a timestamp) is dropped and the git-style
    'a/'/'b/' prefix is stripped before normalization, which anchors
    relative labels at /workspace and validates containment.
    """
    raw = label.split("\t", 1)[0].strip()
    if raw == "/dev/null":
        return None
    if raw.startswith(("a/", "b/")):
        raw = raw[2:]
    # Fix: the original had two identical return branches guarded by a
    # redundant startswith("/workspace/") check; both absolute and relative
    # labels funnel through the same normalization.
    return normalize_workspace_path(raw)
def _validate_hunk_counts(old_count: int, new_count: int, hunk_lines: list[str]) -> None:
old_seen = 0
new_seen = 0
for hunk_line in hunk_lines:
prefix = hunk_line[:1]
if prefix in {" ", "-"}:
old_seen += 1
if prefix in {" ", "+"}:
new_seen += 1
if old_seen != old_count or new_seen != new_count:
raise ValueError("patch hunk line counts do not match the header")