Add workspace export and baseline diff
Complete the 2.6.0 workspace milestone by adding explicit host-out export and immutable-baseline diff across the CLI, Python SDK, and MCP server. Capture a baseline archive at workspace creation, export live /workspace paths through the guest agent, and compute structured whole-workspace diffs on the host without affecting command logs or shell state. The docs, roadmap, bundled guest agent, and workspace example now reflect the new create -> sync -> diff -> export workflow. Validation: uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and a real guest-backed Firecracker smoke test covering workspace create, sync push, diff, export, and delete.
This commit is contained in:
parent
3f8293ad24
commit
84a7e18d4d
26 changed files with 1492 additions and 43 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
import json
|
||||
import os
|
||||
import shlex
|
||||
|
|
@ -43,7 +44,9 @@ DEFAULT_TIMEOUT_SECONDS = 30
|
|||
DEFAULT_TTL_SECONDS = 600
|
||||
DEFAULT_ALLOW_HOST_COMPAT = False
|
||||
|
||||
WORKSPACE_LAYOUT_VERSION = 3
|
||||
WORKSPACE_LAYOUT_VERSION = 4
|
||||
WORKSPACE_BASELINE_DIRNAME = "baseline"
|
||||
WORKSPACE_BASELINE_ARCHIVE_NAME = "workspace.tar"
|
||||
WORKSPACE_DIRNAME = "workspace"
|
||||
WORKSPACE_COMMANDS_DIRNAME = "commands"
|
||||
WORKSPACE_SHELLS_DIRNAME = "shells"
|
||||
|
|
@ -57,6 +60,7 @@ DEFAULT_SHELL_MAX_CHARS = 65536
|
|||
WORKSPACE_SHELL_SIGNAL_NAMES = shell_signal_names()
|
||||
|
||||
WorkspaceSeedMode = Literal["empty", "directory", "tar_archive"]
|
||||
WorkspaceArtifactType = Literal["file", "directory", "symlink"]
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -287,6 +291,24 @@ class VmExecResult:
|
|||
duration_ms: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ExportedWorkspaceArchive:
|
||||
workspace_path: str
|
||||
artifact_type: WorkspaceArtifactType
|
||||
archive_path: Path
|
||||
entry_count: int
|
||||
bytes_written: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class WorkspaceTreeEntry:
|
||||
path: str
|
||||
artifact_type: WorkspaceArtifactType
|
||||
disk_path: Path
|
||||
size_bytes: int = 0
|
||||
link_target: str | None = None
|
||||
|
||||
|
||||
def _optional_int(value: object) -> int | None:
|
||||
if value is None:
|
||||
return None
|
||||
|
|
@ -522,6 +544,66 @@ def _write_directory_seed_archive(source_dir: Path, archive_path: Path) -> None:
|
|||
archive.add(child, arcname=child.name, recursive=True)
|
||||
|
||||
|
||||
def _write_empty_seed_archive(archive_path: Path) -> None:
|
||||
archive_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with tarfile.open(archive_path, "w"):
|
||||
pass
|
||||
|
||||
|
||||
def _persist_workspace_baseline(
|
||||
prepared_seed: PreparedWorkspaceSeed,
|
||||
*,
|
||||
baseline_archive_path: Path,
|
||||
) -> None:
|
||||
baseline_archive_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if prepared_seed.archive_path is None:
|
||||
_write_empty_seed_archive(baseline_archive_path)
|
||||
return
|
||||
shutil.copy2(prepared_seed.archive_path, baseline_archive_path)
|
||||
|
||||
|
||||
def _write_workspace_export_archive(
    source_path: Path,
    *,
    archive_path: Path,
) -> WorkspaceArtifactType:
    """Archive *source_path* into a tar at *archive_path* and return its kind.

    A directory is archived member-by-member (children at the archive root);
    a file or symlink becomes a single member named after the path's basename.
    Every symlink encountered is validated before anything is written, so a
    bad link target fails the whole export rather than producing a partial
    archive.

    Raises RuntimeError for unsupported path types (e.g. sockets, FIFOs).
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    # Classify the source. is_symlink() is checked first because is_file()/
    # is_dir() follow symlinks and would misclassify a link.
    if source_path.is_symlink():
        artifact_type: WorkspaceArtifactType = "symlink"
    elif source_path.is_file():
        artifact_type = "file"
    elif source_path.is_dir():
        artifact_type = "directory"
    else:
        raise RuntimeError(f"unsupported workspace path type: {source_path}")

    def validate_source(current_path: Path, relative_path: PurePosixPath) -> None:
        # Recursive pre-flight: reject unsafe symlink targets and unsupported
        # artifact types before the archive is opened for writing.
        if current_path.is_symlink():
            _validate_archive_symlink_target(relative_path, os.readlink(current_path))
            return
        if current_path.is_file():
            return
        if current_path.is_dir():
            # Sorted traversal keeps validation (and later archiving)
            # deterministic across filesystems.
            for child in sorted(current_path.iterdir(), key=lambda item: item.name):
                validate_source(child, relative_path / child.name)
            return
        raise RuntimeError(f"unsupported workspace path type: {current_path}")

    if artifact_type == "directory":
        for child in sorted(source_path.iterdir(), key=lambda item: item.name):
            validate_source(child, PurePosixPath(child.name))
    else:
        validate_source(source_path, PurePosixPath(source_path.name))
    with tarfile.open(archive_path, "w") as archive:
        # Store symlinks as symlinks rather than dereferencing them.
        archive.dereference = False
        if artifact_type == "directory":
            for child in sorted(source_path.iterdir(), key=lambda item: item.name):
                archive.add(child, arcname=child.name, recursive=True)
        else:
            # recursive=False: a single member for the file/symlink itself.
            archive.add(source_path, arcname=source_path.name, recursive=False)
    return artifact_type
|
||||
|
||||
|
||||
def _extract_seed_archive_to_host_workspace(
|
||||
archive_path: Path,
|
||||
*,
|
||||
|
|
@ -576,6 +658,120 @@ def _extract_seed_archive_to_host_workspace(
|
|||
}
|
||||
|
||||
|
||||
def _prepare_workspace_export_archive(
    *,
    workspace_dir: Path,
    workspace_path: str,
    archive_path: Path,
) -> ExportedWorkspaceArchive:
    """Export a host-compat workspace path into a tar and describe the result.

    Normalizes *workspace_path* against the guest /workspace namespace, maps
    it onto the host workspace mirror under *workspace_dir*, writes the tar
    archive, and inspects it to report member and byte counts.

    Raises RuntimeError if the normalized path does not exist on the host.
    """
    normalized_workspace_path, _ = _normalize_workspace_destination(workspace_path)
    source_path = _workspace_host_destination(workspace_dir, normalized_workspace_path)
    # A broken symlink reports exists() False but is_symlink() True; it is
    # still a legitimate exportable artifact, so both checks are needed.
    if not source_path.exists() and not source_path.is_symlink():
        raise RuntimeError(f"workspace path does not exist: {normalized_workspace_path}")
    artifact_type = _write_workspace_export_archive(source_path, archive_path=archive_path)
    entry_count, bytes_written = _inspect_seed_archive(archive_path)
    return ExportedWorkspaceArchive(
        workspace_path=normalized_workspace_path,
        artifact_type=artifact_type,
        archive_path=archive_path,
        entry_count=entry_count,
        bytes_written=bytes_written,
    )
|
||||
|
||||
|
||||
def _extract_workspace_export_archive(
    archive_path: Path,
    *,
    output_path: Path,
    artifact_type: WorkspaceArtifactType,
) -> dict[str, Any]:
    """Safely extract an export archive to *output_path* and summarize it.

    Extraction is done member-by-member (never tarfile.extractall) so every
    member name, parent chain, and symlink target can be validated first —
    this is the defense against path-traversal and symlink-escape attacks.
    *output_path* must not already exist.

    Returns a dict with output_path, artifact_type, entry_count, and
    bytes_written. Raises RuntimeError on conflicts, hard links, unsupported
    member types, or an archive shape that does not match *artifact_type*.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Refuse to clobber: a dangling symlink has exists() False, so check both.
    if output_path.exists() or output_path.is_symlink():
        raise RuntimeError(f"output_path already exists: {output_path}")

    entry_count = 0
    bytes_written = 0
    if artifact_type == "directory":
        output_path.mkdir(parents=True, exist_ok=False)
        with tarfile.open(archive_path, "r:*") as archive:
            for member in archive.getmembers():
                # Rejects absolute names and ".." components.
                member_name = _normalize_archive_member_name(member.name)
                target_path = output_path.joinpath(*member_name.parts)
                entry_count += 1
                # No ancestor of the target may be a symlink, or a hostile
                # archive could write outside output_path.
                _ensure_no_symlink_parents(output_path, target_path, member.name)
                if member.isdir():
                    if target_path.is_symlink() or (
                        target_path.exists() and not target_path.is_dir()
                    ):
                        raise RuntimeError(f"directory conflicts with existing path: {member.name}")
                    target_path.mkdir(parents=True, exist_ok=True)
                    continue
                if member.isfile():
                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    if target_path.is_symlink() or target_path.is_dir():
                        raise RuntimeError(f"file conflicts with existing path: {member.name}")
                    source = archive.extractfile(member)
                    if source is None:
                        raise RuntimeError(f"failed to read archive member: {member.name}")
                    with target_path.open("wb") as handle:
                        shutil.copyfileobj(source, handle)
                    bytes_written += member.size
                    continue
                if member.issym():
                    _validate_archive_symlink_target(member_name, member.linkname)
                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    if target_path.exists() and not target_path.is_symlink():
                        raise RuntimeError(f"symlink conflicts with existing path: {member.name}")
                    # Replace a pre-existing symlink rather than failing.
                    if target_path.is_symlink():
                        target_path.unlink()
                    os.symlink(member.linkname, target_path)
                    continue
                if member.islnk():
                    raise RuntimeError(
                        f"hard links are not allowed in workspace archives: {member.name}"
                    )
                raise RuntimeError(f"unsupported archive member type: {member.name}")
        return {
            "output_path": str(output_path),
            "artifact_type": artifact_type,
            "entry_count": entry_count,
            "bytes_written": bytes_written,
        }

    # Single-artifact export: the archive must contain exactly one member
    # whose type matches the declared artifact_type.
    with tarfile.open(archive_path, "r:*") as archive:
        members = archive.getmembers()
        if len(members) != 1:
            raise RuntimeError(
                "expected exactly one archive member for "
                f"{artifact_type} export, got {len(members)}"
            )
        member = members[0]
        # Called for its validation side effect only.
        _normalize_archive_member_name(member.name)
        entry_count = 1
        if artifact_type == "file":
            if not member.isfile():
                raise RuntimeError("exported archive did not contain a regular file")
            source = archive.extractfile(member)
            if source is None:
                raise RuntimeError(f"failed to read archive member: {member.name}")
            with output_path.open("wb") as handle:
                shutil.copyfileobj(source, handle)
            bytes_written = member.size
        elif artifact_type == "symlink":
            if not member.issym():
                raise RuntimeError("exported archive did not contain a symlink")
            _validate_archive_symlink_target(PurePosixPath(member.name), member.linkname)
            os.symlink(member.linkname, output_path)
        else:
            raise RuntimeError(f"unsupported artifact type: {artifact_type}")
    return {
        "output_path": str(output_path),
        "artifact_type": artifact_type,
        "entry_count": entry_count,
        "bytes_written": bytes_written,
    }
|
||||
|
||||
|
||||
def _instance_workspace_host_dir(instance: VmInstance) -> Path:
|
||||
raw_value = instance.metadata.get("workspace_host_dir")
|
||||
if raw_value is None or raw_value == "":
|
||||
|
|
@ -640,6 +836,205 @@ def _pid_is_running(pid: int | None) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def _collect_workspace_tree(root: Path) -> dict[str, WorkspaceTreeEntry]:
    """Index every artifact under *root*, keyed by slash-joined relative path.

    Files and symlinks are always recorded. A directory is recorded only when
    it is empty — non-empty directories are implied by their contents.

    Raises RuntimeError for unsupported artifact types (sockets, FIFOs, ...).
    """
    collected: dict[str, WorkspaceTreeEntry] = {}

    def visit(directory: Path, prefix: str) -> bool:
        # Returns True when at least one artifact exists under *directory*.
        found_any = False
        # Sorted traversal keeps the index deterministic.
        for item in sorted(directory.iterdir(), key=lambda entry: entry.name):
            rel = f"{prefix}{item.name}"
            if item.is_symlink():
                # Checked before is_file()/is_dir(), which follow links.
                collected[rel] = WorkspaceTreeEntry(
                    path=rel,
                    artifact_type="symlink",
                    disk_path=item,
                    link_target=os.readlink(item),
                )
            elif item.is_file():
                collected[rel] = WorkspaceTreeEntry(
                    path=rel,
                    artifact_type="file",
                    disk_path=item,
                    size_bytes=item.stat().st_size,
                )
            elif item.is_dir():
                if not visit(item, f"{rel}/"):
                    # Empty directory: record explicitly so diffs can see it.
                    collected[rel] = WorkspaceTreeEntry(
                        path=rel,
                        artifact_type="directory",
                        disk_path=item,
                    )
            else:
                raise RuntimeError(f"unsupported workspace artifact type: {item}")
            found_any = True
        return found_any

    visit(root, "")
    return collected
|
||||
|
||||
|
||||
def _is_probably_text(data: bytes) -> bool:
|
||||
if b"\x00" in data:
|
||||
return False
|
||||
try:
|
||||
data.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _build_text_patch(
|
||||
*,
|
||||
path: str,
|
||||
before_text: str,
|
||||
after_text: str,
|
||||
status: str,
|
||||
) -> str:
|
||||
if status == "added":
|
||||
fromfile = "/dev/null"
|
||||
tofile = f"b/{path}"
|
||||
elif status == "deleted":
|
||||
fromfile = f"a/{path}"
|
||||
tofile = "/dev/null"
|
||||
else:
|
||||
fromfile = f"a/{path}"
|
||||
tofile = f"b/{path}"
|
||||
lines = list(
|
||||
difflib.unified_diff(
|
||||
before_text.splitlines(keepends=True),
|
||||
after_text.splitlines(keepends=True),
|
||||
fromfile=fromfile,
|
||||
tofile=tofile,
|
||||
n=3,
|
||||
)
|
||||
)
|
||||
if not lines:
|
||||
return ""
|
||||
return "".join(lines)
|
||||
|
||||
|
||||
def _diff_workspace_trees(
    baseline_root: Path,
    current_root: Path,
) -> dict[str, Any]:
    """Compute a structured diff between two extracted workspace trees.

    Each path present in either tree is classified as added, deleted,
    modified, or type_changed. Text files (per _is_probably_text on both
    sides) additionally get a unified-diff patch; binary changes are counted
    under "non_text". Returns a dict with "changed" (bool), per-status
    "summary" counters, per-path "entries", and one concatenated "patch".
    """
    baseline_entries = _collect_workspace_tree(baseline_root)
    current_entries = _collect_workspace_tree(current_root)
    changed_entries: list[dict[str, Any]] = []
    patch_parts: list[str] = []
    summary = {
        "total": 0,
        "added": 0,
        "modified": 0,
        "deleted": 0,
        "type_changed": 0,
        "text_patched": 0,
        "non_text": 0,
    }

    # Union of both trees, sorted for deterministic output ordering.
    for path in sorted(set(baseline_entries) | set(current_entries)):
        baseline_entry = baseline_entries.get(path)
        current_entry = current_entries.get(path)
        entry_payload: dict[str, Any] | None = None
        text_patch = ""

        if baseline_entry is None and current_entry is not None:
            # Present only in the current tree: added.
            entry_payload = {
                "path": path,
                "status": "added",
                "artifact_type": current_entry.artifact_type,
                "text_patch": None,
            }
            if current_entry.artifact_type == "file":
                current_bytes = current_entry.disk_path.read_bytes()
                if _is_probably_text(current_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text="",
                        after_text=current_bytes.decode("utf-8"),
                        status="added",
                    )
        elif current_entry is None and baseline_entry is not None:
            # Present only in the baseline tree: deleted.
            entry_payload = {
                "path": path,
                "status": "deleted",
                "artifact_type": baseline_entry.artifact_type,
                "text_patch": None,
            }
            if baseline_entry.artifact_type == "file":
                baseline_bytes = baseline_entry.disk_path.read_bytes()
                if _is_probably_text(baseline_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text=baseline_bytes.decode("utf-8"),
                        after_text="",
                        status="deleted",
                    )
        elif baseline_entry is not None and current_entry is not None:
            # Present in both trees: compare by type first.
            if baseline_entry.artifact_type != current_entry.artifact_type:
                entry_payload = {
                    "path": path,
                    "status": "type_changed",
                    "artifact_type": current_entry.artifact_type,
                    "text_patch": None,
                }
            elif current_entry.artifact_type == "directory":
                # Only empty directories are indexed; same-type match means
                # nothing changed here.
                continue
            elif current_entry.artifact_type == "symlink":
                if baseline_entry.link_target != current_entry.link_target:
                    entry_payload = {
                        "path": path,
                        "status": "modified",
                        "artifact_type": current_entry.artifact_type,
                        "text_patch": None,
                    }
            else:
                # Regular files: byte-compare, then patch when both are text.
                baseline_bytes = baseline_entry.disk_path.read_bytes()
                current_bytes = current_entry.disk_path.read_bytes()
                if baseline_bytes == current_bytes:
                    continue
                entry_payload = {
                    "path": path,
                    "status": "modified",
                    "artifact_type": current_entry.artifact_type,
                    "text_patch": None,
                }
                if _is_probably_text(baseline_bytes) and _is_probably_text(current_bytes):
                    text_patch = _build_text_patch(
                        path=path,
                        before_text=baseline_bytes.decode("utf-8"),
                        after_text=current_bytes.decode("utf-8"),
                        status="modified",
                    )

        if entry_payload is None:
            continue

        summary["total"] += 1
        summary[str(entry_payload["status"])] += 1
        if text_patch != "":
            entry_payload["text_patch"] = text_patch
            patch_parts.append(text_patch)
            summary["text_patched"] += 1
        else:
            # Counts binary/symlink/type changes that carry no text patch.
            summary["non_text"] += 1
        changed_entries.append(entry_payload)

    return {
        "changed": bool(changed_entries),
        "summary": summary,
        "entries": changed_entries,
        "patch": "".join(patch_parts),
    }
|
||||
|
||||
|
||||
class VmBackend:
|
||||
"""Backend interface for lifecycle operations."""
|
||||
|
||||
|
|
@ -674,6 +1069,15 @@ class VmBackend:
|
|||
) -> dict[str, Any]:
|
||||
raise NotImplementedError
|
||||
|
||||
    def export_archive(  # pragma: no cover
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Export *workspace_path* from the VM into a host tar at *archive_path*.

        Backend subclasses must override this; implementations return a
        summary dict (workspace_path, artifact_type, entry_count,
        bytes_written, execution_mode).
        """
        raise NotImplementedError
|
||||
|
||||
def open_shell( # pragma: no cover
|
||||
self,
|
||||
instance: VmInstance,
|
||||
|
|
@ -768,6 +1172,26 @@ class MockBackend(VmBackend):
|
|||
destination=destination,
|
||||
)
|
||||
|
||||
    def export_archive(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Export a workspace path from the host-compat mirror into a tar.

        The mock backend has no guest, so the archive is built directly from
        the instance's host workspace directory and execution_mode is always
        reported as "host_compat".
        """
        exported = _prepare_workspace_export_archive(
            workspace_dir=_instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            archive_path=archive_path,
        )
        return {
            "workspace_path": exported.workspace_path,
            "artifact_type": exported.artifact_type,
            "entry_count": exported.entry_count,
            "bytes_written": exported.bytes_written,
            "execution_mode": "host_compat",
        }
|
||||
|
||||
def open_shell(
|
||||
self,
|
||||
instance: VmInstance,
|
||||
|
|
@ -1086,6 +1510,55 @@ class FirecrackerBackend(VmBackend): # pragma: no cover
|
|||
destination=destination,
|
||||
)
|
||||
|
||||
    def export_archive(
        self,
        instance: VmInstance,
        *,
        workspace_path: str,
        archive_path: Path,
    ) -> dict[str, Any]:
        """Export a live guest /workspace path into a host tar archive.

        When guest exec is supported, the export goes through the guest agent
        over vsock, retrying for up to 10 seconds while the transport becomes
        ready. Otherwise the backend falls back to the host-compat workspace
        mirror and marks the instance accordingly.
        """
        if self._runtime_capabilities.supports_guest_exec:
            guest_cid = int(instance.metadata["guest_cid"])
            port = int(instance.metadata["guest_exec_port"])
            uds_path = instance.metadata.get("guest_exec_uds_path")
            deadline = time.monotonic() + 10
            while True:
                try:
                    response = self._guest_exec_client.export_archive(
                        guest_cid,
                        port,
                        workspace_path=workspace_path,
                        archive_path=archive_path,
                        timeout_seconds=WORKSPACE_ARCHIVE_UPLOAD_TIMEOUT_SECONDS,
                        uds_path=uds_path,
                    )
                    return {
                        "workspace_path": response.workspace_path,
                        "artifact_type": response.artifact_type,
                        "entry_count": response.entry_count,
                        "bytes_written": response.bytes_written,
                        "execution_mode": instance.metadata.get("execution_mode", "pending"),
                    }
                except (OSError, RuntimeError) as exc:
                    # NOTE(review): this retries ANY RuntimeError (e.g. a
                    # "path does not exist" error from the guest) until the
                    # deadline, not just transport-not-ready failures —
                    # confirm that is intended.
                    if time.monotonic() >= deadline:
                        raise RuntimeError(
                            f"guest export transport did not become ready: {exc}"
                        ) from exc
                    time.sleep(0.2)
        # No guest exec available: serve the export from the host mirror.
        instance.metadata["execution_mode"] = "host_compat"
        exported = _prepare_workspace_export_archive(
            workspace_dir=_instance_workspace_host_dir(instance),
            workspace_path=workspace_path,
            archive_path=archive_path,
        )
        return {
            "workspace_path": exported.workspace_path,
            "artifact_type": exported.artifact_type,
            "entry_count": exported.entry_count,
            "bytes_written": exported.bytes_written,
            "execution_mode": "host_compat",
        }
|
||||
|
||||
def open_shell(
|
||||
self,
|
||||
instance: VmInstance,
|
||||
|
|
@ -1488,10 +1961,15 @@ class VmManager:
|
|||
host_workspace_dir = self._workspace_host_dir(workspace_id)
|
||||
commands_dir = self._workspace_commands_dir(workspace_id)
|
||||
shells_dir = self._workspace_shells_dir(workspace_id)
|
||||
baseline_archive_path = self._workspace_baseline_archive_path(workspace_id)
|
||||
workspace_dir.mkdir(parents=True, exist_ok=False)
|
||||
host_workspace_dir.mkdir(parents=True, exist_ok=True)
|
||||
commands_dir.mkdir(parents=True, exist_ok=True)
|
||||
shells_dir.mkdir(parents=True, exist_ok=True)
|
||||
_persist_workspace_baseline(
|
||||
prepared_seed,
|
||||
baseline_archive_path=baseline_archive_path,
|
||||
)
|
||||
instance = VmInstance(
|
||||
vm_id=workspace_id,
|
||||
environment=environment,
|
||||
|
|
@ -1523,23 +2001,14 @@ class VmManager:
|
|||
self._start_instance_locked(instance)
|
||||
self._require_guest_exec_or_opt_in(instance)
|
||||
workspace_seed = prepared_seed.to_payload()
|
||||
if prepared_seed.archive_path is not None:
|
||||
import_summary = self._backend.import_archive(
|
||||
instance,
|
||||
archive_path=prepared_seed.archive_path,
|
||||
destination=WORKSPACE_GUEST_PATH,
|
||||
)
|
||||
workspace_seed["entry_count"] = int(import_summary["entry_count"])
|
||||
workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
|
||||
workspace_seed["destination"] = str(import_summary["destination"])
|
||||
elif self._runtime_capabilities.supports_guest_exec:
|
||||
self._backend.exec(
|
||||
instance,
|
||||
f"mkdir -p {shlex.quote(WORKSPACE_GUEST_PATH)}",
|
||||
10,
|
||||
)
|
||||
else:
|
||||
instance.metadata["execution_mode"] = "host_compat"
|
||||
import_summary = self._backend.import_archive(
|
||||
instance,
|
||||
archive_path=baseline_archive_path,
|
||||
destination=WORKSPACE_GUEST_PATH,
|
||||
)
|
||||
workspace_seed["entry_count"] = int(import_summary["entry_count"])
|
||||
workspace_seed["bytes_written"] = int(import_summary["bytes_written"])
|
||||
workspace_seed["destination"] = str(import_summary["destination"])
|
||||
workspace = WorkspaceRecord.from_instance(instance, workspace_seed=workspace_seed)
|
||||
self._save_workspace_locked(workspace)
|
||||
return self._serialize_workspace(workspace)
|
||||
|
|
@ -1612,6 +2081,98 @@ class VmManager:
|
|||
"workspace_sync": workspace_sync,
|
||||
}
|
||||
|
||||
    def export_workspace(
        self,
        workspace_id: str,
        *,
        path: str,
        output_path: str | Path,
    ) -> dict[str, Any]:
        """Export one workspace path to *output_path* on the host.

        The manager lock is held only to load the workspace and build a live
        instance, then released while the (potentially slow) guest export and
        extraction run, and reacquired to persist any state the backend
        mutated on the instance. Returns a summary dict for the caller
        (CLI/SDK/MCP).

        Raises ValueError for an empty output_path; backend/extraction errors
        propagate as RuntimeError.
        """
        normalized_path, _ = _normalize_workspace_destination(path)
        raw_output_path = str(output_path).strip()
        if raw_output_path == "":
            raise ValueError("output_path must not be empty")
        resolved_output_path = Path(output_path).expanduser().resolve()
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_export",
            )
        # Archive into a temp dir first so a failed export never leaves a
        # partial artifact at the caller's output path.
        with tempfile.TemporaryDirectory(prefix="pyro-workspace-export-") as temp_dir:
            archive_path = Path(temp_dir) / "workspace-export.tar"
            exported = self._backend.export_archive(
                instance,
                workspace_path=normalized_path,
                archive_path=archive_path,
            )
            extracted = _extract_workspace_export_archive(
                archive_path,
                output_path=resolved_output_path,
                artifact_type=cast(WorkspaceArtifactType, str(exported["artifact_type"])),
            )
        with self._lock:
            # Re-load and persist: the backend may have updated state,
            # pid, last_error, or metadata (e.g. execution_mode fallback).
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        return {
            "workspace_id": workspace_id,
            "workspace_path": normalized_path,
            "output_path": str(Path(str(extracted["output_path"]))),
            "artifact_type": extracted["artifact_type"],
            "entry_count": int(extracted["entry_count"]),
            "bytes_written": int(extracted["bytes_written"]),
            "execution_mode": str(
                exported.get("execution_mode", instance.metadata.get("execution_mode", "pending"))
            ),
        }
|
||||
|
||||
    def diff_workspace(self, workspace_id: str) -> dict[str, Any]:
        """Diff the live /workspace tree against the immutable baseline.

        Exports the whole current workspace from the guest, extracts both the
        baseline archive (captured at workspace creation) and the current
        archive into a temp dir, and runs the structured tree diff. The lock
        is released during the slow export/extract work and reacquired to
        persist instance state, so command logs and shell state are untouched.

        Raises RuntimeError when no baseline archive exists (workspaces
        created before baselines were introduced).
        """
        with self._lock:
            workspace = self._load_workspace_locked(workspace_id)
            instance = self._workspace_instance_for_live_operation_locked(
                workspace,
                operation_name="workspace_diff",
            )
            baseline_archive_path = self._workspace_baseline_archive_path(workspace_id)
        if not baseline_archive_path.exists():
            raise RuntimeError(
                "workspace diff requires a baseline snapshot. Recreate the workspace to use diff."
            )
        with tempfile.TemporaryDirectory(prefix="pyro-workspace-diff-") as temp_dir:
            temp_root = Path(temp_dir)
            current_archive_path = temp_root / "current.tar"
            baseline_root = temp_root / "baseline"
            current_root = temp_root / "current"
            # Snapshot the whole live /workspace into a local archive.
            self._backend.export_archive(
                instance,
                workspace_path=WORKSPACE_GUEST_PATH,
                archive_path=current_archive_path,
            )
            # Extract both sides with the same (validating) extractor so the
            # trees are directly comparable.
            _extract_seed_archive_to_host_workspace(
                baseline_archive_path,
                workspace_dir=baseline_root,
                destination=WORKSPACE_GUEST_PATH,
            )
            _extract_seed_archive_to_host_workspace(
                current_archive_path,
                workspace_dir=current_root,
                destination=WORKSPACE_GUEST_PATH,
            )
            diff_payload = _diff_workspace_trees(baseline_root, current_root)
        with self._lock:
            # Persist any instance state the backend mutated during export.
            workspace = self._load_workspace_locked(workspace_id)
            workspace.state = instance.state
            workspace.firecracker_pid = instance.firecracker_pid
            workspace.last_error = instance.last_error
            workspace.metadata = dict(instance.metadata)
            self._save_workspace_locked(workspace)
        diff_payload["workspace_id"] = workspace_id
        return diff_payload
|
||||
|
||||
def exec_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
|
|
@ -2136,6 +2697,12 @@ class VmManager:
|
|||
    def _workspace_host_dir(self, workspace_id: str) -> Path:
        """Host directory that mirrors the guest /workspace tree."""
        return self._workspace_dir(workspace_id) / WORKSPACE_DIRNAME
|
||||
|
||||
    def _workspace_baseline_dir(self, workspace_id: str) -> Path:
        """Directory holding the workspace's immutable baseline snapshot."""
        return self._workspace_dir(workspace_id) / WORKSPACE_BASELINE_DIRNAME
|
||||
|
||||
    def _workspace_baseline_archive_path(self, workspace_id: str) -> Path:
        """Path of the baseline tar captured at workspace creation (diff input)."""
        return self._workspace_baseline_dir(workspace_id) / WORKSPACE_BASELINE_ARCHIVE_NAME
|
||||
|
||||
    def _workspace_commands_dir(self, workspace_id: str) -> Path:
        """Directory holding the workspace's persisted command logs."""
        return self._workspace_dir(workspace_id) / WORKSPACE_COMMANDS_DIRNAME
|
||||
|
||||
|
|
@ -2278,17 +2845,29 @@ class VmManager:
|
|||
return entries
|
||||
|
||||
    def _workspace_instance_for_live_shell_locked(self, workspace: WorkspaceRecord) -> VmInstance:
        """Build a live instance for shell operations, requiring shell support.

        Delegates the liveness/expiry/state checks to the generic live-
        operation helper, then additionally verifies the instance supports
        workspace shells. Caller must hold the manager lock.
        """
        instance = self._workspace_instance_for_live_operation_locked(
            workspace,
            operation_name="shell operations",
        )
        self._require_workspace_shell_support(instance)
        return instance
|
||||
|
||||
def _workspace_instance_for_live_operation_locked(
|
||||
self,
|
||||
workspace: WorkspaceRecord,
|
||||
*,
|
||||
operation_name: str,
|
||||
) -> VmInstance:
|
||||
self._ensure_workspace_not_expired_locked(workspace, time.time())
|
||||
self._refresh_workspace_liveness_locked(workspace)
|
||||
if workspace.state != "started":
|
||||
raise RuntimeError(
|
||||
"workspace "
|
||||
f"{workspace.workspace_id} must be in 'started' state before shell operations"
|
||||
f"{workspace.workspace_id} must be in 'started' state before {operation_name}"
|
||||
)
|
||||
instance = workspace.to_instance(
|
||||
workdir=self._workspace_runtime_dir(workspace.workspace_id)
|
||||
)
|
||||
self._require_workspace_shell_support(instance)
|
||||
return instance
|
||||
|
||||
def _workspace_shell_record_from_payload(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue