Turn the stable workspace surface into five documented, runnable stories with a shared guest-backed smoke runner, new docs/use-cases recipes, and Make targets for cold-start validation, repro/fix loops, parallel workspaces, untrusted inspection, and review/eval workflows. Bump the package and catalog surface to 3.6.0, update the main docs to point users from the stable workspace walkthrough into the recipe index and smoke packs, and mark the 3.6.0 roadmap milestone done. Fix a regression uncovered by the real parallel-workspaces smoke: workspace_file_read must not bump last_activity_at. Verified with uv lock, UV_CACHE_DIR=.uv-cache make check, UV_CACHE_DIR=.uv-cache make dist-check, and USE_CASE_ENVIRONMENT=debian:12 UV_CACHE_DIR=.uv-cache make smoke-use-cases.
685 lines
27 KiB
Python
685 lines
27 KiB
Python
"""Official environment catalog and local cache management."""
|
|
|
|
from __future__ import annotations

import hashlib
import json
import os
import shutil
import tarfile
import tempfile
import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass, replace
from pathlib import Path
from typing import Any

from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths
|
|
|
|
# Version stamped on every curated environment's artifacts (kernel + rootfs).
DEFAULT_ENVIRONMENT_VERSION = "1.0.0"
# Version of the catalog surface itself; bumped alongside package releases.
DEFAULT_CATALOG_VERSION = "3.6.0"
# Accept header listing both OCI and Docker manifest/index media types so the
# registry can return whichever flavor it stores.
OCI_MANIFEST_ACCEPT = ", ".join(
    (
        "application/vnd.oci.image.index.v1+json",
        "application/vnd.oci.image.manifest.v1+json",
        "application/vnd.docker.distribution.manifest.list.v2+json",
        "application/vnd.docker.distribution.manifest.v2+json",
    )
)
# Stream OCI blobs to disk in 1 MiB chunks to bound memory use.
OCI_READ_CHUNK_SIZE = 1024 * 1024
|
|
|
|
|
|
@dataclass(frozen=True)
class VmEnvironment:
    """Catalog entry describing a curated Linux environment."""

    # Catalog key, e.g. "debian:12".
    name: str
    # Version of the environment artifacts (see DEFAULT_ENVIRONMENT_VERSION).
    version: str
    # Human-readable summary shown in catalog listings.
    description: str
    # Packages expected to be preinstalled in the rootfs image.
    default_packages: tuple[str, ...]
    # Linux distribution identifier, e.g. "debian".
    distribution: str
    # Distribution release, e.g. "12".
    distribution_version: str
    # Runtime build profile that produced (or can produce) the artifacts.
    source_profile: str
    # Target platform for the artifacts; defaults to the runtime's platform.
    platform: str = DEFAULT_PLATFORM
    # Optional direct archive download URL (lowest-priority install source).
    source_url: str | None = None
    # OCI registry host serving the environment image, if published.
    oci_registry: str | None = None
    # OCI repository path within that registry.
    oci_repository: str | None = None
    # OCI tag (or digest) to pull.
    oci_reference: str | None = None
    # sha256 of the rootfs recorded in the runtime manifest, when known.
    source_digest: str | None = None
    # Runtime version range this environment is compatible with.
    compatibility: str = ">=3.0.0,<4.0.0"
|
|
|
|
|
|
@dataclass(frozen=True)
class InstalledEnvironment:
    """Resolved environment artifact locations."""

    # Catalog key of the installed environment.
    name: str
    # Installed artifact version.
    version: str
    # Directory under the per-platform cache holding the artifacts.
    install_dir: Path
    # Path to the kernel image ("vmlinux") inside install_dir.
    kernel_image: Path
    # Path to the root filesystem image ("rootfs.ext4") inside install_dir.
    rootfs_image: Path
    # Where the artifacts came from: bundled runtime source, OCI ref, or URL.
    source: str
    # sha256 digest of the install source, when known.
    source_digest: str | None
    # Always True for a successfully resolved install.
    installed: bool
|
|
|
|
|
|
# Built-in catalog of curated environments, keyed by their public name.
# All entries are published to Docker Hub under the same version tag.
CATALOG: dict[str, VmEnvironment] = {
    # Default general-purpose environment with Git.
    "debian:12": VmEnvironment(
        name="debian:12",
        version=DEFAULT_ENVIRONMENT_VERSION,
        description="Debian 12 environment with Git preinstalled for common agent workflows.",
        default_packages=("bash", "coreutils", "git"),
        distribution="debian",
        distribution_version="12",
        source_profile="debian-git",
        oci_registry="registry-1.docker.io",
        oci_repository="thalesmaciel/pyro-environment-debian-12",
        oci_reference=DEFAULT_ENVIRONMENT_VERSION,
    ),
    # Minimal variant: shell and core utilities only.
    "debian:12-base": VmEnvironment(
        name="debian:12-base",
        version=DEFAULT_ENVIRONMENT_VERSION,
        description="Minimal Debian 12 environment for shell and core Unix tooling.",
        default_packages=("bash", "coreutils"),
        distribution="debian",
        distribution_version="12",
        source_profile="debian-base",
        oci_registry="registry-1.docker.io",
        oci_repository="thalesmaciel/pyro-environment-debian-12-base",
        oci_reference=DEFAULT_ENVIRONMENT_VERSION,
    ),
    # Build-oriented variant with compilers and Python.
    "debian:12-build": VmEnvironment(
        name="debian:12-build",
        version=DEFAULT_ENVIRONMENT_VERSION,
        description="Debian 12 environment with Git and common build tools preinstalled.",
        default_packages=("bash", "coreutils", "git", "gcc", "make", "cmake", "python3"),
        distribution="debian",
        distribution_version="12",
        source_profile="debian-build",
        oci_registry="registry-1.docker.io",
        oci_repository="thalesmaciel/pyro-environment-debian-12-build",
        oci_reference=DEFAULT_ENVIRONMENT_VERSION,
    ),
}
|
|
|
|
|
|
def _default_cache_dir() -> Path:
|
|
return Path(
|
|
os.environ.get(
|
|
"PYRO_ENVIRONMENT_CACHE_DIR",
|
|
str(Path.home() / ".cache" / "pyro-mcp" / "environments"),
|
|
)
|
|
)
|
|
|
|
|
|
def default_cache_dir() -> Path:
|
|
"""Return the canonical default environment cache directory."""
|
|
return _default_cache_dir()
|
|
|
|
|
|
def _manifest_profile_digest(runtime_paths: RuntimePaths, profile_name: str) -> str | None:
|
|
profiles = runtime_paths.manifest.get("profiles")
|
|
if not isinstance(profiles, dict):
|
|
return None
|
|
profile = profiles.get(profile_name)
|
|
if not isinstance(profile, dict):
|
|
return None
|
|
rootfs = profile.get("rootfs")
|
|
if not isinstance(rootfs, dict):
|
|
return None
|
|
raw_digest = rootfs.get("sha256")
|
|
return raw_digest if isinstance(raw_digest, str) else None
|
|
|
|
|
|
def get_environment(name: str, *, runtime_paths: RuntimePaths | None = None) -> VmEnvironment:
    """Resolve a curated environment by name.

    Args:
        name: Catalog key such as ``"debian:12"``.
        runtime_paths: When provided, the returned entry carries the rootfs
            digest recorded in the runtime manifest for the entry's profile.

    Returns:
        The catalog entry, optionally enriched with ``source_digest``.

    Raises:
        ValueError: If *name* is not a known catalog entry.
    """
    try:
        spec = CATALOG[name]
    except KeyError as exc:
        known = ", ".join(sorted(CATALOG))
        raise ValueError(f"unknown environment {name!r}; expected one of: {known}") from exc
    if runtime_paths is None:
        return spec
    # dataclasses.replace copies every field of the frozen spec, so any field
    # added to VmEnvironment later is propagated automatically instead of
    # being silently dropped by a hand-written field-by-field copy.
    return replace(
        spec,
        source_digest=_manifest_profile_digest(runtime_paths, spec.source_profile),
    )
|
|
|
|
|
|
def list_environments(*, runtime_paths: RuntimePaths | None = None) -> list[dict[str, object]]:
    """Return catalog metadata in a JSON-safe format."""
    entries: list[dict[str, object]] = []
    for entry_name in sorted(CATALOG):
        resolved = get_environment(entry_name, runtime_paths=runtime_paths)
        entries.append(_serialize_environment(resolved))
    return entries
|
|
|
|
|
|
def _serialize_environment(environment: VmEnvironment) -> dict[str, object]:
|
|
return {
|
|
"name": environment.name,
|
|
"version": environment.version,
|
|
"description": environment.description,
|
|
"default_packages": list(environment.default_packages),
|
|
"distribution": environment.distribution,
|
|
"distribution_version": environment.distribution_version,
|
|
"platform": environment.platform,
|
|
"oci_registry": environment.oci_registry,
|
|
"oci_repository": environment.oci_repository,
|
|
"oci_reference": environment.oci_reference,
|
|
"source_digest": environment.source_digest,
|
|
"compatibility": environment.compatibility,
|
|
}
|
|
|
|
|
|
def _artifacts_ready(root: Path) -> bool:
|
|
return (root / "vmlinux").is_file() and (root / "rootfs.ext4").is_file()
|
|
|
|
|
|
class EnvironmentStore:
    """Install and inspect curated environments in a local cache.

    Installs are staged into ``.partial-`` temp directories and atomically
    renamed into place; an ``environment.json`` marker makes an install dir
    count as complete.
    """

    def __init__(
        self,
        *,
        runtime_paths: RuntimePaths,
        cache_dir: Path | None = None,
    ) -> None:
        """Bind the store to a runtime and a cache root.

        Args:
            runtime_paths: Runtime layout whose manifest supplies the target
                platform and bundled artifact locations.
            cache_dir: Cache root override; defaults to the canonical
                per-user cache directory.
        """
        self._runtime_paths = runtime_paths
        self._cache_dir = cache_dir or _default_cache_dir()
        # Environments are cached per platform so one cache root can serve
        # runtimes built for different platforms.
        raw_platform = self._runtime_paths.manifest.get("platform", DEFAULT_PLATFORM)
        platform = raw_platform if isinstance(raw_platform, str) else DEFAULT_PLATFORM
        self._platform_dir = self._cache_dir / platform

    @property
    def cache_dir(self) -> Path:
        """Root directory of the local environment cache."""
        return self._cache_dir

    @property
    def catalog_version(self) -> str:
        """Version of the built-in environment catalog."""
        return DEFAULT_CATALOG_VERSION

    def list_environments(self) -> list[dict[str, object]]:
        """Return inspection payloads for every catalog entry, sorted by name."""
        environments: list[dict[str, object]] = []
        for name in sorted(CATALOG):
            environments.append(self.inspect_environment(name))
        return environments

    def pull_environment(self, name: str) -> dict[str, object]:
        """Install *name* if needed and return its metadata plus artifact paths."""
        installed = self.ensure_installed(name)
        return {
            **self.inspect_environment(name),
            "install_dir": str(installed.install_dir),
            "kernel_image": str(installed.kernel_image),
            "rootfs_image": str(installed.rootfs_image),
            "source": installed.source,
        }

    def inspect_environment(self, name: str) -> dict[str, object]:
        """Return catalog metadata for *name* plus its local install state.

        Raises:
            ValueError: If *name* is not a known catalog entry.
        """
        spec = get_environment(name, runtime_paths=self._runtime_paths)
        install_dir = self._install_dir(spec)
        metadata_path = install_dir / "environment.json"
        installed = self._load_installed_environment(spec) is not None
        payload = _serialize_environment(spec)
        payload.update(
            {
                "catalog_version": self.catalog_version,
                "installed": installed,
                "cache_dir": str(self._cache_dir),
                "install_dir": str(install_dir),
            }
        )
        if installed:
            payload["install_manifest"] = str(metadata_path)
        return payload

    def ensure_installed(self, name: str) -> InstalledEnvironment:
        """Return the installed environment, installing it on first use.

        Install sources are tried in priority order: artifacts bundled with
        the runtime, the configured OCI registry, then a plain archive URL.

        Raises:
            ValueError: If *name* is not a known catalog entry.
            RuntimeError: If the environment is not installed and no
                downloadable source is configured, or a download fails.
        """
        spec = get_environment(name, runtime_paths=self._runtime_paths)
        self._platform_dir.mkdir(parents=True, exist_ok=True)
        installed = self._load_installed_environment(spec)
        if installed is not None:
            return installed

        source_dir = self._runtime_paths.artifacts_dir / spec.source_profile
        if _artifacts_ready(source_dir):
            return self._install_from_local_source(spec, source_dir)
        if (
            spec.oci_registry is not None
            and spec.oci_repository is not None
            and spec.oci_reference is not None
        ):
            return self._install_from_oci(spec)
        if spec.source_url is not None:
            return self._install_from_archive(spec, spec.source_url)
        raise RuntimeError(
            f"environment {spec.name!r} is not installed and no downloadable source is configured"
        )

    def prune_environments(self) -> dict[str, object]:
        """Delete partial downloads and stale or invalid cached environments.

        Returns:
            Summary mapping with the sorted deleted directory names and count.
        """
        deleted: list[str] = []
        if not self._platform_dir.exists():
            return {"deleted_environment_dirs": [], "count": 0}
        for child in self._platform_dir.iterdir():
            # Leftover temp dirs from interrupted installs.
            if child.name.startswith(".partial-"):
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
                continue
            if not child.is_dir():
                continue
            marker = child / "environment.json"
            if not marker.exists():
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
                continue
            # A corrupt, unreadable, or non-object manifest means the install
            # is unusable; treat it like any other invalid entry instead of
            # letting the exception abort the whole prune.
            try:
                metadata = json.loads(marker.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                metadata = None
            if not isinstance(metadata, dict):
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
                continue
            raw_name = metadata.get("name")
            raw_version = metadata.get("version")
            if not isinstance(raw_name, str) or not isinstance(raw_version, str):
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
                continue
            try:
                spec = get_environment(raw_name, runtime_paths=self._runtime_paths)
            except ValueError:
                # No longer in the catalog.
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
                continue
            if spec.version != raw_version:
                # Superseded by a newer artifact version.
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
                continue
            if self._load_installed_environment(spec, install_dir=child) is None:
                # Marker present but artifacts missing/invalid.
                shutil.rmtree(child, ignore_errors=True)
                deleted.append(child.name)
        return {"deleted_environment_dirs": sorted(deleted), "count": len(deleted)}

    def _install_dir(self, spec: VmEnvironment) -> Path:
        """Return the cache directory for *spec* (':' made filesystem-safe)."""
        normalized = spec.name.replace(":", "_")
        return self._platform_dir / f"{normalized}-{spec.version}"

    def _install_from_local_source(
        self, spec: VmEnvironment, source_dir: Path
    ) -> InstalledEnvironment:
        """Install from artifacts bundled with the runtime (symlink or copy)."""
        install_dir = self._install_dir(spec)
        # Stage in a .partial- dir, then atomically swap into place so readers
        # never observe a half-installed environment.
        temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir))
        try:
            self._link_or_copy(source_dir / "vmlinux", temp_dir / "vmlinux")
            self._link_or_copy(source_dir / "rootfs.ext4", temp_dir / "rootfs.ext4")
            self._write_install_manifest(
                temp_dir,
                spec=spec,
                source="bundled-runtime-source",
                source_digest=spec.source_digest,
            )
            shutil.rmtree(install_dir, ignore_errors=True)
            temp_dir.replace(install_dir)
        except Exception:
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        return InstalledEnvironment(
            name=spec.name,
            version=spec.version,
            install_dir=install_dir,
            kernel_image=install_dir / "vmlinux",
            rootfs_image=install_dir / "rootfs.ext4",
            source="bundled-runtime-source",
            source_digest=spec.source_digest,
            installed=True,
        )

    def _load_installed_environment(
        self, spec: VmEnvironment, *, install_dir: Path | None = None
    ) -> InstalledEnvironment | None:
        """Load a completed install for *spec*, or None when absent/invalid."""
        resolved_install_dir = install_dir or self._install_dir(spec)
        metadata_path = resolved_install_dir / "environment.json"
        if not metadata_path.is_file() or not _artifacts_ready(resolved_install_dir):
            return None
        try:
            metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            return None
        if not isinstance(metadata, dict):
            return None
        source = str(metadata.get("source", "cache"))
        raw_digest = metadata.get("source_digest")
        digest = raw_digest if isinstance(raw_digest, str) else None
        return InstalledEnvironment(
            name=spec.name,
            version=spec.version,
            install_dir=resolved_install_dir,
            kernel_image=resolved_install_dir / "vmlinux",
            rootfs_image=resolved_install_dir / "rootfs.ext4",
            source=source,
            source_digest=digest,
            installed=True,
        )

    def _install_from_archive(self, spec: VmEnvironment, archive_url: str) -> InstalledEnvironment:
        """Download a tar archive from *archive_url* and install its artifacts."""
        install_dir = self._install_dir(spec)
        temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir))
        archive_path = temp_dir / "environment.tgz"
        try:
            urllib.request.urlretrieve(archive_url, archive_path)  # noqa: S310
            self._extract_archive(archive_path, temp_dir)
            # Archives may nest the artifacts; normalize them to the dir root.
            kernel_image = self._locate_artifact(temp_dir, "vmlinux")
            rootfs_image = self._locate_artifact(temp_dir, "rootfs.ext4")
            if kernel_image.parent != temp_dir:
                shutil.move(str(kernel_image), temp_dir / "vmlinux")
            if rootfs_image.parent != temp_dir:
                shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4")
            self._write_install_manifest(
                temp_dir,
                spec=spec,
                source=archive_url,
                source_digest=spec.source_digest,
            )
            archive_path.unlink(missing_ok=True)
            shutil.rmtree(install_dir, ignore_errors=True)
            temp_dir.replace(install_dir)
        except Exception:
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        return InstalledEnvironment(
            name=spec.name,
            version=spec.version,
            install_dir=install_dir,
            kernel_image=install_dir / "vmlinux",
            rootfs_image=install_dir / "rootfs.ext4",
            source=archive_url,
            source_digest=spec.source_digest,
            installed=True,
        )

    def _install_from_oci(self, spec: VmEnvironment) -> InstalledEnvironment:
        """Pull the environment image from its OCI registry and install it."""
        install_dir = self._install_dir(spec)
        temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir))
        resolved_digest: str | None = None
        source = "oci://unknown"
        try:
            manifest, resolved_digest = self._fetch_oci_manifest(spec)
            layers = manifest.get("layers")
            if not isinstance(layers, list) or not layers:
                raise RuntimeError("OCI manifest did not contain any layers")
            # Extract layers in order; later layers overwrite earlier files.
            for index, layer in enumerate(layers):
                if not isinstance(layer, dict):
                    raise RuntimeError("OCI manifest layer entry is malformed")
                raw_digest = layer.get("digest")
                if not isinstance(raw_digest, str):
                    raise RuntimeError("OCI manifest layer is missing a digest")
                blob_path = temp_dir / f"layer-{index}.tar"
                self._download_oci_blob(spec, raw_digest, blob_path)
                self._extract_tar_archive(blob_path, temp_dir)
                blob_path.unlink(missing_ok=True)
            # Normalize artifact locations to the install-dir root.
            kernel_image = self._locate_artifact(temp_dir, "vmlinux")
            rootfs_image = self._locate_artifact(temp_dir, "rootfs.ext4")
            if kernel_image.parent != temp_dir:
                shutil.move(str(kernel_image), temp_dir / "vmlinux")
            if rootfs_image.parent != temp_dir:
                shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4")
            # Prefer a digest-pinned source string; fall back to the tag.
            if spec.oci_registry is not None and spec.oci_repository is not None:
                source = f"oci://{spec.oci_registry}/{spec.oci_repository}"
                if resolved_digest is not None:
                    source = f"{source}@{resolved_digest}"
                elif spec.oci_reference is not None:
                    source = f"{source}:{spec.oci_reference}"
            self._write_install_manifest(
                temp_dir,
                spec=spec,
                source=source,
                source_digest=resolved_digest or spec.source_digest,
            )
            shutil.rmtree(install_dir, ignore_errors=True)
            temp_dir.replace(install_dir)
        except Exception:
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        return InstalledEnvironment(
            name=spec.name,
            version=spec.version,
            install_dir=install_dir,
            kernel_image=install_dir / "vmlinux",
            rootfs_image=install_dir / "rootfs.ext4",
            source=source,
            source_digest=resolved_digest or spec.source_digest,
            installed=True,
        )

    def _write_install_manifest(
        self,
        install_dir: Path,
        *,
        spec: VmEnvironment,
        source: str,
        source_digest: str | None,
    ) -> None:
        """Write environment.json, which marks the install dir as complete."""
        payload = {
            "catalog_version": self.catalog_version,
            "name": spec.name,
            "version": spec.version,
            "source": source,
            "source_digest": source_digest,
            "installed_at": int(time.time()),
        }
        (install_dir / "environment.json").write_text(
            json.dumps(payload, indent=2, sort_keys=True) + "\n",
            encoding="utf-8",
        )

    def _extract_archive(self, archive_path: Path, dest_dir: Path) -> None:
        """Extract a downloaded environment archive (delegates to tar handling)."""
        self._extract_tar_archive(archive_path, dest_dir)

    def _locate_artifact(self, root: Path, name: str) -> Path:
        """Find a file named *name* anywhere under *root*.

        Raises:
            RuntimeError: If no matching file exists.
        """
        for candidate in root.rglob(name):
            if candidate.is_file():
                return candidate
        raise RuntimeError(f"environment archive did not contain {name}")

    def _link_or_copy(self, source: Path, dest: Path) -> None:
        """Symlink *source* at *dest* (relative), copying when symlinks fail."""
        dest.parent.mkdir(parents=True, exist_ok=True)
        relative_target = os.path.relpath(source, start=dest.parent)
        try:
            dest.symlink_to(relative_target)
        except OSError:
            # e.g. filesystems or platforms without symlink support.
            shutil.copy2(source, dest)

    def _fetch_oci_manifest(
        self, spec: VmEnvironment
    ) -> tuple[dict[str, Any], str | None]:
        """Fetch the image manifest for *spec*, resolving indexes to linux/amd64.

        Returns:
            The manifest JSON object and, when the registry reported one, its
            content digest (verified against the payload).

        Raises:
            RuntimeError: On incomplete metadata or malformed responses.
        """
        if spec.oci_registry is None or spec.oci_repository is None or spec.oci_reference is None:
            raise RuntimeError("OCI source metadata is incomplete")
        headers = {"Accept": OCI_MANIFEST_ACCEPT}
        payload, response_headers = self._request_bytes(
            self._oci_url(
                spec.oci_registry,
                spec.oci_repository,
                f"manifests/{spec.oci_reference}",
            ),
            headers=headers,
            repository=spec.oci_repository,
        )
        manifest = json.loads(payload.decode("utf-8"))
        if not isinstance(manifest, dict):
            raise RuntimeError("OCI manifest response was not a JSON object")
        resolved_digest = response_headers.get("docker-content-digest")
        if resolved_digest is not None:
            self._verify_digest_bytes(payload, resolved_digest)
        # A multi-platform index points at per-platform manifests; follow the
        # linux/amd64 descriptor to the concrete child manifest.
        media_type = manifest.get("mediaType")
        if media_type in {
            "application/vnd.oci.image.index.v1+json",
            "application/vnd.docker.distribution.manifest.list.v2+json",
        } or isinstance(manifest.get("manifests"), list):
            manifests = manifest.get("manifests")
            if not isinstance(manifests, list):
                raise RuntimeError("OCI index did not contain manifests")
            selected = self._select_oci_manifest_descriptor(manifests)
            payload, response_headers = self._request_bytes(
                self._oci_url(
                    spec.oci_registry,
                    spec.oci_repository,
                    f"manifests/{selected}",
                ),
                headers=headers,
                repository=spec.oci_repository,
            )
            manifest = json.loads(payload.decode("utf-8"))
            if not isinstance(manifest, dict):
                raise RuntimeError("OCI child manifest response was not a JSON object")
            resolved_digest = response_headers.get("docker-content-digest") or selected
            self._verify_digest_bytes(payload, resolved_digest)
        return manifest, resolved_digest

    def _download_oci_blob(self, spec: VmEnvironment, digest: str, dest: Path) -> None:
        """Stream a blob to *dest*, verifying its sha256 against *digest*.

        Raises:
            RuntimeError: On incomplete metadata, unsupported digest
                algorithm, or digest mismatch.
        """
        if spec.oci_registry is None or spec.oci_repository is None:
            raise RuntimeError("OCI source metadata is incomplete")
        digest_algorithm, digest_value = self._split_digest(digest)
        if digest_algorithm != "sha256":
            raise RuntimeError(f"unsupported OCI blob digest algorithm: {digest_algorithm}")
        # Hash while streaming so the blob never has to fit in memory.
        hasher = hashlib.sha256()
        with (
            self._open_request(
                self._oci_url(
                    spec.oci_registry,
                    spec.oci_repository,
                    f"blobs/{digest}",
                ),
                headers={},
                repository=spec.oci_repository,
            ) as response,
            dest.open("wb") as handle,
        ):
            while True:
                chunk = response.read(OCI_READ_CHUNK_SIZE)
                if not chunk:
                    break
                hasher.update(chunk)
                handle.write(chunk)
        if hasher.hexdigest() != digest_value:
            raise RuntimeError(
                f"OCI blob digest mismatch for {digest}; got sha256:{hasher.hexdigest()}"
            )

    def _request_bytes(
        self,
        url: str,
        *,
        headers: dict[str, str],
        repository: str,
    ) -> tuple[bytes, dict[str, str]]:
        """GET *url* and return (body, lower-cased response headers)."""
        with self._open_request(url, headers=headers, repository=repository) as response:
            return response.read(), {key.lower(): value for key, value in response.headers.items()}

    def _open_request(
        self,
        url: str,
        *,
        headers: dict[str, str],
        repository: str,
    ) -> Any:
        """Open a GET request, retrying once with a bearer token on HTTP 401.

        Raises:
            RuntimeError: When the fetch fails or auth cannot be negotiated.
        """
        request = urllib.request.Request(url, headers=headers, method="GET")
        try:
            return urllib.request.urlopen(request, timeout=90)  # noqa: S310
        except urllib.error.HTTPError as exc:
            if exc.code != 401:
                raise RuntimeError(f"failed to fetch OCI resource {url}: {exc}") from exc
            # Registry token-auth dance: follow the WWW-Authenticate challenge.
            authenticate = exc.headers.get("WWW-Authenticate")
            if authenticate is None:
                raise RuntimeError("OCI registry denied access without an auth challenge") from exc
            token = self._fetch_registry_token(authenticate, repository)
            authenticated_request = urllib.request.Request(
                url,
                headers={**headers, "Authorization": f"Bearer {token}"},
                method="GET",
            )
            try:
                return urllib.request.urlopen(authenticated_request, timeout=90)  # noqa: S310
            except urllib.error.HTTPError as auth_exc:
                raise RuntimeError(f"failed to fetch OCI resource {url}: {auth_exc}") from auth_exc

    def _fetch_registry_token(self, authenticate: str, repository: str) -> str:
        """Exchange a Bearer challenge for a pull token from the auth realm.

        Raises:
            RuntimeError: On non-Bearer schemes or malformed token responses.
        """
        if not authenticate.startswith("Bearer "):
            raise RuntimeError("unsupported OCI authentication scheme")
        params = self._parse_authenticate_parameters(authenticate[len("Bearer ") :])
        realm = params.get("realm")
        if realm is None:
            raise RuntimeError("OCI auth challenge did not include a token realm")
        query = {
            "service": params.get("service", ""),
            "scope": params.get("scope", f"repository:{repository}:pull"),
        }
        token_url = f"{realm}?{urllib.parse.urlencode(query)}"
        with urllib.request.urlopen(token_url, timeout=90) as response:  # noqa: S310
            payload = json.loads(response.read().decode("utf-8"))
        if not isinstance(payload, dict):
            raise RuntimeError("OCI auth token response was not a JSON object")
        # Registries return the token under either key.
        raw_token = payload.get("token") or payload.get("access_token")
        if not isinstance(raw_token, str) or raw_token == "":
            raise RuntimeError("OCI auth token response did not include a bearer token")
        return raw_token

    def _parse_authenticate_parameters(self, raw: str) -> dict[str, str]:
        """Parse the comma-separated key="value" pairs of a Bearer challenge."""
        params: dict[str, str] = {}
        for segment in raw.split(","):
            if "=" not in segment:
                continue
            key, value = segment.split("=", 1)
            params[key.strip()] = value.strip().strip('"')
        return params

    def _select_oci_manifest_descriptor(self, manifests: list[Any]) -> str:
        """Return the digest of the linux/amd64 entry in an OCI index.

        Raises:
            RuntimeError: If no linux/amd64 descriptor is present.
        """
        for manifest in manifests:
            if not isinstance(manifest, dict):
                continue
            platform = manifest.get("platform")
            if not isinstance(platform, dict):
                continue
            os_name = platform.get("os")
            architecture = platform.get("architecture")
            raw_digest = manifest.get("digest")
            if (
                isinstance(os_name, str)
                and isinstance(architecture, str)
                and isinstance(raw_digest, str)
                and os_name == "linux"
                and architecture in {"amd64", "x86_64"}
            ):
                return raw_digest
        raise RuntimeError("OCI index did not contain a linux/amd64 manifest")

    def _extract_tar_archive(self, archive_path: Path, dest_dir: Path) -> None:
        """Extract *archive_path* into *dest_dir*, refusing path-escaping members.

        The explicit member check is defense-in-depth on top of the "data"
        extraction filter, and yields a clearer error message.

        Raises:
            RuntimeError: If a member would resolve outside *dest_dir*.
        """
        dest_root = dest_dir.resolve()
        with tarfile.open(archive_path, "r:*") as archive:
            for member in archive.getmembers():
                member_path = (dest_dir / member.name).resolve()
                if not member_path.is_relative_to(dest_root):
                    raise RuntimeError(f"unsafe archive member path: {member.name}")
            archive.extractall(dest_dir, filter="data")

    def _oci_url(self, registry: str, repository: str, suffix: str) -> str:
        """Build a v2 registry API URL for the given repository and suffix."""
        return f"https://{registry}/v2/{repository}/{suffix}"

    def _split_digest(self, digest: str) -> tuple[str, str]:
        """Split "algorithm:value" into its parts.

        Raises:
            RuntimeError: If either part is missing.
        """
        algorithm, separator, value = digest.partition(":")
        if separator == "" or value == "":
            raise RuntimeError(f"invalid OCI digest: {digest}")
        return algorithm, value

    def _verify_digest_bytes(self, payload: bytes, digest: str) -> None:
        """Raise unless *payload* hashes to *digest* (sha256 only).

        Raises:
            RuntimeError: On unsupported algorithms or digest mismatch.
        """
        algorithm, value = self._split_digest(digest)
        if algorithm != "sha256":
            raise RuntimeError(f"unsupported OCI digest algorithm: {algorithm}")
        actual = hashlib.sha256(payload).hexdigest()
        if actual != value:
            raise RuntimeError(f"OCI digest mismatch for {digest}; got sha256:{actual}")
|