"""Official environment catalog and local cache management.""" from __future__ import annotations import hashlib import json import os import shutil import tarfile import tempfile import time import urllib.error import urllib.parse import urllib.request from dataclasses import dataclass from pathlib import Path from typing import Any from pyro_mcp.runtime import DEFAULT_PLATFORM, RuntimePaths DEFAULT_ENVIRONMENT_VERSION = "1.0.0" DEFAULT_CATALOG_VERSION = "4.5.0" OCI_MANIFEST_ACCEPT = ", ".join( ( "application/vnd.oci.image.index.v1+json", "application/vnd.oci.image.manifest.v1+json", "application/vnd.docker.distribution.manifest.list.v2+json", "application/vnd.docker.distribution.manifest.v2+json", ) ) OCI_READ_CHUNK_SIZE = 1024 * 1024 @dataclass(frozen=True) class VmEnvironment: """Catalog entry describing a curated Linux environment.""" name: str version: str description: str default_packages: tuple[str, ...] distribution: str distribution_version: str source_profile: str platform: str = DEFAULT_PLATFORM source_url: str | None = None oci_registry: str | None = None oci_repository: str | None = None oci_reference: str | None = None source_digest: str | None = None compatibility: str = ">=4.5.0,<5.0.0" @dataclass(frozen=True) class InstalledEnvironment: """Resolved environment artifact locations.""" name: str version: str install_dir: Path kernel_image: Path rootfs_image: Path source: str source_digest: str | None installed: bool CATALOG: dict[str, VmEnvironment] = { "debian:12": VmEnvironment( name="debian:12", version=DEFAULT_ENVIRONMENT_VERSION, description="Debian 12 environment with Git preinstalled for common agent workflows.", default_packages=("bash", "coreutils", "git"), distribution="debian", distribution_version="12", source_profile="debian-git", oci_registry="registry-1.docker.io", oci_repository="thalesmaciel/pyro-environment-debian-12", oci_reference=DEFAULT_ENVIRONMENT_VERSION, ), "debian:12-base": VmEnvironment( name="debian:12-base", version=DEFAULT_ENVIRONMENT_VERSION, description="Minimal Debian 12 environment for shell and core Unix tooling.", default_packages=("bash", "coreutils"), distribution="debian", distribution_version="12", source_profile="debian-base", oci_registry="registry-1.docker.io", oci_repository="thalesmaciel/pyro-environment-debian-12-base", oci_reference=DEFAULT_ENVIRONMENT_VERSION, ), "debian:12-build": VmEnvironment( name="debian:12-build", version=DEFAULT_ENVIRONMENT_VERSION, description="Debian 12 environment with Git and common build tools preinstalled.", default_packages=("bash", "coreutils", "git", "gcc", "make", "cmake", "python3"), distribution="debian", distribution_version="12", source_profile="debian-build", oci_registry="registry-1.docker.io", oci_repository="thalesmaciel/pyro-environment-debian-12-build", oci_reference=DEFAULT_ENVIRONMENT_VERSION, ), } def _default_cache_dir() -> Path: return Path( os.environ.get( "PYRO_ENVIRONMENT_CACHE_DIR", str(Path.home() / ".cache" / "pyro-mcp" / "environments"), ) ) def default_cache_dir() -> Path: """Return the canonical default environment cache directory.""" return _default_cache_dir() def _manifest_profile_digest(runtime_paths: RuntimePaths, profile_name: str) -> str | None: profiles = runtime_paths.manifest.get("profiles") if not isinstance(profiles, dict): return None profile = profiles.get(profile_name) if not isinstance(profile, dict): return None rootfs = profile.get("rootfs") if not isinstance(rootfs, dict): return None raw_digest = rootfs.get("sha256") return raw_digest if isinstance(raw_digest, str) else None def get_environment(name: str, *, runtime_paths: RuntimePaths | None = None) -> VmEnvironment: """Resolve a curated environment by name.""" try: spec = CATALOG[name] except KeyError as exc: known = ", ".join(sorted(CATALOG)) raise ValueError(f"unknown environment {name!r}; expected one of: {known}") from exc if runtime_paths is None: return spec return VmEnvironment( name=spec.name, version=spec.version, description=spec.description, default_packages=spec.default_packages, distribution=spec.distribution, distribution_version=spec.distribution_version, source_profile=spec.source_profile, platform=spec.platform, source_url=spec.source_url, oci_registry=spec.oci_registry, oci_repository=spec.oci_repository, oci_reference=spec.oci_reference, source_digest=_manifest_profile_digest(runtime_paths, spec.source_profile), compatibility=spec.compatibility, ) def list_environments(*, runtime_paths: RuntimePaths | None = None) -> list[dict[str, object]]: """Return catalog metadata in a JSON-safe format.""" return [ _serialize_environment(get_environment(name, runtime_paths=runtime_paths)) for name in sorted(CATALOG) ] def _serialize_environment(environment: VmEnvironment) -> dict[str, object]: return { "name": environment.name, "version": environment.version, "description": environment.description, "default_packages": list(environment.default_packages), "distribution": environment.distribution, "distribution_version": environment.distribution_version, "platform": environment.platform, "oci_registry": environment.oci_registry, "oci_repository": environment.oci_repository, "oci_reference": environment.oci_reference, "source_digest": environment.source_digest, "compatibility": environment.compatibility, } def _artifacts_ready(root: Path) -> bool: return (root / "vmlinux").is_file() and (root / "rootfs.ext4").is_file() class EnvironmentStore: """Install and inspect curated environments in a local cache.""" def __init__( self, *, runtime_paths: RuntimePaths, cache_dir: Path | None = None, ) -> None: self._runtime_paths = runtime_paths self._cache_dir = cache_dir or _default_cache_dir() raw_platform = self._runtime_paths.manifest.get("platform", DEFAULT_PLATFORM) platform = raw_platform if isinstance(raw_platform, str) else DEFAULT_PLATFORM self._platform_dir = self._cache_dir / platform @property def cache_dir(self) -> Path: return self._cache_dir @property def catalog_version(self) -> str: return DEFAULT_CATALOG_VERSION def list_environments(self) -> list[dict[str, object]]: environments: list[dict[str, object]] = [] for name in sorted(CATALOG): environments.append(self.inspect_environment(name)) return environments def pull_environment(self, name: str) -> dict[str, object]: installed = self.ensure_installed(name) return { **self.inspect_environment(name), "install_dir": str(installed.install_dir), "kernel_image": str(installed.kernel_image), "rootfs_image": str(installed.rootfs_image), "source": installed.source, } def inspect_environment(self, name: str) -> dict[str, object]: spec = get_environment(name, runtime_paths=self._runtime_paths) install_dir = self._install_dir(spec) metadata_path = install_dir / "environment.json" installed = self._load_installed_environment(spec) is not None payload = _serialize_environment(spec) payload.update( { "catalog_version": self.catalog_version, "installed": installed, "cache_dir": str(self._cache_dir), "install_dir": str(install_dir), } ) if installed: payload["install_manifest"] = str(metadata_path) return payload def ensure_installed(self, name: str) -> InstalledEnvironment: spec = get_environment(name, runtime_paths=self._runtime_paths) self._platform_dir.mkdir(parents=True, exist_ok=True) installed = self._load_installed_environment(spec) if installed is not None: return installed source_dir = self._runtime_paths.artifacts_dir / spec.source_profile if _artifacts_ready(source_dir): return self._install_from_local_source(spec, source_dir) if ( spec.oci_registry is not None and spec.oci_repository is not None and spec.oci_reference is not None ): return self._install_from_oci(spec) if spec.source_url is not None: return self._install_from_archive(spec, spec.source_url) raise RuntimeError( f"environment {spec.name!r} is not installed and no downloadable source is configured" ) def prune_environments(self) -> dict[str, object]: deleted: list[str] = [] if not self._platform_dir.exists(): return {"deleted_environment_dirs": [], "count": 0} for child in self._platform_dir.iterdir(): if child.name.startswith(".partial-"): shutil.rmtree(child, ignore_errors=True) deleted.append(child.name) continue if not child.is_dir(): continue marker = child / "environment.json" if not marker.exists(): shutil.rmtree(child, ignore_errors=True) deleted.append(child.name) continue metadata = json.loads(marker.read_text(encoding="utf-8")) raw_name = metadata.get("name") raw_version = metadata.get("version") if not isinstance(raw_name, str) or not isinstance(raw_version, str): shutil.rmtree(child, ignore_errors=True) deleted.append(child.name) continue try: spec = get_environment(raw_name, runtime_paths=self._runtime_paths) except ValueError: shutil.rmtree(child, ignore_errors=True) deleted.append(child.name) continue if spec.version != raw_version: shutil.rmtree(child, ignore_errors=True) deleted.append(child.name) continue if self._load_installed_environment(spec, install_dir=child) is None: shutil.rmtree(child, ignore_errors=True) deleted.append(child.name) return {"deleted_environment_dirs": sorted(deleted), "count": len(deleted)} def _install_dir(self, spec: VmEnvironment) -> Path: normalized = spec.name.replace(":", "_") return self._platform_dir / f"{normalized}-{spec.version}" def _install_from_local_source( self, spec: VmEnvironment, source_dir: Path ) -> InstalledEnvironment: install_dir = self._install_dir(spec) temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir)) try: self._link_or_copy(source_dir / "vmlinux", temp_dir / "vmlinux") self._link_or_copy(source_dir / "rootfs.ext4", temp_dir / "rootfs.ext4") self._write_install_manifest( temp_dir, spec=spec, source="bundled-runtime-source", source_digest=spec.source_digest, ) shutil.rmtree(install_dir, ignore_errors=True) temp_dir.replace(install_dir) except Exception: shutil.rmtree(temp_dir, ignore_errors=True) raise return InstalledEnvironment( name=spec.name, version=spec.version, install_dir=install_dir, kernel_image=install_dir / "vmlinux", rootfs_image=install_dir / "rootfs.ext4", source="bundled-runtime-source", source_digest=spec.source_digest, installed=True, ) def _load_installed_environment( self, spec: VmEnvironment, *, install_dir: Path | None = None ) -> InstalledEnvironment | None: resolved_install_dir = install_dir or self._install_dir(spec) metadata_path = resolved_install_dir / "environment.json" if not metadata_path.is_file() or not _artifacts_ready(resolved_install_dir): return None try: metadata = json.loads(metadata_path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return None if not isinstance(metadata, dict): return None source = str(metadata.get("source", "cache")) raw_digest = metadata.get("source_digest") digest = raw_digest if isinstance(raw_digest, str) else None return InstalledEnvironment( name=spec.name, version=spec.version, install_dir=resolved_install_dir, kernel_image=resolved_install_dir / "vmlinux", rootfs_image=resolved_install_dir / "rootfs.ext4", source=source, source_digest=digest, installed=True, ) def _install_from_archive(self, spec: VmEnvironment, archive_url: str) -> InstalledEnvironment: install_dir = self._install_dir(spec) temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir)) archive_path = temp_dir / "environment.tgz" try: urllib.request.urlretrieve(archive_url, archive_path) # noqa: S310 self._extract_archive(archive_path, temp_dir) kernel_image = self._locate_artifact(temp_dir, "vmlinux") rootfs_image = self._locate_artifact(temp_dir, "rootfs.ext4") if kernel_image.parent != temp_dir: shutil.move(str(kernel_image), temp_dir / "vmlinux") if rootfs_image.parent != temp_dir: shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4") self._write_install_manifest( temp_dir, spec=spec, source=archive_url, source_digest=spec.source_digest, ) archive_path.unlink(missing_ok=True) shutil.rmtree(install_dir, ignore_errors=True) temp_dir.replace(install_dir) except Exception: shutil.rmtree(temp_dir, ignore_errors=True) raise return InstalledEnvironment( name=spec.name, version=spec.version, install_dir=install_dir, kernel_image=install_dir / "vmlinux", rootfs_image=install_dir / "rootfs.ext4", source=archive_url, source_digest=spec.source_digest, installed=True, ) def _install_from_oci(self, spec: VmEnvironment) -> InstalledEnvironment: install_dir = self._install_dir(spec) temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir)) resolved_digest: str | None = None source = "oci://unknown" try: manifest, resolved_digest = self._fetch_oci_manifest(spec) layers = manifest.get("layers") if not isinstance(layers, list) or not layers: raise RuntimeError("OCI manifest did not contain any layers") for index, layer in enumerate(layers): if not isinstance(layer, dict): raise RuntimeError("OCI manifest layer entry is malformed") raw_digest = layer.get("digest") if not isinstance(raw_digest, str): raise RuntimeError("OCI manifest layer is missing a digest") blob_path = temp_dir / f"layer-{index}.tar" self._download_oci_blob(spec, raw_digest, blob_path) self._extract_tar_archive(blob_path, temp_dir) blob_path.unlink(missing_ok=True) kernel_image = self._locate_artifact(temp_dir, "vmlinux") rootfs_image = self._locate_artifact(temp_dir, "rootfs.ext4") if kernel_image.parent != temp_dir: shutil.move(str(kernel_image), temp_dir / "vmlinux") if rootfs_image.parent != temp_dir: shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4") if spec.oci_registry is not None and spec.oci_repository is not None: source = f"oci://{spec.oci_registry}/{spec.oci_repository}" if resolved_digest is not None: source = f"{source}@{resolved_digest}" elif spec.oci_reference is not None: source = f"{source}:{spec.oci_reference}" self._write_install_manifest( temp_dir, spec=spec, source=source, source_digest=resolved_digest or spec.source_digest, ) shutil.rmtree(install_dir, ignore_errors=True) temp_dir.replace(install_dir) except Exception: shutil.rmtree(temp_dir, ignore_errors=True) raise return InstalledEnvironment( name=spec.name, version=spec.version, install_dir=install_dir, kernel_image=install_dir / "vmlinux", rootfs_image=install_dir / "rootfs.ext4", source=source, source_digest=resolved_digest or spec.source_digest, installed=True, ) def _write_install_manifest( self, install_dir: Path, *, spec: VmEnvironment, source: str, source_digest: str | None, ) -> None: payload = { "catalog_version": self.catalog_version, "name": spec.name, "version": spec.version, "source": source, "source_digest": source_digest, "installed_at": int(time.time()), } (install_dir / "environment.json").write_text( json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8", ) def _extract_archive(self, archive_path: Path, dest_dir: Path) -> None: self._extract_tar_archive(archive_path, dest_dir) def _locate_artifact(self, root: Path, name: str) -> Path: for candidate in root.rglob(name): if candidate.is_file(): return candidate raise RuntimeError(f"environment archive did not contain {name}") def _link_or_copy(self, source: Path, dest: Path) -> None: dest.parent.mkdir(parents=True, exist_ok=True) relative_target = os.path.relpath(source, start=dest.parent) try: dest.symlink_to(relative_target) except OSError: shutil.copy2(source, dest) def _fetch_oci_manifest( self, spec: VmEnvironment ) -> tuple[dict[str, Any], str | None]: if spec.oci_registry is None or spec.oci_repository is None or spec.oci_reference is None: raise RuntimeError("OCI source metadata is incomplete") headers = {"Accept": OCI_MANIFEST_ACCEPT} payload, response_headers = self._request_bytes( self._oci_url( spec.oci_registry, spec.oci_repository, f"manifests/{spec.oci_reference}", ), headers=headers, repository=spec.oci_repository, ) manifest = json.loads(payload.decode("utf-8")) if not isinstance(manifest, dict): raise RuntimeError("OCI manifest response was not a JSON object") resolved_digest = response_headers.get("docker-content-digest") if resolved_digest is not None: self._verify_digest_bytes(payload, resolved_digest) media_type = manifest.get("mediaType") if media_type in { "application/vnd.oci.image.index.v1+json", "application/vnd.docker.distribution.manifest.list.v2+json", } or isinstance(manifest.get("manifests"), list): manifests = manifest.get("manifests") if not isinstance(manifests, list): raise RuntimeError("OCI index did not contain manifests") selected = self._select_oci_manifest_descriptor(manifests) payload, response_headers = self._request_bytes( self._oci_url( spec.oci_registry, spec.oci_repository, f"manifests/{selected}", ), headers=headers, repository=spec.oci_repository, ) manifest = json.loads(payload.decode("utf-8")) if not isinstance(manifest, dict): raise RuntimeError("OCI child manifest response was not a JSON object") resolved_digest = response_headers.get("docker-content-digest") or selected self._verify_digest_bytes(payload, resolved_digest) return manifest, resolved_digest def _download_oci_blob(self, spec: VmEnvironment, digest: str, dest: Path) -> None: if spec.oci_registry is None or spec.oci_repository is None: raise RuntimeError("OCI source metadata is incomplete") digest_algorithm, digest_value = self._split_digest(digest) if digest_algorithm != "sha256": raise RuntimeError(f"unsupported OCI blob digest algorithm: {digest_algorithm}") hasher = hashlib.sha256() with ( self._open_request( self._oci_url( spec.oci_registry, spec.oci_repository, f"blobs/{digest}", ), headers={}, repository=spec.oci_repository, ) as response, dest.open("wb") as handle, ): while True: chunk = response.read(OCI_READ_CHUNK_SIZE) if not chunk: break hasher.update(chunk) handle.write(chunk) if hasher.hexdigest() != digest_value: raise RuntimeError( f"OCI blob digest mismatch for {digest}; got sha256:{hasher.hexdigest()}" ) def _request_bytes( self, url: str, *, headers: dict[str, str], repository: str, ) -> tuple[bytes, dict[str, str]]: with self._open_request(url, headers=headers, repository=repository) as response: return response.read(), {key.lower(): value for key, value in response.headers.items()} def _open_request( self, url: str, *, headers: dict[str, str], repository: str, ) -> Any: request = urllib.request.Request(url, headers=headers, method="GET") try: return urllib.request.urlopen(request, timeout=90) # noqa: S310 except urllib.error.HTTPError as exc: if exc.code != 401: raise RuntimeError(f"failed to fetch OCI resource {url}: {exc}") from exc authenticate = exc.headers.get("WWW-Authenticate") if authenticate is None: raise RuntimeError("OCI registry denied access without an auth challenge") from exc token = self._fetch_registry_token(authenticate, repository) authenticated_request = urllib.request.Request( url, headers={**headers, "Authorization": f"Bearer {token}"}, method="GET", ) try: return urllib.request.urlopen(authenticated_request, timeout=90) # noqa: S310 except urllib.error.HTTPError as auth_exc: raise RuntimeError(f"failed to fetch OCI resource {url}: {auth_exc}") from auth_exc def _fetch_registry_token(self, authenticate: str, repository: str) -> str: if not authenticate.startswith("Bearer "): raise RuntimeError("unsupported OCI authentication scheme") params = self._parse_authenticate_parameters(authenticate[len("Bearer ") :]) realm = params.get("realm") if realm is None: raise RuntimeError("OCI auth challenge did not include a token realm") query = { "service": params.get("service", ""), "scope": params.get("scope", f"repository:{repository}:pull"), } token_url = f"{realm}?{urllib.parse.urlencode(query)}" with urllib.request.urlopen(token_url, timeout=90) as response: # noqa: S310 payload = json.loads(response.read().decode("utf-8")) if not isinstance(payload, dict): raise RuntimeError("OCI auth token response was not a JSON object") raw_token = payload.get("token") or payload.get("access_token") if not isinstance(raw_token, str) or raw_token == "": raise RuntimeError("OCI auth token response did not include a bearer token") return raw_token def _parse_authenticate_parameters(self, raw: str) -> dict[str, str]: params: dict[str, str] = {} for segment in raw.split(","): if "=" not in segment: continue key, value = segment.split("=", 1) params[key.strip()] = value.strip().strip('"') return params def _select_oci_manifest_descriptor(self, manifests: list[Any]) -> str: for manifest in manifests: if not isinstance(manifest, dict): continue platform = manifest.get("platform") if not isinstance(platform, dict): continue os_name = platform.get("os") architecture = platform.get("architecture") raw_digest = manifest.get("digest") if ( isinstance(os_name, str) and isinstance(architecture, str) and isinstance(raw_digest, str) and os_name == "linux" and architecture in {"amd64", "x86_64"} ): return raw_digest raise RuntimeError("OCI index did not contain a linux/amd64 manifest") def _extract_tar_archive(self, archive_path: Path, dest_dir: Path) -> None: dest_root = dest_dir.resolve() with tarfile.open(archive_path, "r:*") as archive: for member in archive.getmembers(): member_path = (dest_dir / member.name).resolve() if not member_path.is_relative_to(dest_root): raise RuntimeError(f"unsafe archive member path: {member.name}") archive.extractall(dest_dir, filter="data") def _oci_url(self, registry: str, repository: str, suffix: str) -> str: return f"https://{registry}/v2/{repository}/{suffix}" def _split_digest(self, digest: str) -> tuple[str, str]: algorithm, separator, value = digest.partition(":") if separator == "" or value == "": raise RuntimeError(f"invalid OCI digest: {digest}") return algorithm, value def _verify_digest_bytes(self, payload: bytes, digest: str) -> None: algorithm, value = self._split_digest(digest) if algorithm != "sha256": raise RuntimeError(f"unsupported OCI digest algorithm: {algorithm}") actual = hashlib.sha256(payload).hexdigest() if actual != value: raise RuntimeError(f"OCI digest mismatch for {digest}; got sha256:{actual}")