diff --git a/README.md b/README.md index c0a28d8..04b8ff1 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,8 @@ Current curated environments in this repository: - `debian:12-build` The package ships the embedded Firecracker runtime and a package-controlled environment catalog. -Environment artifacts are installed into a local cache on first use or through `pyro env pull`. +Official environments are pulled as OCI artifacts from GHCR into a local cache on first use or +through `pyro env pull`. ## CLI diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 45a75e9..059f276 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -5,7 +5,7 @@ Cause: - the environment cache directory is not writable -- the configured environment source is unavailable +- the configured GHCR environment artifact is unavailable - the environment download was interrupted Fix: diff --git a/src/pyro_mcp/vm_environments.py b/src/pyro_mcp/vm_environments.py index 5a26b52..79392cc 100644 --- a/src/pyro_mcp/vm_environments.py +++ b/src/pyro_mcp/vm_environments.py @@ -2,6 +2,7 @@ from __future__ import annotations +import hashlib import json import os import shutil @@ -27,6 +28,7 @@ OCI_MANIFEST_ACCEPT = ", ".join( "application/vnd.docker.distribution.manifest.v2+json", ) ) +OCI_READ_CHUNK_SIZE = 1024 * 1024 @dataclass(frozen=True) @@ -381,6 +383,8 @@ class EnvironmentStore: def _install_from_oci(self, spec: VmEnvironment) -> InstalledEnvironment: install_dir = self._install_dir(spec) temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir)) + resolved_digest: str | None = None + source = "oci://unknown" try: manifest, resolved_digest = self._fetch_oci_manifest(spec) layers = manifest.get("layers") @@ -402,13 +406,12 @@ class EnvironmentStore: shutil.move(str(kernel_image), temp_dir / "vmlinux") if rootfs_image.parent != temp_dir: shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4") - source = ( - f"oci://{spec.oci_registry}/{spec.oci_repository}:{spec.oci_reference}" - if spec.oci_registry is not None - and spec.oci_repository is not None - and spec.oci_reference is not None - else "oci://unknown" - ) + if spec.oci_registry is not None and spec.oci_repository is not None: + source = f"oci://{spec.oci_registry}/{spec.oci_repository}" + if resolved_digest is not None: + source = f"{source}@{resolved_digest}" + elif spec.oci_reference is not None: + source = f"{source}:{spec.oci_reference}" self._write_install_manifest( temp_dir, spec=spec, @@ -487,12 +490,14 @@ class EnvironmentStore: manifest = json.loads(payload.decode("utf-8")) if not isinstance(manifest, dict): raise RuntimeError("OCI manifest response was not a JSON object") - resolved_digest = response_headers.get("Docker-Content-Digest") + resolved_digest = response_headers.get("docker-content-digest") + if resolved_digest is not None: + self._verify_digest_bytes(payload, resolved_digest) media_type = manifest.get("mediaType") if media_type in { "application/vnd.oci.image.index.v1+json", "application/vnd.docker.distribution.manifest.list.v2+json", - }: + } or isinstance(manifest.get("manifests"), list): manifests = manifest.get("manifests") if not isinstance(manifests, list): raise RuntimeError("OCI index did not contain manifests") @@ -509,22 +514,39 @@ class EnvironmentStore: manifest = json.loads(payload.decode("utf-8")) if not isinstance(manifest, dict): raise RuntimeError("OCI child manifest response was not a JSON object") - resolved_digest = response_headers.get("Docker-Content-Digest") or selected + resolved_digest = response_headers.get("docker-content-digest") or selected + self._verify_digest_bytes(payload, resolved_digest) return manifest, resolved_digest def _download_oci_blob(self, spec: VmEnvironment, digest: str, dest: Path) -> None: if spec.oci_registry is None or spec.oci_repository is None: raise RuntimeError("OCI source metadata is incomplete") - payload, _ = self._request_bytes( - self._oci_url( - spec.oci_registry, - spec.oci_repository, - f"blobs/{digest}", - ), - headers={}, - repository=spec.oci_repository, - ) - dest.write_bytes(payload) + digest_algorithm, digest_value = self._split_digest(digest) + if digest_algorithm != "sha256": + raise RuntimeError(f"unsupported OCI blob digest algorithm: {digest_algorithm}") + hasher = hashlib.sha256() + with ( + self._open_request( + self._oci_url( + spec.oci_registry, + spec.oci_repository, + f"blobs/{digest}", + ), + headers={}, + repository=spec.oci_repository, + ) as response, + dest.open("wb") as handle, + ): + while True: + chunk = response.read(OCI_READ_CHUNK_SIZE) + if not chunk: + break + hasher.update(chunk) + handle.write(chunk) + if hasher.hexdigest() != digest_value: + raise RuntimeError( + f"OCI blob digest mismatch for {digest}; got sha256:{hasher.hexdigest()}" + ) def _request_bytes( self, @@ -533,10 +555,19 @@ class EnvironmentStore: headers: dict[str, str], repository: str, ) -> tuple[bytes, dict[str, str]]: + with self._open_request(url, headers=headers, repository=repository) as response: + return response.read(), {key.lower(): value for key, value in response.headers.items()} + + def _open_request( + self, + url: str, + *, + headers: dict[str, str], + repository: str, + ) -> Any: request = urllib.request.Request(url, headers=headers, method="GET") try: - with urllib.request.urlopen(request, timeout=90) as response: # noqa: S310 - return response.read(), dict(response.headers.items()) + return urllib.request.urlopen(request, timeout=90) # noqa: S310 except urllib.error.HTTPError as exc: if exc.code != 401: raise RuntimeError(f"failed to fetch OCI resource {url}: {exc}") from exc @@ -549,8 +580,10 @@ class EnvironmentStore: headers={**headers, "Authorization": f"Bearer {token}"}, method="GET", ) - with urllib.request.urlopen(authenticated_request, timeout=90) as response: # noqa: S310 - return response.read(), dict(response.headers.items()) + try: + return urllib.request.urlopen(authenticated_request, timeout=90) # noqa: S310 + except urllib.error.HTTPError as auth_exc: + raise RuntimeError(f"failed to fetch OCI resource {url}: {auth_exc}") from auth_exc def _fetch_registry_token(self, authenticate: str, repository: str) -> str: if not authenticate.startswith("Bearer "): @@ -613,3 +646,17 @@ class EnvironmentStore: def _oci_url(self, registry: str, repository: str, suffix: str) -> str: return f"https://{registry}/v2/{repository}/{suffix}" + + def _split_digest(self, digest: str) -> tuple[str, str]: + algorithm, separator, value = digest.partition(":") + if separator == "" or value == "": + raise RuntimeError(f"invalid OCI digest: {digest}") + return algorithm, value + + def _verify_digest_bytes(self, payload: bytes, digest: str) -> None: + algorithm, value = self._split_digest(digest) + if algorithm != "sha256": + raise RuntimeError(f"unsupported OCI digest algorithm: {algorithm}") + actual = hashlib.sha256(payload).hexdigest() + if actual != value: + raise RuntimeError(f"OCI digest mismatch for {digest}; got sha256:{actual}") diff --git a/tests/test_vm_environments.py b/tests/test_vm_environments.py index 175f87a..8d6cede 100644 --- a/tests/test_vm_environments.py +++ b/tests/test_vm_environments.py @@ -1,12 +1,99 @@ from __future__ import annotations +import hashlib +import io +import json import tarfile +import urllib.error +import urllib.request +from email.message import Message from pathlib import Path import pytest -from pyro_mcp.runtime import resolve_runtime_paths -from pyro_mcp.vm_environments import EnvironmentStore, get_environment, list_environments +from pyro_mcp.runtime import RuntimePaths, resolve_runtime_paths +from pyro_mcp.vm_environments import ( + EnvironmentStore, + VmEnvironment, + get_environment, + list_environments, +) + + +class FakeResponse: + def __init__(self, payload: bytes, *, headers: dict[str, str] | None = None) -> None: + self._buffer = io.BytesIO(payload) + self.headers = headers or {} + + def read(self, size: int = -1) -> bytes: + return self._buffer.read(size) + + def __enter__(self) -> FakeResponse: + return self + + def __exit__(self, exc_type: object, exc: object, tb: object) -> None: + del exc_type, exc, tb + + +def _fake_runtime_paths(tmp_path: Path) -> RuntimePaths: + bundle_parent = tmp_path / "runtime" + bundle_root = bundle_parent / "linux-x86_64" + manifest_path = bundle_root / "manifest.json" + firecracker_bin = bundle_root / "bin" / "firecracker" + jailer_bin = bundle_root / "bin" / "jailer" + guest_agent_path = bundle_root / "guest" / "pyro_guest_agent.py" + artifacts_dir = bundle_root / "profiles" + notice_path = bundle_parent / "NOTICE" + + artifacts_dir.mkdir(parents=True, exist_ok=True) + firecracker_bin.parent.mkdir(parents=True, exist_ok=True) + jailer_bin.parent.mkdir(parents=True, exist_ok=True) + guest_agent_path.parent.mkdir(parents=True, exist_ok=True) + + manifest_path.write_text('{"platform": "linux-x86_64"}\n', encoding="utf-8") + firecracker_bin.write_text("firecracker\n", encoding="utf-8") + jailer_bin.write_text("jailer\n", encoding="utf-8") + guest_agent_path.write_text("print('guest')\n", encoding="utf-8") + notice_path.write_text("notice\n", encoding="utf-8") + + return RuntimePaths( + bundle_root=bundle_root, + manifest_path=manifest_path, + firecracker_bin=firecracker_bin, + jailer_bin=jailer_bin, + guest_agent_path=guest_agent_path, + artifacts_dir=artifacts_dir, + notice_path=notice_path, + manifest={"platform": "linux-x86_64"}, + ) + + +def _sha256_digest(payload: bytes) -> str: + return f"sha256:{hashlib.sha256(payload).hexdigest()}" + + +def _layer_archive(filename: str, content: bytes) -> bytes: + archive_buffer = io.BytesIO() + with tarfile.open(fileobj=archive_buffer, mode="w:gz") as archive: + info = tarfile.TarInfo(name=filename) + info.size = len(content) + archive.addfile(info, io.BytesIO(content)) + return archive_buffer.getvalue() + + +def _authorization_header(request: object) -> str | None: + if isinstance(request, urllib.request.Request): + for key, value in request.header_items(): + if key.lower() == "authorization": + return value + return None + + +def _http_headers(headers: dict[str, str]) -> Message: + message = Message() + for key, value in headers.items(): + message[key] = value + return message def test_list_environments_includes_expected_entries() -> None: @@ -151,3 +238,185 @@ def test_environment_store_prunes_stale_entries(tmp_path: Path) -> None: result = store.prune_environments() assert result["count"] == 5 + + +def test_fetch_oci_manifest_resolves_linux_amd64_index_with_bearer_auth( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + runtime_paths = _fake_runtime_paths(tmp_path) + store = EnvironmentStore(runtime_paths=runtime_paths, cache_dir=tmp_path / "cache") + spec = VmEnvironment( + name="debian:12-ghcr", + version="1.0.0", + description="OCI-backed environment", + default_packages=("bash", "git"), + distribution="debian", + distribution_version="12", + source_profile="missing-profile", + oci_registry="ghcr.io", + oci_repository="thaloco/pyro-environments/debian-12", + oci_reference="1.0.0", + ) + + child_manifest = { + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "layers": [], + } + child_payload = json.dumps(child_manifest).encode("utf-8") + child_digest = _sha256_digest(child_payload) + index_manifest = { + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.index.v1+json", + "manifests": [ + { + "digest": "sha256:arm64digest", + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "platform": {"os": "linux", "architecture": "arm64"}, + }, + { + "digest": child_digest, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "platform": {"os": "linux", "architecture": "amd64"}, + }, + ], + } + index_payload = json.dumps(index_manifest).encode("utf-8") + index_digest = _sha256_digest(index_payload) + authorized_urls: list[str] = [] + + def fake_urlopen(request: object, timeout: int = 90) -> FakeResponse: + del timeout + url = request.full_url if isinstance(request, urllib.request.Request) else str(request) + if url.startswith("https://ghcr.io/token?"): + return FakeResponse(b'{"token":"secret-token"}') + authorization = _authorization_header(request) + if url.endswith("/manifests/1.0.0"): + if authorization is None: + raise urllib.error.HTTPError( + url, + 401, + "Unauthorized", + _http_headers( + { + "WWW-Authenticate": ( + 'Bearer realm="https://ghcr.io/token",' + 'service="ghcr.io",' + 'scope="repository:thaloco/pyro-environments/debian-12:pull"' + ) + } + ), + io.BytesIO(b""), + ) + authorized_urls.append(url) + return FakeResponse( + index_payload, + headers={"Docker-Content-Digest": index_digest}, + ) + if url.endswith(f"/manifests/{child_digest}"): + if authorization is None: + raise urllib.error.HTTPError( + url, + 401, + "Unauthorized", + _http_headers( + { + "WWW-Authenticate": ( + 'Bearer realm="https://ghcr.io/token",' + 'service="ghcr.io",' + 'scope="repository:thaloco/pyro-environments/debian-12:pull"' + ) + } + ), + io.BytesIO(b""), + ) + authorized_urls.append(url) + assert authorization == "Bearer secret-token" + return FakeResponse( + child_payload, + headers={"Docker-Content-Digest": child_digest}, + ) + raise AssertionError(f"unexpected OCI request: {url}") + + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + + manifest, resolved_digest = store._fetch_oci_manifest(spec) # noqa: SLF001 + + assert manifest == child_manifest + assert resolved_digest == child_digest + assert authorized_urls == [ + "https://ghcr.io/v2/thaloco/pyro-environments/debian-12/manifests/1.0.0", + f"https://ghcr.io/v2/thaloco/pyro-environments/debian-12/manifests/{child_digest}", + ] + + +def test_environment_store_installs_from_oci_when_runtime_source_missing( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + runtime_paths = _fake_runtime_paths(tmp_path) + kernel_layer = _layer_archive("vmlinux", b"kernel\n") + rootfs_layer = _layer_archive("rootfs.ext4", b"rootfs\n") + kernel_digest = _sha256_digest(kernel_layer) + rootfs_digest = _sha256_digest(rootfs_layer) + manifest = { + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "layers": [ + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": kernel_digest, + "size": len(kernel_layer), + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": rootfs_digest, + "size": len(rootfs_layer), + }, + ], + } + manifest_payload = json.dumps(manifest).encode("utf-8") + manifest_digest = _sha256_digest(manifest_payload) + environment = VmEnvironment( + name="debian:12-ghcr", + version="1.0.0", + description="OCI-backed environment", + default_packages=("bash", "git"), + distribution="debian", + distribution_version="12", + source_profile="missing-profile", + oci_registry="ghcr.io", + oci_repository="thaloco/pyro-environments/debian-12", + oci_reference="1.0.0", + ) + + def fake_urlopen(request: object, timeout: int = 90) -> FakeResponse: + del timeout + url = request.full_url if isinstance(request, urllib.request.Request) else str(request) + if url.endswith("/manifests/1.0.0"): + return FakeResponse( + manifest_payload, + headers={"Docker-Content-Digest": manifest_digest}, + ) + if url.endswith(f"/blobs/{kernel_digest}"): + return FakeResponse(kernel_layer) + if url.endswith(f"/blobs/{rootfs_digest}"): + return FakeResponse(rootfs_layer) + raise AssertionError(f"unexpected OCI request: {url}") + + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr( + "pyro_mcp.vm_environments.CATALOG", + {environment.name: environment}, + ) + store = EnvironmentStore(runtime_paths=runtime_paths, cache_dir=tmp_path / "cache") + + installed = store.ensure_installed(environment.name) + + assert installed.kernel_image.read_text(encoding="utf-8") == "kernel\n" + assert installed.rootfs_image.read_text(encoding="utf-8") == "rootfs\n" + assert installed.source == ( + "oci://ghcr.io/thaloco/pyro-environments/debian-12" + f"@{manifest_digest}" + ) + metadata = json.loads((installed.install_dir / "environment.json").read_text(encoding="utf-8")) + assert metadata["source_digest"] == manifest_digest diff --git a/uv.lock b/uv.lock index a4b51df..279cbde 100644 --- a/uv.lock +++ b/uv.lock @@ -706,7 +706,7 @@ crypto = [ [[package]] name = "pyro-mcp" -version = "0.1.0" +version = "1.0.0" source = { editable = "." } dependencies = [ { name = "mcp" },