Add direct GHCR environment pulls

This commit is contained in:
Thales Maciel 2026-03-08 16:08:01 -03:00
parent 5d5243df23
commit 75082467f9
5 changed files with 346 additions and 29 deletions

View file

@ -42,7 +42,8 @@ Current curated environments in this repository:
- `debian:12-build` - `debian:12-build`
The package ships the embedded Firecracker runtime and a package-controlled environment catalog. The package ships the embedded Firecracker runtime and a package-controlled environment catalog.
Environment artifacts are installed into a local cache on first use or through `pyro env pull`. Official environments are pulled as OCI artifacts from GHCR into a local cache on first use or
through `pyro env pull`.
## CLI ## CLI

View file

@ -5,7 +5,7 @@
Cause: Cause:
- the environment cache directory is not writable - the environment cache directory is not writable
- the configured environment source is unavailable - the configured GHCR environment artifact is unavailable
- the environment download was interrupted - the environment download was interrupted
Fix: Fix:

View file

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import hashlib
import json import json
import os import os
import shutil import shutil
@ -27,6 +28,7 @@ OCI_MANIFEST_ACCEPT = ", ".join(
"application/vnd.docker.distribution.manifest.v2+json", "application/vnd.docker.distribution.manifest.v2+json",
) )
) )
OCI_READ_CHUNK_SIZE = 1024 * 1024
@dataclass(frozen=True) @dataclass(frozen=True)
@ -381,6 +383,8 @@ class EnvironmentStore:
def _install_from_oci(self, spec: VmEnvironment) -> InstalledEnvironment: def _install_from_oci(self, spec: VmEnvironment) -> InstalledEnvironment:
install_dir = self._install_dir(spec) install_dir = self._install_dir(spec)
temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir)) temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir))
resolved_digest: str | None = None
source = "oci://unknown"
try: try:
manifest, resolved_digest = self._fetch_oci_manifest(spec) manifest, resolved_digest = self._fetch_oci_manifest(spec)
layers = manifest.get("layers") layers = manifest.get("layers")
@ -402,13 +406,12 @@ class EnvironmentStore:
shutil.move(str(kernel_image), temp_dir / "vmlinux") shutil.move(str(kernel_image), temp_dir / "vmlinux")
if rootfs_image.parent != temp_dir: if rootfs_image.parent != temp_dir:
shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4") shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4")
source = ( if spec.oci_registry is not None and spec.oci_repository is not None:
f"oci://{spec.oci_registry}/{spec.oci_repository}:{spec.oci_reference}" source = f"oci://{spec.oci_registry}/{spec.oci_repository}"
if spec.oci_registry is not None if resolved_digest is not None:
and spec.oci_repository is not None source = f"{source}@{resolved_digest}"
and spec.oci_reference is not None elif spec.oci_reference is not None:
else "oci://unknown" source = f"{source}:{spec.oci_reference}"
)
self._write_install_manifest( self._write_install_manifest(
temp_dir, temp_dir,
spec=spec, spec=spec,
@ -487,12 +490,14 @@ class EnvironmentStore:
manifest = json.loads(payload.decode("utf-8")) manifest = json.loads(payload.decode("utf-8"))
if not isinstance(manifest, dict): if not isinstance(manifest, dict):
raise RuntimeError("OCI manifest response was not a JSON object") raise RuntimeError("OCI manifest response was not a JSON object")
resolved_digest = response_headers.get("Docker-Content-Digest") resolved_digest = response_headers.get("docker-content-digest")
if resolved_digest is not None:
self._verify_digest_bytes(payload, resolved_digest)
media_type = manifest.get("mediaType") media_type = manifest.get("mediaType")
if media_type in { if media_type in {
"application/vnd.oci.image.index.v1+json", "application/vnd.oci.image.index.v1+json",
"application/vnd.docker.distribution.manifest.list.v2+json", "application/vnd.docker.distribution.manifest.list.v2+json",
}: } or isinstance(manifest.get("manifests"), list):
manifests = manifest.get("manifests") manifests = manifest.get("manifests")
if not isinstance(manifests, list): if not isinstance(manifests, list):
raise RuntimeError("OCI index did not contain manifests") raise RuntimeError("OCI index did not contain manifests")
@ -509,22 +514,39 @@ class EnvironmentStore:
manifest = json.loads(payload.decode("utf-8")) manifest = json.loads(payload.decode("utf-8"))
if not isinstance(manifest, dict): if not isinstance(manifest, dict):
raise RuntimeError("OCI child manifest response was not a JSON object") raise RuntimeError("OCI child manifest response was not a JSON object")
resolved_digest = response_headers.get("Docker-Content-Digest") or selected resolved_digest = response_headers.get("docker-content-digest") or selected
self._verify_digest_bytes(payload, resolved_digest)
return manifest, resolved_digest return manifest, resolved_digest
# Download the OCI blob identified by `digest` for `spec` from the registry's
# `blobs/{digest}` URL and write it to `dest`.
# NOTE: this span is a side-by-side diff rendering; each line shows the removed text
# (left) next to the added text (right), so it is not runnable exactly as written.
# Raises RuntimeError when `spec.oci_registry` or `spec.oci_repository` is None.
def _download_oci_blob(self, spec: VmEnvironment, digest: str, dest: Path) -> None: def _download_oci_blob(self, spec: VmEnvironment, digest: str, dest: Path) -> None:
if spec.oci_registry is None or spec.oci_repository is None: if spec.oci_registry is None or spec.oci_repository is None:
raise RuntimeError("OCI source metadata is incomplete") raise RuntimeError("OCI source metadata is incomplete")
# Removed side: fetch the whole blob in one `_request_bytes` call and write it with
# `dest.write_bytes(payload)`.
# Added side: accept only sha256 digests (RuntimeError otherwise), then stream the
# response from `_open_request` into `dest` in reads of `OCI_READ_CHUNK_SIZE` bytes,
# feeding every chunk to the hasher as it is written.
payload, _ = self._request_bytes( digest_algorithm, digest_value = self._split_digest(digest)
self._oci_url( if digest_algorithm != "sha256":
spec.oci_registry, raise RuntimeError(f"unsupported OCI blob digest algorithm: {digest_algorithm}")
spec.oci_repository, hasher = hashlib.sha256()
f"blobs/{digest}", with (
), self._open_request(
headers={}, self._oci_url(
repository=spec.oci_repository, spec.oci_registry,
) spec.oci_repository,
dest.write_bytes(payload) f"blobs/{digest}",
),
headers={},
repository=spec.oci_repository,
) as response,
dest.open("wb") as handle,
):
while True:
chunk = response.read(OCI_READ_CHUNK_SIZE)
# An empty read marks the end of the response body.
if not chunk:
break
hasher.update(chunk)
handle.write(chunk)
# Added side: compare the streamed hash with the value part of `digest` and raise
# RuntimeError on mismatch.
# NOTE(review): the bytes already written to `dest` are not removed in this block when
# the check fails — confirm the caller discards the file.
if hasher.hexdigest() != digest_value:
raise RuntimeError(
f"OCI blob digest mismatch for {digest}; got sha256:{hasher.hexdigest()}"
)
def _request_bytes( def _request_bytes(
self, self,
@ -533,10 +555,19 @@ class EnvironmentStore:
headers: dict[str, str], headers: dict[str, str],
repository: str, repository: str,
) -> tuple[bytes, dict[str, str]]: ) -> tuple[bytes, dict[str, str]]:
with self._open_request(url, headers=headers, repository=repository) as response:
return response.read(), {key.lower(): value for key, value in response.headers.items()}
def _open_request(
self,
url: str,
*,
headers: dict[str, str],
repository: str,
) -> Any:
request = urllib.request.Request(url, headers=headers, method="GET") request = urllib.request.Request(url, headers=headers, method="GET")
try: try:
with urllib.request.urlopen(request, timeout=90) as response: # noqa: S310 return urllib.request.urlopen(request, timeout=90) # noqa: S310
return response.read(), dict(response.headers.items())
except urllib.error.HTTPError as exc: except urllib.error.HTTPError as exc:
if exc.code != 401: if exc.code != 401:
raise RuntimeError(f"failed to fetch OCI resource {url}: {exc}") from exc raise RuntimeError(f"failed to fetch OCI resource {url}: {exc}") from exc
@ -549,8 +580,10 @@ class EnvironmentStore:
headers={**headers, "Authorization": f"Bearer {token}"}, headers={**headers, "Authorization": f"Bearer {token}"},
method="GET", method="GET",
) )
with urllib.request.urlopen(authenticated_request, timeout=90) as response: # noqa: S310 try:
return response.read(), dict(response.headers.items()) return urllib.request.urlopen(authenticated_request, timeout=90) # noqa: S310
except urllib.error.HTTPError as auth_exc:
raise RuntimeError(f"failed to fetch OCI resource {url}: {auth_exc}") from auth_exc
def _fetch_registry_token(self, authenticate: str, repository: str) -> str: def _fetch_registry_token(self, authenticate: str, repository: str) -> str:
if not authenticate.startswith("Bearer "): if not authenticate.startswith("Bearer "):
@ -613,3 +646,17 @@ class EnvironmentStore:
def _oci_url(self, registry: str, repository: str, suffix: str) -> str: def _oci_url(self, registry: str, repository: str, suffix: str) -> str:
return f"https://{registry}/v2/{repository}/{suffix}" return f"https://{registry}/v2/{repository}/{suffix}"
def _split_digest(self, digest: str) -> tuple[str, str]:
    """Split an OCI digest of the form ``<algorithm>:<value>`` into its two parts.

    Args:
        digest: Digest string such as ``sha256:<hex>``.

    Returns:
        ``(algorithm, value)``, split at the first ``:``.

    Raises:
        RuntimeError: If the ``:`` separator is missing or either side of it is empty.
    """
    algorithm, separator, value = digest.partition(":")
    # An empty algorithm (":abc") is as malformed as an empty value; reject it here so
    # callers report an invalid digest rather than "unsupported ... algorithm: ".
    if not (algorithm and separator and value):
        raise RuntimeError(f"invalid OCI digest: {digest}")
    return algorithm, value
def _verify_digest_bytes(self, payload: bytes, digest: str) -> None:
    """Check that ``payload`` hashes to the value carried by ``digest``.

    Args:
        payload: Raw bytes to hash.
        digest: Expected digest in ``<algorithm>:<hex>`` form; only ``sha256`` is accepted.

    Raises:
        RuntimeError: If the digest cannot be split, names an algorithm other than
            ``sha256``, or does not match the SHA-256 of ``payload``.
    """
    algorithm, expected = self._split_digest(digest)
    if algorithm == "sha256":
        actual = hashlib.sha256(payload).hexdigest()
        if actual == expected:
            return
        raise RuntimeError(f"OCI digest mismatch for {digest}; got sha256:{actual}")
    raise RuntimeError(f"unsupported OCI digest algorithm: {algorithm}")

View file

@ -1,12 +1,99 @@
from __future__ import annotations from __future__ import annotations
import hashlib
import io
import json
import tarfile import tarfile
import urllib.error
import urllib.request
from email.message import Message
from pathlib import Path from pathlib import Path
import pytest import pytest
from pyro_mcp.runtime import resolve_runtime_paths from pyro_mcp.runtime import RuntimePaths, resolve_runtime_paths
from pyro_mcp.vm_environments import EnvironmentStore, get_environment, list_environments from pyro_mcp.vm_environments import (
EnvironmentStore,
VmEnvironment,
get_environment,
list_environments,
)
class FakeResponse:
    """In-memory stand-in for the response object that ``urllib.request.urlopen`` returns.

    Provides ``read``, a ``headers`` mapping and context-manager support.
    """

    def __init__(self, payload: bytes, *, headers: dict[str, str] | None = None) -> None:
        # A falsy ``headers`` (None or an empty dict) is replaced by a fresh empty dict.
        self.headers = headers if headers else {}
        self._buffer = io.BytesIO(payload)

    def read(self, size: int = -1) -> bytes:
        """Return up to ``size`` bytes of the payload; everything left when negative."""
        return self._buffer.read(size)

    def __enter__(self) -> FakeResponse:
        return self

    def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
        # Nothing to release; returning None lets any exception propagate.
        return None
def _fake_runtime_paths(tmp_path: Path) -> RuntimePaths:
    """Create a placeholder runtime bundle under ``tmp_path`` and describe it.

    The bundle lives in ``tmp_path / "runtime" / "linux-x86_64"`` and contains small
    text files in place of the real binaries, plus an empty ``profiles`` directory.

    Returns:
        A ``RuntimePaths`` pointing at the files and directories that were created.
    """
    runtime_root = tmp_path / "runtime"
    bundle = runtime_root / "linux-x86_64"
    profiles = bundle / "profiles"
    manifest_file = bundle / "manifest.json"
    firecracker = bundle / "bin" / "firecracker"
    jailer = bundle / "bin" / "jailer"
    guest_agent = bundle / "guest" / "pyro_guest_agent.py"
    notice = runtime_root / "NOTICE"

    profiles.mkdir(parents=True, exist_ok=True)
    placeholders = (
        (manifest_file, '{"platform": "linux-x86_64"}\n'),
        (firecracker, "firecracker\n"),
        (jailer, "jailer\n"),
        (guest_agent, "print('guest')\n"),
        (notice, "notice\n"),
    )
    for target, text in placeholders:
        # Parents may already exist; exist_ok keeps the repeated mkdir harmless.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(text, encoding="utf-8")

    return RuntimePaths(
        bundle_root=bundle,
        manifest_path=manifest_file,
        firecracker_bin=firecracker,
        jailer_bin=jailer,
        guest_agent_path=guest_agent,
        artifacts_dir=profiles,
        notice_path=notice,
        manifest={"platform": "linux-x86_64"},
    )
def _sha256_digest(payload: bytes) -> str:
    """Return the digest string ``sha256:<hex>`` for ``payload``."""
    hex_value = hashlib.sha256(payload).hexdigest()
    return "sha256:" + hex_value
def _layer_archive(filename: str, content: bytes) -> bytes:
    """Return the bytes of a gzip-compressed tar archive with a single member.

    Args:
        filename: Name of the member inside the archive.
        content: Bytes stored as that member's data.
    """
    member = tarfile.TarInfo(name=filename)
    # The size must be set explicitly; addfile reads exactly this many bytes.
    member.size = len(content)
    sink = io.BytesIO()
    with tarfile.open(fileobj=sink, mode="w:gz") as tar:
        tar.addfile(member, io.BytesIO(content))
    return sink.getvalue()
def _authorization_header(request: object) -> str | None:
    """Return the ``Authorization`` header value carried by ``request``.

    Header names are compared case-insensitively. Returns ``None`` when ``request`` is
    not a ``urllib.request.Request`` or has no such header.
    """
    if not isinstance(request, urllib.request.Request):
        return None
    matches = (
        value for name, value in request.header_items() if name.lower() == "authorization"
    )
    return next(matches, None)
def _http_headers(headers: dict[str, str]) -> Message:
    """Build an ``email.message.Message`` holding each entry of ``headers`` as a header."""
    built = Message()
    for name in headers:
        built[name] = headers[name]
    return built
def test_list_environments_includes_expected_entries() -> None: def test_list_environments_includes_expected_entries() -> None:
@ -151,3 +238,185 @@ def test_environment_store_prunes_stale_entries(tmp_path: Path) -> None:
result = store.prune_environments() result = store.prune_environments()
assert result["count"] == 5 assert result["count"] == 5
def test_fetch_oci_manifest_resolves_linux_amd64_index_with_bearer_auth(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Resolve an OCI index to its linux/amd64 child manifest through bearer-token auth.

    ``urllib.request.urlopen`` is replaced by a fake registry that answers manifest
    requests without an ``Authorization`` header with a 401 bearer challenge, serves a
    token from the token endpoint, and serves the index and child manifests once the
    token is presented.
    """
    runtime_paths = _fake_runtime_paths(tmp_path)
    store = EnvironmentStore(runtime_paths=runtime_paths, cache_dir=tmp_path / "cache")
    spec = VmEnvironment(
        name="debian:12-ghcr",
        version="1.0.0",
        description="OCI-backed environment",
        default_packages=("bash", "git"),
        distribution="debian",
        distribution_version="12",
        source_profile="missing-profile",
        oci_registry="ghcr.io",
        oci_repository="thaloco/pyro-environments/debian-12",
        oci_reference="1.0.0",
    )
    child_manifest = {
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.manifest.v1+json",
        "layers": [],
    }
    child_payload = json.dumps(child_manifest).encode("utf-8")
    child_digest = _sha256_digest(child_payload)
    index_manifest = {
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.index.v1+json",
        "manifests": [
            {
                "digest": "sha256:arm64digest",
                "mediaType": "application/vnd.oci.image.manifest.v1+json",
                "platform": {"os": "linux", "architecture": "arm64"},
            },
            {
                "digest": child_digest,
                "mediaType": "application/vnd.oci.image.manifest.v1+json",
                "platform": {"os": "linux", "architecture": "amd64"},
            },
        ],
    }
    index_payload = json.dumps(index_manifest).encode("utf-8")
    index_digest = _sha256_digest(index_payload)
    authorized_urls: list[str] = []

    # Manifest bodies served once a request carries the bearer token, keyed by URL suffix.
    manifest_responses = {
        "/manifests/1.0.0": (index_payload, index_digest),
        f"/manifests/{child_digest}": (child_payload, child_digest),
    }

    def unauthorized(url: str) -> urllib.error.HTTPError:
        """Build the 401 bearer challenge returned for unauthenticated manifest requests."""
        return urllib.error.HTTPError(
            url,
            401,
            "Unauthorized",
            _http_headers(
                {
                    "WWW-Authenticate": (
                        'Bearer realm="https://ghcr.io/token",'
                        'service="ghcr.io",'
                        'scope="repository:thaloco/pyro-environments/debian-12:pull"'
                    )
                }
            ),
            io.BytesIO(b""),
        )

    def fake_urlopen(request: object, timeout: int = 90) -> FakeResponse:
        del timeout
        url = request.full_url if isinstance(request, urllib.request.Request) else str(request)
        if url.startswith("https://ghcr.io/token?"):
            return FakeResponse(b'{"token":"secret-token"}')
        authorization = _authorization_header(request)
        for suffix, (payload, digest) in manifest_responses.items():
            if not url.endswith(suffix):
                continue
            if authorization is None:
                raise unauthorized(url)
            # Every authorized manifest request, index and child alike, must present
            # the token issued by the fake token endpoint.
            assert authorization == "Bearer secret-token"
            authorized_urls.append(url)
            return FakeResponse(payload, headers={"Docker-Content-Digest": digest})
        raise AssertionError(f"unexpected OCI request: {url}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)

    manifest, resolved_digest = store._fetch_oci_manifest(spec)  # noqa: SLF001

    assert manifest == child_manifest
    assert resolved_digest == child_digest
    assert authorized_urls == [
        "https://ghcr.io/v2/thaloco/pyro-environments/debian-12/manifests/1.0.0",
        f"https://ghcr.io/v2/thaloco/pyro-environments/debian-12/manifests/{child_digest}",
    ]
def test_environment_store_installs_from_oci_when_runtime_source_missing(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Install an OCI-backed environment through ``ensure_installed`` from canned responses.

    The catalog is replaced by a single environment whose ``source_profile`` is
    ``"missing-profile"``. The fake registry serves one manifest with a kernel layer and
    a rootfs layer; the test then checks the installed images, the recorded source
    string and the ``source_digest`` stored in ``environment.json``.
    """
    kernel_layer = _layer_archive("vmlinux", b"kernel\n")
    rootfs_layer = _layer_archive("rootfs.ext4", b"rootfs\n")
    kernel_digest = _sha256_digest(kernel_layer)
    rootfs_digest = _sha256_digest(rootfs_layer)
    layer_media_type = "application/vnd.oci.image.layer.v1.tar+gzip"
    manifest = {
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.manifest.v1+json",
        "layers": [
            {"mediaType": layer_media_type, "digest": layer_digest, "size": len(layer_bytes)}
            for layer_digest, layer_bytes in (
                (kernel_digest, kernel_layer),
                (rootfs_digest, rootfs_layer),
            )
        ],
    }
    manifest_payload = json.dumps(manifest).encode("utf-8")
    manifest_digest = _sha256_digest(manifest_payload)

    # Canned registry answers as (URL suffix, body, headers); the first match wins.
    canned_responses = (
        ("/manifests/1.0.0", manifest_payload, {"Docker-Content-Digest": manifest_digest}),
        (f"/blobs/{kernel_digest}", kernel_layer, {}),
        (f"/blobs/{rootfs_digest}", rootfs_layer, {}),
    )

    def fake_urlopen(request: object, timeout: int = 90) -> FakeResponse:
        del timeout
        if isinstance(request, urllib.request.Request):
            url = request.full_url
        else:
            url = str(request)
        for suffix, body, response_headers in canned_responses:
            if url.endswith(suffix):
                # Copy the headers so each response owns its own mapping.
                return FakeResponse(body, headers=dict(response_headers))
        raise AssertionError(f"unexpected OCI request: {url}")

    environment = VmEnvironment(
        name="debian:12-ghcr",
        version="1.0.0",
        description="OCI-backed environment",
        default_packages=("bash", "git"),
        distribution="debian",
        distribution_version="12",
        source_profile="missing-profile",
        oci_registry="ghcr.io",
        oci_repository="thaloco/pyro-environments/debian-12",
        oci_reference="1.0.0",
    )
    runtime_paths = _fake_runtime_paths(tmp_path)

    monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
    monkeypatch.setattr(
        "pyro_mcp.vm_environments.CATALOG",
        {environment.name: environment},
    )
    store = EnvironmentStore(runtime_paths=runtime_paths, cache_dir=tmp_path / "cache")

    installed = store.ensure_installed(environment.name)

    assert installed.kernel_image.read_text(encoding="utf-8") == "kernel\n"
    assert installed.rootfs_image.read_text(encoding="utf-8") == "rootfs\n"
    expected_source = (
        "oci://ghcr.io/thaloco/pyro-environments/debian-12"
        f"@{manifest_digest}"
    )
    assert installed.source == expected_source
    metadata_path = installed.install_dir / "environment.json"
    metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    assert metadata["source_digest"] == manifest_digest

2
uv.lock generated
View file

@ -706,7 +706,7 @@ crypto = [
[[package]] [[package]]
name = "pyro-mcp" name = "pyro-mcp"
version = "0.1.0" version = "1.0.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "mcp" }, { name = "mcp" },