Add direct GHCR environment pulls

This commit is contained in:
Thales Maciel 2026-03-08 16:08:01 -03:00
parent 5d5243df23
commit 75082467f9
5 changed files with 346 additions and 29 deletions

View file

@ -42,7 +42,8 @@ Current curated environments in this repository:
- `debian:12-build`
The package ships the embedded Firecracker runtime and a package-controlled environment catalog.
Environment artifacts are installed into a local cache on first use or through `pyro env pull`.
Official environments are pulled as OCI artifacts from GHCR into a local cache on first use or
through `pyro env pull`.
## CLI

View file

@ -5,7 +5,7 @@
Cause:
- the environment cache directory is not writable
- the configured environment source is unavailable
- the configured GHCR environment artifact is unavailable
- the environment download was interrupted
Fix:

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import hashlib
import json
import os
import shutil
@ -27,6 +28,7 @@ OCI_MANIFEST_ACCEPT = ", ".join(
"application/vnd.docker.distribution.manifest.v2+json",
)
)
OCI_READ_CHUNK_SIZE = 1024 * 1024
@dataclass(frozen=True)
@ -381,6 +383,8 @@ class EnvironmentStore:
def _install_from_oci(self, spec: VmEnvironment) -> InstalledEnvironment:
install_dir = self._install_dir(spec)
temp_dir = Path(tempfile.mkdtemp(prefix=".partial-", dir=self._platform_dir))
resolved_digest: str | None = None
source = "oci://unknown"
try:
manifest, resolved_digest = self._fetch_oci_manifest(spec)
layers = manifest.get("layers")
@ -402,13 +406,12 @@ class EnvironmentStore:
shutil.move(str(kernel_image), temp_dir / "vmlinux")
if rootfs_image.parent != temp_dir:
shutil.move(str(rootfs_image), temp_dir / "rootfs.ext4")
source = (
f"oci://{spec.oci_registry}/{spec.oci_repository}:{spec.oci_reference}"
if spec.oci_registry is not None
and spec.oci_repository is not None
and spec.oci_reference is not None
else "oci://unknown"
)
if spec.oci_registry is not None and spec.oci_repository is not None:
source = f"oci://{spec.oci_registry}/{spec.oci_repository}"
if resolved_digest is not None:
source = f"{source}@{resolved_digest}"
elif spec.oci_reference is not None:
source = f"{source}:{spec.oci_reference}"
self._write_install_manifest(
temp_dir,
spec=spec,
@ -487,12 +490,14 @@ class EnvironmentStore:
manifest = json.loads(payload.decode("utf-8"))
if not isinstance(manifest, dict):
raise RuntimeError("OCI manifest response was not a JSON object")
resolved_digest = response_headers.get("Docker-Content-Digest")
resolved_digest = response_headers.get("docker-content-digest")
if resolved_digest is not None:
self._verify_digest_bytes(payload, resolved_digest)
media_type = manifest.get("mediaType")
if media_type in {
"application/vnd.oci.image.index.v1+json",
"application/vnd.docker.distribution.manifest.list.v2+json",
}:
} or isinstance(manifest.get("manifests"), list):
manifests = manifest.get("manifests")
if not isinstance(manifests, list):
raise RuntimeError("OCI index did not contain manifests")
@ -509,13 +514,19 @@ class EnvironmentStore:
manifest = json.loads(payload.decode("utf-8"))
if not isinstance(manifest, dict):
raise RuntimeError("OCI child manifest response was not a JSON object")
resolved_digest = response_headers.get("Docker-Content-Digest") or selected
resolved_digest = response_headers.get("docker-content-digest") or selected
self._verify_digest_bytes(payload, resolved_digest)
return manifest, resolved_digest
def _download_oci_blob(self, spec: VmEnvironment, digest: str, dest: Path) -> None:
if spec.oci_registry is None or spec.oci_repository is None:
raise RuntimeError("OCI source metadata is incomplete")
payload, _ = self._request_bytes(
digest_algorithm, digest_value = self._split_digest(digest)
if digest_algorithm != "sha256":
raise RuntimeError(f"unsupported OCI blob digest algorithm: {digest_algorithm}")
hasher = hashlib.sha256()
with (
self._open_request(
self._oci_url(
spec.oci_registry,
spec.oci_repository,
@ -523,8 +534,19 @@ class EnvironmentStore:
),
headers={},
repository=spec.oci_repository,
) as response,
dest.open("wb") as handle,
):
while True:
chunk = response.read(OCI_READ_CHUNK_SIZE)
if not chunk:
break
hasher.update(chunk)
handle.write(chunk)
if hasher.hexdigest() != digest_value:
raise RuntimeError(
f"OCI blob digest mismatch for {digest}; got sha256:{hasher.hexdigest()}"
)
dest.write_bytes(payload)
def _request_bytes(
self,
@ -533,10 +555,19 @@ class EnvironmentStore:
headers: dict[str, str],
repository: str,
) -> tuple[bytes, dict[str, str]]:
with self._open_request(url, headers=headers, repository=repository) as response:
return response.read(), {key.lower(): value for key, value in response.headers.items()}
def _open_request(
self,
url: str,
*,
headers: dict[str, str],
repository: str,
) -> Any:
request = urllib.request.Request(url, headers=headers, method="GET")
try:
with urllib.request.urlopen(request, timeout=90) as response: # noqa: S310
return response.read(), dict(response.headers.items())
return urllib.request.urlopen(request, timeout=90) # noqa: S310
except urllib.error.HTTPError as exc:
if exc.code != 401:
raise RuntimeError(f"failed to fetch OCI resource {url}: {exc}") from exc
@ -549,8 +580,10 @@ class EnvironmentStore:
headers={**headers, "Authorization": f"Bearer {token}"},
method="GET",
)
with urllib.request.urlopen(authenticated_request, timeout=90) as response: # noqa: S310
return response.read(), dict(response.headers.items())
try:
return urllib.request.urlopen(authenticated_request, timeout=90) # noqa: S310
except urllib.error.HTTPError as auth_exc:
raise RuntimeError(f"failed to fetch OCI resource {url}: {auth_exc}") from auth_exc
def _fetch_registry_token(self, authenticate: str, repository: str) -> str:
if not authenticate.startswith("Bearer "):
@ -613,3 +646,17 @@ class EnvironmentStore:
def _oci_url(self, registry: str, repository: str, suffix: str) -> str:
return f"https://{registry}/v2/{repository}/{suffix}"
def _split_digest(self, digest: str) -> tuple[str, str]:
algorithm, separator, value = digest.partition(":")
if separator == "" or value == "":
raise RuntimeError(f"invalid OCI digest: {digest}")
return algorithm, value
def _verify_digest_bytes(self, payload: bytes, digest: str) -> None:
    """Check that ``payload`` hashes to the sha256 ``digest``.

    Raises:
        RuntimeError: If the digest uses an algorithm other than sha256, or
            if the computed hash differs from the expected value.
    """
    algorithm, expected_hex = self._split_digest(digest)
    if algorithm == "sha256":
        computed_hex = hashlib.sha256(payload).hexdigest()
        if computed_hex == expected_hex:
            return
        raise RuntimeError(f"OCI digest mismatch for {digest}; got sha256:{computed_hex}")
    raise RuntimeError(f"unsupported OCI digest algorithm: {algorithm}")

View file

@ -1,12 +1,99 @@
from __future__ import annotations
import hashlib
import io
import json
import tarfile
import urllib.error
import urllib.request
from email.message import Message
from pathlib import Path
import pytest
from pyro_mcp.runtime import resolve_runtime_paths
from pyro_mcp.vm_environments import EnvironmentStore, get_environment, list_environments
from pyro_mcp.runtime import RuntimePaths, resolve_runtime_paths
from pyro_mcp.vm_environments import (
EnvironmentStore,
VmEnvironment,
get_environment,
list_environments,
)
class FakeResponse:
def __init__(self, payload: bytes, *, headers: dict[str, str] | None = None) -> None:
self._buffer = io.BytesIO(payload)
self.headers = headers or {}
def read(self, size: int = -1) -> bytes:
return self._buffer.read(size)
def __enter__(self) -> FakeResponse:
return self
def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
del exc_type, exc, tb
def _fake_runtime_paths(tmp_path: Path) -> RuntimePaths:
    """Create a placeholder runtime bundle under ``tmp_path`` and describe it.

    Writes small text files for the manifest, the firecracker and jailer
    binaries, the guest agent and the NOTICE file, creates an empty
    ``profiles`` directory, and returns a ``RuntimePaths`` pointing at them.
    """
    parent_dir = tmp_path / "runtime"
    root_dir = parent_dir / "linux-x86_64"
    profiles_dir = root_dir / "profiles"
    manifest_file = root_dir / "manifest.json"
    firecracker_file = root_dir / "bin" / "firecracker"
    jailer_file = root_dir / "bin" / "jailer"
    guest_agent_file = root_dir / "guest" / "pyro_guest_agent.py"
    notice_file = parent_dir / "NOTICE"

    seed_files = (
        (manifest_file, '{"platform": "linux-x86_64"}\n'),
        (firecracker_file, "firecracker\n"),
        (jailer_file, "jailer\n"),
        (guest_agent_file, "print('guest')\n"),
        (notice_file, "notice\n"),
    )
    profiles_dir.mkdir(parents=True, exist_ok=True)
    for target, text in seed_files:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(text, encoding="utf-8")

    return RuntimePaths(
        bundle_root=root_dir,
        manifest_path=manifest_file,
        firecracker_bin=firecracker_file,
        jailer_bin=jailer_file,
        guest_agent_path=guest_agent_file,
        artifacts_dir=profiles_dir,
        notice_path=notice_file,
        manifest={"platform": "linux-x86_64"},
    )
def _sha256_digest(payload: bytes) -> str:
return f"sha256:{hashlib.sha256(payload).hexdigest()}"
def _layer_archive(filename: str, content: bytes) -> bytes:
archive_buffer = io.BytesIO()
with tarfile.open(fileobj=archive_buffer, mode="w:gz") as archive:
info = tarfile.TarInfo(name=filename)
info.size = len(content)
archive.addfile(info, io.BytesIO(content))
return archive_buffer.getvalue()
def _authorization_header(request: object) -> str | None:
if isinstance(request, urllib.request.Request):
for key, value in request.header_items():
if key.lower() == "authorization":
return value
return None
def _http_headers(headers: dict[str, str]) -> Message:
message = Message()
for key, value in headers.items():
message[key] = value
return message
def test_list_environments_includes_expected_entries() -> None:
@ -151,3 +238,185 @@ def test_environment_store_prunes_stale_entries(tmp_path: Path) -> None:
result = store.prune_environments()
assert result["count"] == 5
def test_fetch_oci_manifest_resolves_linux_amd64_index_with_bearer_auth(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Resolve an OCI index to its linux/amd64 child manifest via bearer auth.

    The fake registry answers unauthenticated manifest requests with a 401
    bearer challenge, serves a token from the token endpoint, and then serves
    the index and the child manifest to authenticated requests.
    """
    runtime_paths = _fake_runtime_paths(tmp_path)
    store = EnvironmentStore(runtime_paths=runtime_paths, cache_dir=tmp_path / "cache")
    spec = VmEnvironment(
        name="debian:12-ghcr",
        version="1.0.0",
        description="OCI-backed environment",
        default_packages=("bash", "git"),
        distribution="debian",
        distribution_version="12",
        source_profile="missing-profile",
        oci_registry="ghcr.io",
        oci_repository="thaloco/pyro-environments/debian-12",
        oci_reference="1.0.0",
    )
    child_manifest = {
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.manifest.v1+json",
        "layers": [],
    }
    child_payload = json.dumps(child_manifest).encode("utf-8")
    child_digest = _sha256_digest(child_payload)
    index_manifest = {
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.index.v1+json",
        "manifests": [
            {
                "digest": "sha256:arm64digest",
                "mediaType": "application/vnd.oci.image.manifest.v1+json",
                "platform": {"os": "linux", "architecture": "arm64"},
            },
            {
                "digest": child_digest,
                "mediaType": "application/vnd.oci.image.manifest.v1+json",
                "platform": {"os": "linux", "architecture": "amd64"},
            },
        ],
    }
    index_payload = json.dumps(index_manifest).encode("utf-8")
    index_digest = _sha256_digest(index_payload)
    # URL suffix -> (response body, digest header) for each manifest endpoint.
    manifest_responses = {
        "/manifests/1.0.0": (index_payload, index_digest),
        f"/manifests/{child_digest}": (child_payload, child_digest),
    }
    authorized_urls: list[str] = []

    def unauthorized(url: str) -> urllib.error.HTTPError:
        # Single definition of the bearer challenge shared by both endpoints.
        return urllib.error.HTTPError(
            url,
            401,
            "Unauthorized",
            _http_headers(
                {
                    "WWW-Authenticate": (
                        'Bearer realm="https://ghcr.io/token",'
                        'service="ghcr.io",'
                        'scope="repository:thaloco/pyro-environments/debian-12:pull"'
                    )
                }
            ),
            io.BytesIO(b""),
        )

    def fake_urlopen(request: object, timeout: int = 90) -> FakeResponse:
        del timeout
        url = request.full_url if isinstance(request, urllib.request.Request) else str(request)
        if url.startswith("https://ghcr.io/token?"):
            return FakeResponse(b'{"token":"secret-token"}')
        authorization = _authorization_header(request)
        for suffix, (payload, digest) in manifest_responses.items():
            if not url.endswith(suffix):
                continue
            if authorization is None:
                raise unauthorized(url)
            # Both the index and the child request must carry the issued token.
            assert authorization == "Bearer secret-token"
            authorized_urls.append(url)
            return FakeResponse(payload, headers={"Docker-Content-Digest": digest})
        raise AssertionError(f"unexpected OCI request: {url}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
    manifest, resolved_digest = store._fetch_oci_manifest(spec)  # noqa: SLF001
    assert manifest == child_manifest
    assert resolved_digest == child_digest
    assert authorized_urls == [
        "https://ghcr.io/v2/thaloco/pyro-environments/debian-12/manifests/1.0.0",
        f"https://ghcr.io/v2/thaloco/pyro-environments/debian-12/manifests/{child_digest}",
    ]
def test_environment_store_installs_from_oci_when_runtime_source_missing(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Install an environment from a fake OCI registry and check the result.

    The fake registry serves one image manifest with a kernel layer and a
    rootfs layer. The test checks the installed images, the recorded source
    string, and the ``source_digest`` written to ``environment.json``.
    """
    runtime_paths = _fake_runtime_paths(tmp_path)
    kernel_blob = _layer_archive("vmlinux", b"kernel\n")
    rootfs_blob = _layer_archive("rootfs.ext4", b"rootfs\n")
    kernel_digest = _sha256_digest(kernel_blob)
    rootfs_digest = _sha256_digest(rootfs_blob)
    image_manifest = {
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.manifest.v1+json",
        "layers": [
            {
                "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip",
                "digest": layer_digest,
                "size": len(layer_blob),
            }
            for layer_digest, layer_blob in (
                (kernel_digest, kernel_blob),
                (rootfs_digest, rootfs_blob),
            )
        ],
    }
    manifest_bytes = json.dumps(image_manifest).encode("utf-8")
    manifest_digest = _sha256_digest(manifest_bytes)
    environment = VmEnvironment(
        name="debian:12-ghcr",
        version="1.0.0",
        description="OCI-backed environment",
        default_packages=("bash", "git"),
        distribution="debian",
        distribution_version="12",
        source_profile="missing-profile",
        oci_registry="ghcr.io",
        oci_repository="thaloco/pyro-environments/debian-12",
        oci_reference="1.0.0",
    )
    # Blob endpoints keyed by URL suffix; blobs are served without headers.
    blobs_by_suffix = {
        f"/blobs/{kernel_digest}": kernel_blob,
        f"/blobs/{rootfs_digest}": rootfs_blob,
    }

    def fake_urlopen(request: object, timeout: int = 90) -> FakeResponse:
        del timeout
        if isinstance(request, urllib.request.Request):
            url = request.full_url
        else:
            url = str(request)
        if url.endswith("/manifests/1.0.0"):
            return FakeResponse(
                manifest_bytes,
                headers={"Docker-Content-Digest": manifest_digest},
            )
        for suffix, blob in blobs_by_suffix.items():
            if url.endswith(suffix):
                return FakeResponse(blob)
        raise AssertionError(f"unexpected OCI request: {url}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
    monkeypatch.setattr(
        "pyro_mcp.vm_environments.CATALOG",
        {environment.name: environment},
    )
    store = EnvironmentStore(runtime_paths=runtime_paths, cache_dir=tmp_path / "cache")
    installed = store.ensure_installed(environment.name)

    assert installed.kernel_image.read_text(encoding="utf-8") == "kernel\n"
    assert installed.rootfs_image.read_text(encoding="utf-8") == "rootfs\n"
    expected_source = f"oci://ghcr.io/thaloco/pyro-environments/debian-12@{manifest_digest}"
    assert installed.source == expected_source
    metadata_path = installed.install_dir / "environment.json"
    metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    assert metadata["source_digest"] == manifest_digest

2
uv.lock generated
View file

@ -706,7 +706,7 @@ crypto = [
[[package]]
name = "pyro-mcp"
version = "0.1.0"
version = "1.0.0"
source = { editable = "." }
dependencies = [
{ name = "mcp" },