Compare commits

259 commits

ae3466b944
release: prep v0.1.10 changelog
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:08:48 -03:00
3dceacd40a
readme: add demo gif
Recording script committed at assets/demo.tape — renders with
charmbracelet/vhs against a real Linux+Firecracker host.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:08:42 -03:00
f1b17f6f8e
install: surface ssh-config --install in next-steps blurb
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 17:57:26 -03:00
05439d2325
daemon: cut vm stop latency
Three changes to stopVMLocked, biggest win first:

- Skip waitForExit on the SSH-success path. sync inside the guest
  already flushed root.ext4, so cleanupRuntime's SIGKILL is safe
  immediately. Saves up to gracefulShutdownWait (10s) per stop.
- Drop the SendCtrlAltDel + 10s wait fallback when SSH is
  unreachable. On Debian, ctrl+alt+del routes to reboot.target so
  FC never exits on it — the wait was pure latency.
- Shrink the SSH dial timeout 5s → 2s. A reachable guest dials in
  single-digit milliseconds; if it doesn't, fail fast and SIGKILL.

Worst-case (broken SSH) goes ~15s → ~2s + cleanup.
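
A minimal sketch of the reordered stop path — sshSync and kill are
stand-ins for the real SSH-sync and cleanupRuntime steps, not the actual
stopVMLocked:

  package vmstop

  import (
      "context"
      "time"
  )

  // stopVM sketches the new ordering: fast SSH dial, immediate SIGKILL on
  // success, no console-based fallback on failure.
  func stopVM(ctx context.Context, sshSync func(context.Context) error, kill func() error) error {
      dialCtx, cancel := context.WithTimeout(ctx, 2*time.Second) // dial timeout cut from 5s
      defer cancel()
      if err := sshSync(dialCtx); err == nil {
          // sync already ran inside the guest, so root.ext4 is flushed and an
          // immediate SIGKILL is safe — no waitForExit, no 10s graceful wait.
          return kill()
      }
      // SSH unreachable: no SendCtrlAltDel fallback (Debian routes
      // ctrl+alt+del to reboot.target, so FC never exits on it). Fail fast.
      return kill()
  }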

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 17:51:22 -03:00
c352aba50a
daemon: parallelize tap-pool warmup
Pool warmup ran createTap calls sequentially (one per loop iteration),
so warming N taps cold took N times the per-tap cost. Each releaseTap
also fired its own ensureTapPool goroutine, racing on n.tapPool.next.

Reserve a batch of names under the lock, then run up to
maxConcurrentTapWarmup createTap RPCs in parallel — root helper already
handles each connection in its own goroutine, so multiple in-flight
priv.create_tap requests don't contend at the wire level. Add a
warming flag to dedupe concurrent ensureTapPool invocations triggered
by parallel releases.

Bail-on-first-error semantics preserved: if every goroutine in a
batch fails (e.g. host out of taps, kernel limit), the loop exits
rather than burning monotonic indices forever.
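
A rough shape of the batched warmup, with an illustrative createTap
callback and concurrency cap — the real code reserves the names under the
pool lock first and tracks the warming flag on the pool:

  package tapwarm

  import "sync"

  const maxConcurrentTapWarmup = 4 // illustrative value

  // warmTaps creates already-reserved tap names with bounded parallelism and
  // stops handing out new work once any creation fails.
  func warmTaps(names []string, createTap func(name string) error) error {
      var (
          wg       sync.WaitGroup
          mu       sync.Mutex
          firstErr error
      )
      sem := make(chan struct{}, maxConcurrentTapWarmup)
      for _, name := range names {
          mu.Lock()
          failed := firstErr != nil
          mu.Unlock()
          if failed {
              break // bail on first error: don't burn more names
          }
          sem <- struct{}{}
          wg.Add(1)
          go func(n string) {
              defer wg.Done()
              defer func() { <-sem }()
              if err := createTap(n); err != nil {
                  mu.Lock()
                  if firstErr == nil {
                      firstErr = err
                  }
                  mu.Unlock()
              }
          }(name)
      }
      wg.Wait()
      return firstErr
  }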

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 15:54:07 -03:00
71e073ac49
fix: land .hushlogin on work disk so vm run is quiet
The work disk mounts at /root, so the .hushlogin written to the
rootfs overlay was shadowed and never reached the guest — pam_motd
kept printing the Debian banner on `banger vm run`. Move the write
to the work disk root inode (= /root in the guest) and run it from
PrepareHost so existing VMs pick it up on next start.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 14:39:46 -03:00
696593b365
release: prep v0.1.9 changelog
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:38:27 -03:00
9ed44bfd75
port smoke to go 2026-05-01 19:34:44 -03:00
b0a9d64f4a
fix: drop /root/repo fallback in vm exec for unbound VMs
vm exec defaulted execGuestPath to /root/repo whenever the VM had no
recorded workspace, so running it against a plain VM (one that never
had vm workspace prepare / vm run ./repo) blew up with
'cd: /root/repo: No such file or directory' — surfaced via the login
shell's mise activate hook because bash -lc sources profile.d before
the explicit cd. Now auto-cd only fires when --guest-path is passed
or the VM actually has a workspace recorded; otherwise the command
runs from root's home. Mise wrapping unchanged — without a .mise.toml
it's a no-op.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 17:06:46 -03:00
9400bab6fd
fix: accept host:port in validateResolverAddr; release v0.1.8
The root helper's resolver-address validator only accepted bare IPs,
so `resolvectl dns <bridge> 127.0.0.1:42069` — banger's own auto-wire
call to point systemd-resolved at the in-process DNS server — was
rejected before it ever reached resolvectl. The auto-wire is
best-effort and only logs a warning on failure, so .vm resolution
silently broke on the NSS path: dig @127.0.0.1 worked, curl <vm>.vm
didn't.

Validator now allows both bare IPs and IP:port (matching what
`resolvectl dns` itself accepts), with new test coverage for the
port'd form.

Existing installs need a one-time `sudo banger system restart` after
updating to v0.1.8 so the daemon re-runs the auto-wire with the
fixed validator.
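
A sketch of the relaxed check — helper name and error text are
illustrative, only the accepted shapes come from the description above:

  package resolveraddr

  import (
      "fmt"
      "net"
      "strconv"
  )

  // validateResolverAddr accepts either a bare IPv4/IPv6 address or an
  // address:port pair, matching what `resolvectl dns` itself accepts.
  func validateResolverAddr(addr string) error {
      if net.ParseIP(addr) != nil {
          return nil // bare IP
      }
      host, port, err := net.SplitHostPort(addr)
      if err != nil || net.ParseIP(host) == nil {
          return fmt.Errorf("resolver address %q is not a bare IP or IP:port", addr)
      }
      if n, err := strconv.Atoi(port); err != nil || n < 1 || n > 65535 {
          return fmt.Errorf("resolver address %q has an invalid port", addr)
      }
      return nil
  }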

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 15:42:11 -03:00
403f60dbbf
update README.md 2026-05-01 15:24:31 -03:00
b736702260
release: prep v0.1.7 changelog
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 15:18:21 -03:00
b9b3505e34
smoke: cover -d/--detach and bootstrap NAT precondition
Two new pure scenarios:

* detach_run: -d --rm and -d -- <cmd> combos rejected before VM
  creation; bare -d leaves the VM running and ssh-able afterward.

* bootstrap_precondition: workspace with a .mise.toml is refused
  without --nat; --no-bootstrap bypasses the precondition and the
  run completes normally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 15:05:27 -03:00
aaf49fc1b1
vm run: add -d/--detach + transparent tooling bootstrap
The mise tooling bootstrap was failing silently when --nat wasn't
set: the VM came up, the user landed in ssh, and tools were missing
with no obvious cause. Two coupled fixes:

* `-d`/`--detach`: create + prep + bootstrap, exit without attaching
  to ssh. Reconnect later with `banger vm ssh <name>`. Rejects the
  ambiguous combos `-d --rm` and `-d -- <cmd>`.

* NAT precondition: when the workspace has a .mise.toml or
  .tool-versions, vm run now refuses before VM creation if --nat
  isn't set. Error message points at --nat or --no-bootstrap.

* `--no-bootstrap`: explicit opt-out for users who want a vanilla
  VM with their workspace and no tooling install.

Detached bootstrap runs synchronously (foreground tee'd to the log
file) so the CLI only returns once installs finish. Interactive
mode keeps today's nohup'd background behaviour so the ssh session
starts promptly.
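
The NAT precondition reduces to a small pre-creation check; a sketch with
illustrative names:

  package runpreflight

  import (
      "errors"
      "os"
      "path/filepath"
  )

  // checkBootstrapPreconditions refuses — before any VM is created — when the
  // workspace declares tooling but NAT is off and bootstrap wasn't disabled.
  func checkBootstrapPreconditions(workspace string, nat, noBootstrap bool) error {
      if noBootstrap || nat {
          return nil
      }
      for _, marker := range []string{".mise.toml", ".tool-versions"} {
          if _, err := os.Stat(filepath.Join(workspace, marker)); err == nil {
              return errors.New("workspace declares tooling; pass --nat so the " +
                  "bootstrap can download, or --no-bootstrap to skip it")
          }
      }
      return nil
  }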

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:51:16 -03:00
9b5cbed32d
doctor: collapse healthy output to one line, add --verbose
A healthy host triggered ~20 PASS rows with details — too noisy for
the common case. Default now prints only fail/warn rows plus a
summary footer; an all-pass run collapses to a single line. Pass
--verbose / -v for the full per-check output.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:18:09 -03:00
09a3ef812f
style: gofmt internal/firecracker/client.go
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:18:04 -03:00
59e58878ef
update README.md 2026-05-01 14:10:46 -03:00
759fa20602
docs: add release-process runbook
Captures the cut-and-publish workflow currently encoded only in
scripts/publish-banger-release.sh and the CHANGELOG patterns. Covers:

- Release artefacts + R2 paths + the install.sh-at-bucket-root
  contract.
- Trust model recap (cosign pubkey pinned in both verify_signature.go
  and scripts/install.sh; drift check enforced by the publish script).
- Pre-flight checklist: green smoke, CHANGELOG entry with the right
  Keep-a-Changelog headings, link-table bump, explicit callout when
  unit files changed (banger update swaps binaries, not units).
- Cut order: publish first, tag after, verify from a clean machine.
- Verification-release rule: any fix to runUpdate / unit templates /
  helper-daemon restart sequencing requires an immediate no-op +1
  release so a host on the buggy version can update to it and observe
  the fix live with the new binary in the driver seat. v0.1.3 and
  v0.1.5 are the existing examples.
- Patch vs minor: minor = exposed API/contract change (vsock guest-
  agent protocol, CLI flag removal, RPC shape, non-forward-compatible
  store schema); everything else is patch.
- Sibling catalogs: kernel + golden-image entries are go:embed-ed,
  so they piggyback on the next banger release.
- Mid-release recovery for signature drift, partial rclone, re-cut,
  and bad-tag cleanup (never reuse a version).

AGENTS.md gets a one-liner pointer so the maintainer guide surfaces
the runbook without duplicating it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 12:25:36 -03:00
02a1472dd4
test: cover absolutizePaths, lastID, runCheckMigrations
Adds focused unit tests for previously-uncovered cli helpers:

- TestAbsolutizePaths covers the path-vararg helper's empty,
  absolute, and relative branches; complements the existing
  TestAbsolutizeImageRegisterPaths.
- TestLastID is table-driven across nil/empty/sorted/unsorted/
  duplicates/negative inputs.
- TestRunCheckMigrations* exercises every Compatibility branch
  (compatible / migrations needed / incompatible / inspect
  error) by stubbing bangerdExit and pointing the layout at a
  temp-dir DB seeded directly with the schema_migrations table.
- TestNewBangerdCommandSubcommands pins the flag set against
  accidental drift.

Lifts internal/cli coverage from 71% to 76% combined.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 12:08:19 -03:00
2606bfbabb
update: VMs survive banger update and rollback
Three load-bearing fixes that together let `banger update` (and its
auto-rollback path) restart the helper + daemon without killing
every running VM. New smoke scenarios prove the property end-to-end.

Bug fixes:

1. Disable the firecracker SDK's signal-forwarding goroutine. The
   default ForwardSignals = [SIGINT, SIGQUIT, SIGTERM, SIGHUP,
   SIGABRT] installs a handler in the helper that propagates the
   helper's SIGTERM (sent by systemd on `systemctl stop bangerd-
   root.service`) to every running firecracker child. Set
   ForwardSignals to an empty (non-nil) slice so setupSignals
   short-circuits at len()==0.

2. Add SendSIGKILL=no to bangerd-root.service. KillMode=process
   limits the initial SIGTERM to the helper main, but systemd
   still SIGKILLs leftover cgroup processes during the
   FinalKillSignal stage unless SendSIGKILL=no.

3. Route restart-helper / restart-daemon / wait-daemon-ready
   failures through rollbackAndRestart instead of rollbackAndWrap.
   rollbackAndWrap restored .previous binaries but didn't re-
   restart the failed unit, leaving the helper dead with the
   rolled-back binary on disk after a failed update.
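
Fix 1 boils down to one field on the SDK machine config; a sketch assuming
the upstream firecracker-go-sdk import path, with only the fields relevant
here:

  package fcconfig

  import (
      "os"

      firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
  )

  // newMachineConfig returns a config whose ForwardSignals slice is empty but
  // non-nil, so the SDK's setupSignals loop short-circuits at len()==0 and the
  // helper's own SIGTERM is never propagated to the firecracker child.
  func newMachineConfig(socketPath string) firecracker.Config {
      return firecracker.Config{
          SocketPath:     socketPath,
          ForwardSignals: []os.Signal{},
      }
  }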

Testing infrastructure (production binaries unaffected):

- Hidden --manifest-url and --pubkey-file flags on `banger update`
  let the smoke harness redirect the updater at locally-built
  release artefacts. Marked Hidden in cobra; not advertised in
  --help.
- FetchManifestFrom / VerifyBlobSignatureWithKey /
  FetchAndVerifySignatureWithKey export the existing logic against
  caller-supplied URL / pubkey. The default entry points still
  call them with the embedded canonical values.

Smoke scenarios:

- update_check: --check against fake manifest reports update
  available
- update_to_unknown: --to v9.9.9 fails before any host mutation
- update_no_root: refuses without sudo, install untouched
- update_dry_run: stages + verifies, no swap, version unchanged
- update_keeps_vm_alive: real swap to v0.smoke.0; same VM (same
  boot_id) answers SSH after the daemon restart
- update_rollback_keeps_vm_alive: v0.smoke.broken-bangerd ships a
  bangerd that passes --check-migrations but exits 1 as the
  daemon. The post-swap `systemctl restart bangerd` fails,
  rollbackAndRestart fires, the .previous binaries are restored
  and re-restarted; the same VM still answers SSH afterwards
- daemon_admin (separate prep): covers `banger daemon socket`,
  `bangerd --check-migrations --system`, `sudo banger daemon
  stop`

The smoke release builder generates a fresh ECDSA P-256 keypair
with openssl, signs SHA256SUMS cosign-compatibly, and serves
artefacts from a backgrounded python http.server.
verify_smoke_check_test.go pins the openssl/cosign signature
equivalence so the smoke release builder can't silently drift.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 12:08:08 -03:00
7e528f30b3
test: add installmeta tests 2026-04-30 10:49:22 -03:00
dea655ce95
docs: fix paths from local to system install 2026-04-30 10:49:10 -03:00
93ba233a12
simplify post install instructions 2026-04-30 10:41:06 -03:00
ae13f288e0
remove explicit stale version from readme 2026-04-30 10:39:47 -03:00
596dc67556
install.sh: expand the pre-sudo summary beyond just networking
The previous one-liner ("banger needs permission to manage network
access for the VMs you launch") was honest but understated; banger
also needs sudo for storage (rootfs snapshots, loop devices, image
files), launching/stopping firecracker under jailer isolation, and
installing binaries + systemd units. Spell those out as a short
bulleted list at the moment of decision so the user is authorising
a known scope rather than a euphemism.

Wording stays plain-language — no capability names, no jargon —
since the target audience may not know networking or container
terminology.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 17:25:16 -03:00
1be90a7af5
Preserve runtime dir across restart so reconcile re-finds VMs
v0.1.4 fixed the binary-level reconcile path for jailer'd VMs but
left a hole at the systemd layer: bangerd.service and bangerd-root.service
both defaulted to RuntimeDirectoryPreserve=no, so /run/banger was
wiped on every daemon stop. The api-sock symlinks the helper creates
for live VMs (`/run/banger/fc-<id>.sock` → `<chroot>/firecracker.socket`)
went with it, and findByJailerPidfile — which derives the chroot
from the symlink target — couldn't resolve them. Reconcile then fell
through to "stale_vm" and tore down the surviving FC's dm-snapshot.

Add RuntimeDirectoryPreserve=yes to both unit templates so the
symlinks survive the restart window. Live-verified end-to-end on
the dev host: started a VM under v0.1.5, restarted helper +
daemon, confirmed the FC PID was unchanged and `banger vm ssh`
returned the same boot_id pre and post.

Daemon-lifecycle tests updated to assert the new directive is
present in both rendered units so future regressions show up at
test time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 17:17:25 -03:00
e1acb0384b
CHANGELOG: v0.1.5 verification release
No code changes. Cuts a fresh release purely so a host on v0.1.4
can run `banger update` and confirm v0.1.4's running-VMs-survive
fix actually works when v0.1.4 is the code driving the update —
during the v0.1.3→v0.1.4 update the buggy v0.1.3 reconcile was
still in the driver seat and tore down the running VM as documented.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 17:12:13 -03:00
cec7291184
Survive banger update with running VMs
Two coupled fixes that together make the daemon-restart path of
`banger update` non-destructive for running guests:

1. Unit templates set `KillMode=process` on bangerd.service and
   bangerd-root.service. The default control-group behaviour sent
   SIGKILL to every process in the cgroup on stop/restart — including
   jailer-spawned firecracker children, since fork/exec doesn't
   escape a systemd cgroup. With process mode only the unit's main
   PID is signalled; FC children stay alive in the (unowned)
   cgroup until the new helper instance starts up and re-claims them.

2. `fcproc.FindPID` falls back to the jailer-written pidfile at
   `<chroot>/firecracker.pid` (sibling of the api-sock target) when
   `pgrep -n -f <api-sock>` doesn't find a match. pgrep can't see
   jailer'd FCs because their cmdline only carries the chroot-relative
   `--api-sock /firecracker.socket`, not the host-side path. The
   pidfile is jailer's actual record of the post-exec FC PID, so
   reconcile can verify the surviving process is the right one
   (comm == "firecracker") and re-seed handles.json without tearing
   down the VM's dm-snapshot.
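
A sketch of the pidfile fallback from fix 2, with simplified error
handling (helper name illustrative):

  package fcproc

  import (
      "fmt"
      "os"
      "path/filepath"
      "strconv"
      "strings"
  )

  // findPIDFromJailerPidfile resolves the api-sock symlink, reads the sibling
  // firecracker.pid the jailer wrote, and verifies the process is really a
  // firecracker before trusting it.
  func findPIDFromJailerPidfile(apiSockSymlink string) (int, error) {
      target, err := os.Readlink(apiSockSymlink)
      if err != nil {
          return 0, err
      }
      pidfile := filepath.Join(filepath.Dir(target), "firecracker.pid")
      raw, err := os.ReadFile(pidfile)
      if err != nil {
          return 0, err
      }
      pid, err := strconv.Atoi(strings.TrimSpace(string(raw)))
      if err != nil {
          return 0, fmt.Errorf("pidfile %s: %w", pidfile, err)
      }
      comm, err := os.ReadFile(fmt.Sprintf("/proc/%d/comm", pid))
      if err != nil || strings.TrimSpace(string(comm)) != "firecracker" {
          return 0, fmt.Errorf("pid %d is not a live firecracker", pid)
      }
      return pid, nil
  }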

Verified live on the dev host: started a VM, restarted the helper
unit, restarted the daemon unit, and confirmed the FC PID was
unchanged, vm list still showed the guest as running, and
`banger vm ssh` returned the same boot_id pre and post restart.
The systemd journal now reports "firecracker remains running after
unit stopped" and "Found left-over process X (firecracker) in
control group while starting unit. Ignoring." — exactly the shape
`KillMode=process` is supposed to produce.

Tests cover both the parser (parseVersionOutput from the v0.1.2
fix) and the new pidfile lookup: happy path, missing pidfile,
stale pid, wrong comm, garbage content, non-symlink api-sock,
whitespace tolerance.

CHANGELOG corrects v0.1.0's misleading "daemon restarts do not
interrupt running guests" line and documents the unit-refresh
caveat: existing v0.1.0–v0.1.3 installs need a one-time
`sudo banger system install` after updating to v0.1.4 to pick up
the new KillMode directive (`banger update` swaps binaries, not
unit files).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 17:09:15 -03:00
9c2e6a4647
CHANGELOG: v0.1.3 verification release
No code changes. Cuts a fresh release purely so a host on v0.1.2
can run `banger update` and confirm v0.1.2's install.toml-refresh
fix actually works when v0.1.2 is the code driving the update —
during the v0.1.1→v0.1.2 update the buggy v0.1.1 code was still
in the driver seat, so live verification of the fix needs one more
cycle.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 14:45:35 -03:00
d867d61eb3
update: refresh install.toml commit + built_at from new binary
After `banger update` swaps binaries, install.toml needs to reflect
the just-installed identity. The previous code passed
buildinfo.Current().{Commit,BuiltAt} into installmeta.UpdateBuildInfo
— but buildinfo.Current() in the running CLI is the OLD pre-swap
binary's identity (we're it), not the staged one. install.toml's
version field got refreshed to target.Version while commit and
built_at stayed pinned at the previous release. `banger doctor`
compares the running CLI's three fields against install.toml's
three fields and so raised a false-positive drift warning on
every update.

Fix: after the swap, exec /usr/local/bin/banger version, parse the
three-line output, and write all three fields to install.toml. If
the exec fails for any reason we fall back to the old behaviour
(version + stale commit/built_at) with a warning, since install.toml
drift is a doctor warning not a broken host — same posture as
before for the failure path.

The parser is split out (parseVersionOutput) and table-tested:
happy path, whitespace-tolerance, missing-field rejection, empty
input rejection, ignoring unrelated lines.
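
A rough sketch of the parsing shape, assuming a key: value line per field
— the real output labels of `banger version` may differ:

  package versionparse

  import (
      "fmt"
      "strings"
  )

  // parseVersionOutput pulls version, commit, and built_at out of the
  // just-installed binary's `banger version` output, ignoring unrelated lines.
  func parseVersionOutput(out string) (version, commit, builtAt string, err error) {
      fields := map[string]*string{"version": &version, "commit": &commit, "built_at": &builtAt}
      for _, line := range strings.Split(out, "\n") {
          key, value, ok := strings.Cut(line, ":")
          if !ok {
              continue // ignore unrelated lines
          }
          if dst, known := fields[strings.TrimSpace(key)]; known {
              if v := strings.TrimSpace(value); v != "" {
                  *dst = v
              }
          }
      }
      for name, dst := range fields {
          if *dst == "" {
              return "", "", "", fmt.Errorf("version output missing %q", name)
          }
      }
      return version, commit, builtAt, nil
  }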

Caught by running v0.1.0 → v0.1.1 live as the first end-to-end
smoke test of the self-update flow, which was the whole point of
that exercise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 14:38:59 -03:00
a0b5c7fa3c
CHANGELOG: v0.1.1 release notes
Captures the install.sh + BANGER_INSTALL_NONINTERACTIVE changes
that landed in 1004331 and 3c29af5. v0.1.1 is being cut now to
exercise the self-update path against a real released second
version — `banger update` has never run live before, only against
unit-test fixtures, so this release doubles as the smoke test of
the update flow.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 14:33:12 -03:00
1004331c14
install.sh: drop --user, add BANGER_INSTALL_NONINTERACTIVE env var
Surveyed the install scripts of comparable systemd-installing tools
(Docker, k3s, Tailscale, Ollama, Determinate Systems Nix, flyctl):
none of the daemon installers offer a --user staging mode, because
the resulting install isn't useful — banger inherits that. The
"--user just stages binaries you can't actually use yet" UX was a
trap; remove it before users hit it.

In its place, adopt the cross-tool convention for non-interactive
runs: the BANGER_INSTALL_NONINTERACTIVE=1 env var is friendlier
through a curl|bash pipe than `bash -s -- --yes` because the env
var can sit on the same line:

  curl -fsSL ...install.sh | env BANGER_INSTALL_NONINTERACTIVE=1 bash

The --yes flag still works for direct script invocation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 14:15:36 -03:00
3c29af55a2
Add curl|bash installer + wire upload into publish script
scripts/install.sh is the one-command installer end users run as

  curl -fsSL https://releases.thaloco.com/banger/install.sh | bash

Design choices:

* Runs as the invoking user. All network work + signature verification
  happens unprivileged; sudo is only re-execed for the actual install
  step that writes to /usr/local and creates systemd units.
* Right before the sudo prompt, the script prints a plain-language
  summary of exactly what's about to happen — the file paths it will
  create and a one-line "why sudo" — so the user authorises a known
  scope rather than the whole pipeline. Detail link in the docs.
* Uses openssl (universally available) for signature verification, not
  cosign. cosign is needed only by the *signer*, never the verifier.
* No jq dependency. The latest_stable field is extracted from the
  manifest with grep+sed, since the manifest shape is well-defined and
  we control it.
* /dev/tty fallback for the confirmation prompt so it works through
  the curl|bash pipe.
* --yes for non-interactive CI use, --user for installing into
  ~/.local/bin without touching system paths, --version vX.Y.Z to pin.

publish-banger-release.sh now uploads install.sh to the bucket root
on every publish, so the curl URL is stable but the script logic
matches the latest verified release. It also runs a key-drift check:
if scripts/install.sh's embedded cosign public key differs from the
one in internal/updater/verify_signature.go, publishing aborts. The
two copies must stay in sync or one of them ends up rejecting every
release.

README's Quick start now leads with the installer one-liner and
documents the audit-first variant alongside it; building from source
moves below.

Smoke-tested end to end against the live bucket with --user mode:
manifest fetch → tarball download → cosign signature verify → hash
verify → extract → install. The installed binary reports v0.1.0 at
commit 6fdebd9, matching the published artifact.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 14:06:34 -03:00
d1c4619a01
Add CHANGELOG.md with v0.1.0 release notes
First-release changelog following the Keep a Changelog + SemVer
convention. The v0.1.0 section groups by capability area (sandbox
VMs, images, kernels, host networking, system install, self-update,
trust model, CLI surface) rather than by package, so it reads as
release notes for users deciding whether to install rather than as
a commit log. Includes a Compatibility section calling out the
informal vsock-protocol stability promise (stable across patches,
not minors) and the forward-only schema policy.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 13:45:44 -03:00
6fdebd929e
publish-script: split RCLONE_BUCKET out of BUCKET_PATH
The previous form passed rclone paths like releases:banger/v0.1.0/,
which rclone parses as bucket=banger, key=v0.1.0/... — wrong, because
the actual R2 bucket is named "releases" (BUCKET_PATH was meant as
an in-bucket key prefix only). Uploads 403'd because the token has
no view of a bucket called "banger".

Introduce RCLONE_BUCKET as a separate env var (default: "releases")
and route every rclone copy through ${RCLONE_REMOTE}:${RCLONE_BUCKET}/${BUCKET_PATH}.
The public URLs in the manifest stay unchanged: BASE_URL is the
bucket's public custom domain, so the bucket name is implicit there.

The defaults now resolve to the live setup:
  rclone target:  releases:releases/banger/<version>/<file>
  public URL:     https://releases.thaloco.com/banger/<version>/<file>

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 13:35:53 -03:00
12f7a92bb4
publish-script: don't clobber COSIGN_PASSWORD with empty default
The previous form did

  COSIGN_PASSWORD="${COSIGN_PASSWORD:-}" cosign sign-blob ...

which set COSIGN_PASSWORD to "" when the caller hadn't exported one.
cosign sees an explicit empty password and tries to decrypt with
it instead of prompting interactively, so any real password-protected
offline key fails with "decryption failed".

Drop the prefix entirely. If COSIGN_PASSWORD is already in env, it
gets inherited normally; if it isn't, cosign prompts on the terminal
— which is the right UX for a maintainer running the publish script
locally with the offline private key.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 13:27:23 -03:00
3d748b87c8
publish-script: fix pubkey extraction and cosign v3 compatibility
Two bugs found while dry-running the publish flow end-to-end:

1. The awk pipeline that pulled BangerReleasePublicKey out of
   verify_signature.go didn't strip Go's raw-string-literal wrapping
   (`var ... = ` + backtick on the BEGIN line, trailing backtick on
   the END line). The "verify against embedded pub key" step thus
   compared sigs against a malformed PEM. Replaced with a sed pair
   that yields a clean PEM block byte-identical to cosign.pub.

2. cosign v3.x defaults sign-blob to a new bundle format and
   pushes signatures to Rekor; both are incompatible with banger's
   "embedded pub key, raw ASN.1 DER signature" trust model.
   Add --use-signing-config=false / --tlog-upload=false /
   --new-bundle-format=false to opt out, and --insecure-ignore-tlog
   on verify-blob. These flags also work on cosign v2.x, so the
   script is forward- and backward-compatible across the v2→v3
   boundary.

Validated by an end-to-end dry-run on this machine: built binaries,
tarred, sha256summed, cosign-signed, verified against the embedded
pub key, then re-verified through internal/updater's
crypto/ecdsa.VerifyASN1 path — all green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 13:23:09 -03:00
b7c9661c99
updater: embed real cosign public key for v0.1.0 release signing
The placeholder in BangerReleasePublicKey is replaced with the
production cosign public key (P-256 ECDSA). The matching private
key is stored offline by the maintainer; this is the public half
that every banger CLI baked from this commit forward will use to
verify SHA256SUMS signatures.

cosign.pub is also committed at the repo root so external auditors
can re-verify a release without parsing the Go source.

The placeholder-refuses test now swaps the embedded key for a
synthetic placeholder for the duration of the test, since the
default value is no longer a placeholder.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:50:52 -03:00
fae28e3d8b
update: docs + publish script for the self-update feature
README gets a top-level Updating section; docs/privileges.md gains
a step-by-step trust-model writeup of `banger update`. The new
scripts/publish-banger-release.sh drives the manual release cut:
build, tar, sha256sum, cosign sign-blob, verify against the embedded
public key, jq-merge into manifest.json, rclone upload to the R2
bucket. Refuses outright if the embedded key is still the placeholder
so we can't accidentally publish an unverifiable release. Also folds
in gofmt drift accumulated across the updater package and a few
sibling files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:43:46 -03:00
8ed351ea47
updater: cosign-blob signature verification on SHA256SUMS
Closes the v0.1.0 cosign requirement. Every banger update download
now goes through ECDSA-P256 verification before any binary is
trusted: SHA256SUMS.sig is fetched, base64-decoded, and verified
against the embedded BangerReleasePublicKey.

  * BangerReleasePublicKey: PEM-encoded ECDSA public key embedded
    at compile time. The current value is a sentinel PLACEHOLDER —
    the maintainer must replace it with the output of
    `cosign generate-key-pair`'s cosign.pub before cutting v0.1.0,
    and re-cut. Until they do, every `banger update` refuses with
    ErrSignatureRequired ("the maintainer must replace it and
    re-cut a release before update can proceed"). Loud refusal
    beats silent acceptance.
  * VerifyBlobSignature: parses the embedded public key, base64-
    decodes the signature, computes SHA256(body), runs ecdsa
    .VerifyASN1. cosign sign-blob produces the format
    VerifyASN1 verifies natively (ASN.1-DER encoded ECDSA over
    a SHA256 digest), so no third-party crypto deps needed.
  * FetchAndVerifySignature: pulls the signature URL from the
    release manifest entry, fetches it (1 KiB cap), and verifies
    against sumsBody. Refuses outright when sha256sums_sig_url is
    empty — v0.1.0 contract requires every release to be signed,
    and an unsigned release is a manifest publishing bug we'd
    rather catch loudly than silently accept.
  * Wired into banger update: sumsBody captured from
    DownloadRelease, immediately fed into FetchAndVerifySignature.
    A failed verification removes the staged tarball before
    returning so it can't be reused.
  * BangerReleasePublicKey is var (not const) only to support tests
    that swap in a generated keypair; production sets it at compile
    time and never mutates it.

Tests: placeholder-key path returns ErrSignatureRequired; happy
path with a fresh in-test ECDSA keypair verifies a real
sign-then-verify; tampered body, wrong key, and three malformed
signature shapes (not-base64, empty, garbage-DER) all reject.

Maintainer-cut workflow documented in BangerReleasePublicKey's
comment: cosign generate-key-pair → paste cosign.pub into the
constant → at release time, cosign sign-blob --key cosign.key
SHA256SUMS > SHA256SUMS.sig and publish.
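
The verification core described above is plain standard-library crypto; a
condensed sketch with the placeholder-key check and size caps omitted:

  package updater

  import (
      "crypto/ecdsa"
      "crypto/sha256"
      "crypto/x509"
      "encoding/base64"
      "encoding/pem"
      "errors"
      "strings"
  )

  // verifyBlobSignature checks a cosign sign-blob signature (base64-wrapped
  // ASN.1-DER ECDSA over SHA256(body)) against a PEM-encoded public key.
  func verifyBlobSignature(pubKeyPEM, body, sigBase64 []byte) error {
      block, _ := pem.Decode(pubKeyPEM)
      if block == nil {
          return errors.New("public key is not valid PEM")
      }
      parsed, err := x509.ParsePKIXPublicKey(block.Bytes)
      if err != nil {
          return err
      }
      pub, ok := parsed.(*ecdsa.PublicKey)
      if !ok {
          return errors.New("public key is not ECDSA")
      }
      sig, err := base64.StdEncoding.DecodeString(strings.TrimSpace(string(sigBase64)))
      if err != nil {
          return err
      }
      digest := sha256.Sum256(body)
      if !ecdsa.VerifyASN1(pub, digest[:], sig) {
          return errors.New("signature does not verify against embedded key")
      }
      return nil
  }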

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:37:53 -03:00
92ca1aa96f
cli: add banger update command
Wires updater + the existing system-install helpers into a single
operator-facing flow:

  1. FetchManifest, resolve target release (default: latest_stable;
     override with --to vX.Y.Z).
  2. --check exits with a one-line "up to date" / "update available" —
     suited to tools polling on a timer.
  3. requireRoot beyond this point — we're about to write
     /usr/local/bin and talk to systemctl.
  4. daemon.operations.list → refuse if any operation isn't Done.
     --force overrides; per the v0.1.0 plan there's no drain wait.
  5. PrepareCleanStaging + DownloadRelease + StageTarball into
     /var/cache/banger/updates/.
  6. Sanity-run the staged binaries: `banger --version` must mention
     the expected version; `bangerd --check-migrations --system`
     must exit 0 (compatible) or 1 (will auto-migrate). Exit 2
     (incompatible) aborts before the swap.
  7. --dry-run stops here with a one-line plan, leaves staging.
  8. Swap (vsock → bangerd → banger) → restart bangerd-root then
     bangerd → waitForDaemonReady on the system socket.
  9. Run `banger doctor` against the JUST-INSTALLED CLI binary
     (not d.doctor in-process — we want to exercise the new binary
     end-to-end). FAIL triggers auto-rollback: restore .previous
     backups, restart services, surface the original failure with
     "(rolled back to previous install)".
  10. UpdateBuildInfo on /etc/banger/install.toml. CleanupBackups.
     Wipe staging dir.

rollbackAndWrap / rollbackAndRestart split: the former is for
failures BEFORE the systemctl restart (old binaries are still on
disk under .previous; the OLD daemon is still running because the
restart never happened). The latter is for failures AFTER, where
rollback ALSO needs another systemctl restart so the OLD versions
take over again. If even rollback's restart fails, we surface
everything we know — the install is broken and the operator gets
the breadcrumbs to fix it manually.

Existing TestNewBangerCommandHasExpectedSubcommands updated to
include "update" in the expected ordering.

Live exercise against the empty bucket today errors as expected:
$ banger update --check
banger: discover: fetch manifest: HTTP 404 Not Found  # exit 1
once the user publishes the first manifest the same command will
report "up to date" or "update available".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:35:04 -03:00
91af367208
updater: download/stage/swap/rollback flow steps
The pure-logic core of `banger update`. No CLI yet; this commit
ships the steps the next commit's command will orchestrate.

  * download.go — DownloadRelease fetches SHA256SUMS, parses it,
    looks up the tarball's basename, then streams the tarball
    through download.FetchVerified so the hash is checked on the
    fly. Returns the SHA256SUMS bytes alongside so a future
    cosign-verification step can validate them against an embedded
    public key before trusting the hashes inside.
    Also: fetchBounded for small bounded GETs (manifest, sums file,
    future signature), DefaultStagingDir, EnsureStagingDir,
    PrepareCleanStaging.
  * stage.go — StageTarball reads gzip+tar, validates the entry
    set is exactly {banger, bangerd, banger-vsock-agent} (no
    extras, no missing, no path traversal, no non-regular files),
    extracts at mode 0755 regardless of what the tarball claims.
    StagedRelease records the resulting paths.
  * swap.go — InstallTargets pins the canonical install paths
    (/usr/local/bin/banger, /usr/local/bin/bangerd,
    /usr/local/lib/banger/banger-vsock-agent). Swap orders the
    three replacements vsock → bangerd → banger so the most
    impactful binary (the CLI) goes last; each step uses
    system.AtomicReplace and accumulates a SwapResult so partial
    failures can be rolled back cleanly. Rollback unwinds in
    reverse, joining errors so a half-rolled-back state surfaces
    enough info for an operator to fix manually. CleanupBackups
    removes the .previous trail after `banger doctor` confirms
    the new install is healthy.
  * installmeta.UpdateBuildInfo — small helper that refreshes
    Version/Commit/BuiltAt on /etc/banger/install.toml without
    re-running the full system install. Preserves OwnerUser/UID/
    GID/Home and the original InstalledAt timestamp.

Tests: stage rejects extra entries / missing entries / path
traversal / non-regular files; happy-path stages all three at 0755
with correct contents. Swap+Rollback covers the all-three-succeed
path (then verifies .previous backups exist + rollback restores
old contents) AND the partial-failure path (third swap blocked by
a non-dir parent → SwappedTargets = 2 → rollback unwinds those
two cleanly). DownloadRelease covers happy path, tarball-not-in-
SHA256SUMS, and propagated sha256 mismatch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:30:22 -03:00
fb6d2b1dae
updater: manifest + SHA256SUMS parsing scaffolding
First slice of the `banger update` package. No CLI yet — this just
defines the wire shape and parsers the rest of the flow will plug
into.

  * internal/updater/manifest.go — Manifest / Release types,
    ManifestSchemaVersion = 1, the hardcoded URL
    https://releases.thaloco.com/banger/manifest.json (var instead
    of const so tests can point at httptest), and FetchManifest /
    ParseManifest / Manifest.LookupRelease / Manifest.Latest.
    The manifest only references URLs (tarball, SHA256SUMS, optional
    signature); actual binary hashes come from SHA256SUMS itself,
    so manifest tampering can't substitute a hash for a known-good
    tarball.
    SchemaVersion gates forward-compat: a CLI that doesn't know its
    server's schema_version refuses to update rather than guessing.
  * internal/updater/sha256sums.go — ParseSHA256Sums tolerates both
    GNU `<digest>  <file>` (with optional `*` binary prefix) and
    BSD `SHA256 (file) = <digest>` formats. Comments and blank
    lines are skipped; malformed lines that LOOK like entries are
    rejected (silent skipping is the wrong failure mode for a
    security-relevant input). Digests are lowercased so the caller
    can `==`-compare without worrying about case.
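
A rough sketch of the two accepted line shapes, with the size cap and
error-detail handling omitted:

  package updater

  import (
      "fmt"
      "regexp"
      "strings"
  )

  var (
      gnuLine = regexp.MustCompile(`^([0-9A-Fa-f]{64})\s+\*?(\S+)$`)
      bsdLine = regexp.MustCompile(`^SHA256 \((.+)\) = ([0-9A-Fa-f]{64})$`)
  )

  // parseSHA256Sums maps file name → lowercased digest, accepting GNU
  // "<digest>  <file>" (optional "*" binary marker) and BSD
  // "SHA256 (file) = <digest>" lines. Comments and blanks are skipped;
  // anything else that looks like an entry is rejected, not ignored.
  func parseSHA256Sums(body string) (map[string]string, error) {
      sums := make(map[string]string)
      for _, line := range strings.Split(body, "\n") {
          line = strings.TrimSpace(line)
          if line == "" || strings.HasPrefix(line, "#") {
              continue
          }
          if m := gnuLine.FindStringSubmatch(line); m != nil {
              sums[m[2]] = strings.ToLower(m[1])
              continue
          }
          if m := bsdLine.FindStringSubmatch(line); m != nil {
              sums[m[1]] = strings.ToLower(m[2])
              continue
          }
          return nil, fmt.Errorf("malformed SHA256SUMS line: %q", line)
      }
      return sums, nil
  }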

Caps: 1 MiB on the manifest body, 16 KiB on SHA256SUMS, 256 MiB on
release tarballs. Generous-but-bounded; bumping requires a code
change so a server-side mistake can't fill the disk.

Tests: ParseManifest happy path, schema-version-too-new rejection,
five malformed-input cases. ParseSHA256Sums covers GNU + BSD +
star-prefix + comments-and-blanks, six malformed-input rejections,
case-insensitive digest normalisation. FetchManifest end-to-end via
httptest.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:24:36 -03:00
abd5d6f5ab
download: shared FetchVerified helper for capped + hashed downloads
imagecat.Fetch and kernelcat.Fetch each implement the same pattern:
HTTP GET with a Content-Length pre-check, an io.LimitReader cap on
the body, on-the-fly sha256 hashing, and refusal on either the cap
trip or a hash mismatch. The about-to-arrive `banger update` flow
makes a third caller, which is the right number to factor.

  * internal/download.FetchVerified(ctx, client, url, expectedSHA256,
    maxBytes, dstPath): streams the body to dstPath through a
    sha256 hasher, capped at maxBytes+1 bytes so an oversize body
    is detected before the hash check fires. On any failure
    (HTTP error, ContentLength > cap, body exceeds cap, write
    error, hash mismatch) the partial file is removed before
    returning so callers don't have to disambiguate "did we leave
    bytes on disk?".
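
A condensed sketch of that contract — the real helper also pre-checks the
advertised Content-Length and distinguishes the failure cases; names here
are abbreviated:

  package download

  import (
      "context"
      "crypto/sha256"
      "encoding/hex"
      "fmt"
      "io"
      "net/http"
      "os"
  )

  // fetchVerified streams url to dstPath through a SHA256 hasher, capped at
  // maxBytes. On any failure the partial file is removed before returning.
  func fetchVerified(
      ctx context.Context,
      client *http.Client,
      url, expectedSHA256 string,
      maxBytes int64,
      dstPath string,
  ) (err error) {
      defer func() {
          if err != nil {
              os.Remove(dstPath) // never leave partial bytes behind
          }
      }()
      req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
      if err != nil {
          return err
      }
      resp, err := client.Do(req)
      if err != nil {
          return err
      }
      defer resp.Body.Close()
      if resp.StatusCode < 200 || resp.StatusCode > 299 {
          return fmt.Errorf("GET %s: %s", url, resp.Status)
      }
      dst, err := os.Create(dstPath)
      if err != nil {
          return err
      }
      defer dst.Close()
      hasher := sha256.New()
      // Read one byte past the cap so an oversize body is detected, not truncated.
      n, err := io.Copy(io.MultiWriter(dst, hasher), io.LimitReader(resp.Body, maxBytes+1))
      if err != nil {
          return err
      }
      if n > maxBytes {
          return fmt.Errorf("body exceeds %d byte cap", maxBytes)
      }
      if got := hex.EncodeToString(hasher.Sum(nil)); got != expectedSHA256 {
          return fmt.Errorf("sha256 mismatch: got %s want %s", got, expectedSHA256)
      }
      return nil
  }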

Imagecat and kernelcat are NOT migrated to this helper in this
commit — they each have their own destination-dir layout and
post-verify decompress/extract steps that don't fit a one-size
helper. Lift them later if it stays clean; for now the helper
is sized for the updater's "fetch tarball + sha256SUMS" need.

Tests cover happy path, hash mismatch, advertised Content-Length
over cap, lying server (chunked, no Content-Length, but oversize
body), HTTP non-2xx, and the two arg-validation rejections (empty
expected hash, non-positive maxBytes).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 18:44:27 -03:00
fa3a7a3e31
system: add AtomicReplace + Rollback for binary swap
Prerequisite for `banger update`'s swap step. The updater renames a
staged binary into place and needs (a) atomicity per file (no
half-written bytes for a process that's about to systemctl restart
into the new binary) and (b) a backup it can restore from when
post-restart doctor reports FAIL.

  * AtomicReplace(newSrc, dst, suffixPrevious): if dst exists,
    move it to dst+suffixPrevious. Then os.Rename newSrc → dst.
    Atomic on a single fs (the only case relevant to the updater —
    everything is staged under /var/cache/banger and then renamed
    into /usr/local/bin, but those should be on the same fs in a
    typical install). On rename failure, restore the backup so we
    don't leave the caller without their binary.
  * AtomicReplaceRollback(dst, suffixPrevious): symmetric inverse.
    Removes dst, renames dst+suffixPrevious back to dst. Tolerant
    of a missing backup (fresh-install case) so the updater can
    call it unconditionally on failure paths without tracking
    backup state itself.
  * Refuses an empty suffix with an up-front guard: an empty
    suffix would silently no-op the backup AND break rollback.
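
A sketch of the replace-with-backup dance, including the restore-on-failure
path (function shape illustrative):

  package system

  import (
      "errors"
      "os"
  )

  // atomicReplace backs up dst to dst+suffix (if dst exists), then renames the
  // staged binary into place. On rename failure the backup is restored so the
  // caller is never left without a binary.
  func atomicReplace(newSrc, dst, suffix string) error {
      if suffix == "" {
          return errors.New("empty backup suffix would no-op the backup and break rollback")
      }
      backup := dst + suffix
      hadOld := false
      if _, err := os.Stat(dst); err == nil {
          if err := os.Rename(dst, backup); err != nil {
              return err
          }
          hadOld = true
      }
      if err := os.Rename(newSrc, dst); err != nil {
          if hadOld {
              _ = os.Rename(backup, dst) // best-effort restore of the old binary
          }
          return err
      }
      return nil
  }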

Six tests cover: happy path, fresh install (no prior dst), stale
.previous from a half-finished prior run, empty-suffix rejection,
rollback restores, rollback tolerant of no-backup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 18:43:04 -03:00
ec6fc9d185
store,bangerd: add --check-migrations flag for pre-swap schema check
Prerequisite for `banger update`. Before swapping a staged binary
into place, the updater needs to confirm the new bangerd recognises
the running install's DB schema. Without this, an operator could end
up with a service that won't open its store after the binary swap +
restart.

  * store.InspectSchemaState(path): opens the DB read-only (reusing
    OpenReadOnly's mode=ro DSN), reads the schema_migrations table,
    and classifies the relationship between applied and known IDs:
    SchemaCompatible (lockstep), SchemaMigrationsNeeded (binary
    newer, will auto-migrate on first Open), or SchemaIncompatible
    (DB has applied IDs the binary doesn't know about).
    Missing schema_migrations table is treated as "all migrations
    pending" rather than an error — matches the fresh-install case.
  * bangerd --check-migrations: opens the configured DB read-only,
    prints a one-line classification, and exits 0/1/2. The exit
    code is the contract:
        0 — compatible
        1 — migrations needed (binary newer; safe to swap)
        2 — incompatible (binary older than DB; abort the swap)
    Honours --system to pick between system StateDir and user mode.
  * bangerdExit indirection so future tests can capture the exit
    code without terminating the test process. Production points
    at os.Exit.
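
The exit-code contract above collapses to a small switch; a sketch with
assumed constant names:

  package checkschema

  import "fmt"

  type schemaState int

  const (
      schemaCompatible schemaState = iota
      schemaMigrationsNeeded
      schemaIncompatible
  )

  // exitCodeFor maps the read-only schema inspection onto the contract the
  // updater keys off: 0 = swap freely, 1 = swap (new binary migrates on first
  // Open), 2 = abort the swap.
  func exitCodeFor(state schemaState) int {
      switch state {
      case schemaCompatible:
          return 0
      case schemaMigrationsNeeded:
          return 1
      case schemaIncompatible:
          return 2
      default:
          panic(fmt.Sprintf("unknown schema state %d", state))
      }
  }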

Tests cover the four classifications: compatible (fully migrated
DB), migrations-needed (only baseline applied), incompatible
(synthetic id=99 inserted), and missing-table (fresh DB). Live
exercise on this dev host returned `migrations needed: pending [3]
(binary will apply on first Open)` and exit 1, matching the
contract.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 18:41:31 -03:00
3c0af3a2de
opstate,daemon: list in-flight operations via daemon.operations.list
Prerequisite for `banger update`'s preflight, which refuses to swap
binaries while anything is in flight. Today's opstate.Registry
exposes Insert/Get/Prune but no iteration; without a snapshot
accessor the update flow can't tell whether a vm.create is
mid-prepare-work-disk.

  * opstate.Registry.List(): returns a freshly-allocated snapshot
    of every entry. Mutating the slice doesn't poison the
    registry. Pinned by tests covering the snapshot semantics
    and the empty case.
  * api.OperationSummary / OperationsListResult: a public-shape
    record per op. Today the Kind is always "vm.create" — the
    field exists so future async kinds (image.pull, kernel.pull)
    plug in without an API change.
  * Daemon.ListOperations + daemon.operations.list RPC:
    walks vmService.createOps and emits OperationSummary entries.
    Done ops are included in the snapshot; the update preflight
    filters by Done itself.
  * dispatch_test's documented-methods list updated.

No behaviour change for existing flows; this is a read-only
addition.
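
The snapshot semantics of Registry.List reduce to a copy taken under the
lock; a rough sketch with the internal layout assumed:

  package opstate

  import "sync"

  type Operation struct {
      ID   string
      Kind string
      Done bool
  }

  type Registry struct {
      mu  sync.Mutex
      ops []Operation
  }

  // List returns a freshly allocated copy so callers can filter or mutate the
  // slice without poisoning the registry's internal state.
  func (r *Registry) List() []Operation {
      r.mu.Lock()
      defer r.mu.Unlock()
      out := make([]Operation, len(r.ops))
      copy(out, r.ops)
      return out
  }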

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 18:14:57 -03:00
775525b592
cli,doctor: --version flag + CLI/install drift check
Two pre-release polish items on the version-display surface.

  * --version on both binaries: cobra's Version field on the banger
    and bangerd roots renders a one-line summary (banger v0.1.0
    (commit abcd1234, built 2026-04-28T20:45:50Z)). The
    SetVersionTemplate override drops cobra's "{{.Name}} version"
    prefix — our string is already a complete sentence. The
    multi-line `banger version` subcommand is unchanged for callers
    that want the full SHA / built_at on separate lines.
  * Doctor "banger version" row: prints the running CLI's version +
    short commit + built-at, plus what /etc/banger/install.toml
    recorded at install time. Disagreement is the most common
    version-skew pitfall (stale CLI against fresh daemon, or vice
    versa) and a one-line warn is friendlier than tracking that down
    from a launch failure.
    Drift detection is suppressed when either side is dev/unknown
    (untagged build) — comparing a dev CLI against a tagged install
    is the developer-machine case, not a real problem.

formatVersionLine is in internal/cli (banger.go) and reused by
bangerd.go via a strings.Replace because bangerd's version line
should say "bangerd" not "banger". Slightly tilt-feeling but cheaper
than parameterising the helper for one caller.

Tests: TestVersionsDriftToleratesDevAndUnknown pins the four
branches (match, version diff, commit diff, dev-suppression). The
existing version-format test already runs through formatVersionLine
indirectly.

Live exercise:
  $ banger --version
  banger dev (commit 1c1ca7d6, built 2026-04-28T20:52:33Z)
  $ bangerd --version
  bangerd dev (commit 1c1ca7d6, built 2026-04-28T20:52:33Z)
  $ banger doctor | head
  ...
  PASS	banger version
    - CLI dev (commit 1c1ca7d6, built 2026-04-28T20:52:33Z)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:53:32 -03:00
1c1ca7d6a4
doctor: pin firecracker version range, distro-aware install hint
Pre-release polish: be explicit about which firecracker versions
banger has been validated against, and give users a one-line install
suggestion when the binary is missing rather than the previous
generic "install firecracker or set firecracker_bin".

internal/firecracker/version.go (new):
  * MinSupportedVersion = "1.5.0" — the floor banger refuses to
    launch below. Bumping this is a deliberate decision, paired
    with whatever helper feature started requiring the newer
    firecracker.
  * KnownTestedVersion = "1.14.1" — what banger's smoke suite
    actually runs against today.
  * SemVer + Compare + ParseVersionOutput, table-tested. The parser
    tolerates the trailing "exiting successfully" log line that
    firecracker tacks onto --version; only the canonical
    "Firecracker vX.Y.Z" line matters.
  * QueryVersion shells `<bin> --version` through a CommandRunner-
    shaped interface; doesn't import internal/system to keep the
    firecracker package leaf-clean.

internal/daemon/doctor.go:
  * New addFirecrackerVersionCheck replaces the previous bare
    RequireExecutable preflight for firecracker. Three outcomes:
    PASS within [Min, Tested], WARN above Tested (newer firecracker
    usually works but is outside the tested window), FAIL below Min
    or when the binary is missing.
  * On missing binary, surfaces a distro-aware install command via
    parseOSReleaseIDs(/etc/os-release) → guessFirecrackerInstallCommand.
    Pinned suggestions for debian (apt), arch/manjaro
    (paru), and nixos (nix-env). Other distros get only the upstream
    Releases URL — guessing wrong sends users on a wild goose chase.
  * runtimeChecks no longer includes the firecracker preflight; the
    new check subsumes it.

README.md:
  * Requirements line now spells out the tested-against version
    (v1.14.1) and the supported floor (≥ v1.5.0), and points at
    `banger doctor` for the version check + install hint.

Tests: ParseVersionOutput across canonical/prerelease/garbage inputs,
SemVer.Compare across major/minor/patch boundaries, MustParseSemVer
panics on malformed inputs. Doctor-side: PASS on tested version,
FAIL below Min, WARN above Tested, FAIL with upstream URL when
missing, install-hint dispatch table covering debian/ubuntu (via
ID_LIKE)/arch/manjaro/nixos/fedora-fallback/missing-os-release.
The renamed TestDoctorReport_MissingFirecrackerFails... now asserts
against the new check name. Live `banger doctor` reports
"v1.14.1 at /usr/bin/firecracker (within tested range; min v1.5.0,
tested v1.14.1)" against the smoke host.

Smoke bare_run still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:47:42 -03:00
f7a6832ebf
Merge model,cli,docs polish for v0.1.0
# Conflicts:
#	internal/cli/commands_image.go
2026-04-28 17:36:47 -03:00
d0997fd3b5
model,cli,docs: medium-effort polish for v0.1.0
* model.ParseSize / FormatSizeBytes: pinned with table tests in
    internal/model/types_test.go (TestParseSize 22 cases,
    TestFormatSizeBytes 11 cases, TestParseSizeFormatRoundTrip 7
    boundaries). Fixed the long-suffix regression: "4GiB", "512MiB",
    "4KiB" now parse correctly (parser strips trailing IB before
    inspecting the unit byte). Pinned current behaviour for
    no-suffix input ("1024" treated as MiB) and FormatSizeBytes(0).
    commands_image.go --size flag-help updated to show 4GiB now
    that the parser accepts it.
  * vm ports --json: resolves the JSON-vs-table inconsistency between
    vm stats (always JSON) and vm ports (always table). --json on
    vm ports flips to the same printJSON path as vm stats. Default
    table output unchanged. Other vm subcommands (show, stats,
    logs, health, ping) didn't fit the identical pattern; left
    alone.
  * docs/oci-import.md architecture section moved to a new
    docs/oci-import-internals.md (precedent: internal/daemon/
    ARCHITECTURE.md). User-facing oci-import.md keeps a one-line
    pointer for advanced reading.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:36:03 -03:00
26dbf0f221
Merge cli,docs polish for v0.1.0
Brings in commit 003b048 from the agent-2 worktree:
  - CLI help text + completers (image pull / kernel pull / vm stats /
    vm set / --from flag).
  - README golden-image definition + Requirements block above
    Quick Start.
  - Code hygiene: drop emptyDash; consolidate formatBytes into
    humanSize.
  - Logger downgrades for per-RPC INFO chatter.
2026-04-28 17:35:06 -03:00
003b0488ce
cli,docs: trivial polish for v0.1.0
A pre-release audit collected ~12 trivial-effort UX and code-hygiene
items. Rolling them up here so the v0.1.0 commit log isn't littered
with one-line tweaks.

CLI help / completion:
  * commands_image.go: drop dangling reference to a `banger image
    catalog` subcommand that doesn't exist; replace with a pointer
    to `banger image list`.
  * commands_image.go: --size flag example was "4GiB" but the parser
    rejects that suffix. Change example to "4G". (Parser-side fix
    is in a separate concern.)
  * commands_image.go + completion.go: image pull now wires a
    catalog completer (falls back to local image names since there's
    no image-catalog RPC yet); image show / delete / promote already
    completed local names.
  * commands_kernel.go + completion.go: kernel pull now wires a new
    completeKernelCatalogNameOnlyAtPos0 backed by the kernel.catalog
    RPC, so tab-complete suggests pullable kernels.
  * commands_vm.go: vm stats and vm set now have Long + Example
    blocks (peers all do); --from flag description updated to spell
    out the relationship to --branch.

README:
  * Define "golden image" inline at first use.
  * Add a one-line Requirements block above Quick Start so users
    hit the firecracker / KVM dependency before `make build`.

Code hygiene:
  * dashIfEmpty / emptyDash were the same function. Deleted
    emptyDash, retargeted three call sites.
  * formatBytes (introduced today in image cache prune) duplicated
    humanSize. Consolidated to humanSize, now with a space ("1.2
    GiB" not "1.2GiB"). formatters_test.go expectations updated.

Logging chattiness:
  * "operation started" (logger.go), "daemon request canceled"
    (daemon.go), and "helper rpc completed" (roothelper.go) all
    fired at INFO per RPC. Downgraded to DEBUG so routine shell
    completions don't spam syslog.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:31:54 -03:00
33639efe0c
docs: fix three security-sensitive doc/code mismatches
A pre-release audit caught three places where the docs misrepresent
the trust model. Each is a claim users would read while auditing
banger and reach the wrong conclusion.

  * docs/privileges.md:140, 194 — bridge default was documented as
    "banger0" but the code default (model.DefaultBridgeName) is
    "br-fc". A user following the manual-removal recipe would `ip
    link del banger0` against a non-existent interface.
  * docs/privileges.md:192 — uninstall recipe said "stop your VMs
    first via `banger vm stop --all`". That flag doesn't exist; vm
    stop is a per-name action. Replaced with the actual options:
    `banger vm prune` (bulk) or per-VM `banger vm stop <name>`.
  * docs/privileges.md:255 and README.md:78-79 — helper unit's
    CapabilityBoundingSet was listed as 5 caps; the actual set in
    commands_system.go:370 is 11 (we added FOWNER/KILL/MKNOD/SETGID/
    SETUID/SYS_CHROOT during Phase B and never updated the docs).
    Updated both lists; the "what's NOT included" rationale stays
    accurate against the new positive list.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:30:58 -03:00
4d8dca6b72
image: add banger image cache prune for OCI cache cleanup
OCI layer blobs accumulate forever — every pull writes layers to
~/.cache/banger/oci/blobs/sha256/<hex> via go-containerregistry's
filesystem cache, and nothing ever evicts them. The cache is purely
a re-pull-avoidance (every flattened image is independent of the
blobs that sourced it), so it's a perfect candidate for an opt-in
operator-driven prune.

New surface:
  * api: ImageCachePruneParams{DryRun}, ImageCachePruneResult
    {BytesFreed, BlobsFreed, DryRun, CacheDir}.
  * daemon: ImageService.PruneOCICache walks layout.OCICacheDir for
    a (bytes, blobs) tally, then — outside dry-run — atomically
    renames the cache aside, recreates it empty, and rm -rf's the
    aside dir. The rename-then-rm avoids leaving the cache in a
    half-removed state if a pull starts mid-prune (the in-flight
    pull's open files survive the rename via standard Linux
    semantics; it just sees a fresh empty cache afterwards). Missing
    cache dir is treated as zero — fresh installs that have never
    pulled an OCI image don't error.
  * dispatch: image.cache.prune RPC (paramHandler-wrapped, mirroring
    every other image RPC). Documented-methods test list updated.
  * cli: `banger image cache` group with a `prune` subcommand
    (--dry-run flag). Output is a single line: "freed 1.2 GiB
    across 47 blob(s) in /var/cache/banger/oci" or "would free …".
    formatBytes helper for the size pretty-print.
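
The non-dry-run removal described in the daemon bullet above is a
rename-aside-then-delete sequence; a minimal sketch:

  package imagesvc

  import (
      "fmt"
      "os"
      "path/filepath"
      "time"
  )

  // pruneCacheDir moves the live cache aside, recreates it empty, and only
  // then deletes the aside copy, so a crash mid-prune never leaves the cache
  // half-removed. In-flight pulls keep their open files across the rename.
  func pruneCacheDir(cacheDir string) error {
      if _, err := os.Stat(cacheDir); os.IsNotExist(err) {
          return nil // fresh install: nothing to prune
      }
      aside := filepath.Join(filepath.Dir(cacheDir),
          fmt.Sprintf(".pruning-%s-%d", filepath.Base(cacheDir), time.Now().UnixNano()))
      if err := os.Rename(cacheDir, aside); err != nil {
          return err
      }
      if err := os.MkdirAll(cacheDir, 0o755); err != nil {
          return err
      }
      return os.RemoveAll(aside)
  }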

docs/oci-import.md: replaced the "Tech debt: cache eviction" bullet
with a "Cache lifecycle" section describing the new command and
the in-flight-pull caveat.

Tests: PruneOCICache covers the happy path (real prune empties the
cache, recreates an empty dir, doesn't leak the .pruning- aside),
the dry-run path (returns size, leaves blobs intact), and the
fresh-install path (cache dir absent → zero result, no error).
Smoke at JOBS=4 still green; live exercise against an empty cache
on a system install prints the expected zero summary.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 16:32:57 -03:00
182bccf8af
roothelper: pin bridge name + IP + CIDR to a banger-managed shape
priv.ensure_bridge / priv.create_tap accepted the daemon's network
config triple (BridgeName, BridgeIP, CIDR) and forwarded it straight
to `ip link` / `ip addr` / `ip link set master`. Argv-style exec
ruled out shell injection, but the kernel happily honours those
commands against any iface a compromised owner-uid daemon names —
including eth0/docker0/lo. Concretely:

  * priv.ensure_bridge could `ip link set <iface> up` against any
    host interface and `ip addr add` arbitrary IP/CIDR to it.
  * priv.create_tap could `ip link set <new-tap> master <iface>`,
    bridging the per-VM tap into the host's primary LAN so the
    guest sees host-local broadcast traffic.
  * priv.sync_resolver_routing / priv.clear_resolver_routing only
    enforced "name shaped like a Linux iface" — no banger constraint.

New validators (single chokepoint via validateNetworkConfig):
  * validateBangerBridgeName: name must equal "br-fc" or start with
    "br-fc-". Stops a compromised daemon from naming any host iface
    in these RPCs. Users with a custom bridge keep the prefix.
  * validateCIDRPrefix: numeric in [8, 32]. Wider prefixes would
    silently widen the bridge subnet beyond what the daemon intends.
  * validateNetworkConfig bundles bridge-name + validateIPv4 +
    validateCIDRPrefix so every helper RPC that takes the triple
    stays in lockstep.
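
A sketch of the two new validators — error text illustrative, the accepted
shapes are as described above:

  package roothelper

  import (
      "fmt"
      "strconv"
      "strings"
  )

  // validateBangerBridgeName only accepts the default bridge or a
  // banger-prefixed variant, so the daemon can never name eth0/docker0/lo.
  func validateBangerBridgeName(name string) error {
      if name == "br-fc" || strings.HasPrefix(name, "br-fc-") {
          return nil
      }
      return fmt.Errorf("bridge %q is not a banger-managed bridge", name)
  }

  // validateCIDRPrefix keeps the bridge subnet within the shape the daemon
  // intends: a numeric prefix length in [8, 32].
  func validateCIDRPrefix(prefix string) error {
      n, err := strconv.Atoi(prefix)
      if err != nil || n < 8 || n > 32 {
          return fmt.Errorf("CIDR prefix %q must be a number in [8, 32]", prefix)
      }
      return nil
  }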

Wired into methodEnsureBridge, methodCreateTap, and the resolver-
routing pair (replacing the older validateLinuxIfaceName-only check
with the stricter banger-bridge check).

docs/privileges.md updated: the helper-RPC table rows now spell out
the banger-managed bridge constraint, and the trust list includes
the new validators.

Tests: TestValidateBangerBridgeName (default + suffixed accepted,
host ifaces / wrong prefix / oversized rejected),
TestValidateCIDRPrefix (boundary + non-numeric + IPv6-style 64 rejected),
TestValidateNetworkConfig (happy path + each-field-bad cases).
Smoke at JOBS=4 still green — banger's defaults sail through the
new gate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 16:19:28 -03:00
4004ce2e7e
imagecat,kernelcat: bound staged download, hash before extract
Both Fetch flows previously streamed resp.Body straight into
zstd → tar → on-disk extractor with the SHA256 check tacked on at
the END. A bad mirror or an attacker that's compromised the catalog
host could ship a multi-gigabyte tarball, watch banger expand it to
disk, and only THEN see the helpful "sha256 mismatch" message —
having already filled the host filesystem.

Reorder the operations: stage the compressed tarball to a temp file
under the destination directory through an io.LimitReader (cap +1
bytes), hash on the way in, refuse to decompress if either the cap
trips or the SHA mismatches. Worst-case disk use is bounded by the
cap, not by the source.
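
A sketch of the staging step (function name and error text are
illustrative; imports elided):

  // stageBounded copies at most maxBytes of r to a temp file in
  // dstDir, hashing as it goes; one extra byte detects oversize.
  func stageBounded(r io.Reader, dstDir, wantSHA string,
      maxBytes int64) (string, error) {
      tmp, err := os.CreateTemp(dstDir, "bundle-*.tar.zst")
      if err != nil {
          return "", err
      }
      defer tmp.Close()
      h := sha256.New()
      n, err := io.Copy(io.MultiWriter(tmp, h),
          io.LimitReader(r, maxBytes+1))
      if err != nil {
          os.Remove(tmp.Name())
          return "", err
      }
      if n > maxBytes {
          os.Remove(tmp.Name())
          return "", fmt.Errorf("download exceeds %d byte cap", maxBytes)
      }
      if hex.EncodeToString(h.Sum(nil)) != wantSHA {
          os.Remove(tmp.Name())
          return "", fmt.Errorf("sha256 mismatch")
      }
      return tmp.Name(), nil // caller decompresses/extracts from here
  }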

Cap is exposed as a package var (MaxFetchedBundleBytes,
MaxFetchedKernelBytes) so callers can tune per-deployment and tests
can squeeze it down to provoke the rejection. Default 8 GiB —
generous enough for a 4 GiB rootfs (which compresses to ~1-2 GiB),
tight enough to make a "fill the host disk" attack expensive.

The temp file lives in the destination dir so extraction stays on
the same filesystem and we don't pay for cross-FS rename. defer
os.Remove cleans up; the existing per-package cleanup() handler
still removes any partial extraction on hash mismatch / extraction
failure.

Tests: each package gets a
TestFetchRejectsOversizedTarballBeforeExtraction that sets the cap
to 64 bytes, points Fetch at a multi-KB
tarball, and asserts (a) error mentions "cap", (b) destination dir
is left clean (no leaked rootfs / manifest / kernel tree). All
existing tests still pass — happy path, hash mismatch, missing
files, path traversal, HTTP error, etc.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 16:09:55 -03:00
3805b093b4
roothelper: tie kill/signal authorization to banger-launched firecracker
validateFirecrackerPID was a substring check on /proc/<pid>/cmdline:
"contains 'firecracker'". Good enough to refuse init/sshd/the test
binary, but on a shared host where multiple users run firecracker
the helper would happily SIGKILL someone else's VM. The owner-UID
daemon could weaponise the helper as an arbitrary "kill any
firecracker on this box" primitive.

Replace the substring gate with two stronger acceptance modes:

  * Cgroup match (the supported path): /proc/<pid>/cgroup contains
    bangerd-root.service. systemd assigns every direct child of the
    helper unit into that cgroup at fork; the kernel keeps it there
    for the process's lifetime, so no daemon-UID code can forge it.
    Other users' firecracker processes live in different cgroups
    (user@<uid>.service, foreign service slices) and fail this
    check. Also robust across helper restarts: KillMode=control-group
    on the unit kills children when the service goes down, so an
    "orphan banger firecracker in some other cgroup" is rare by
    construction.

  * --api-sock fallback: cmdline carries `--api-sock <path>` with
    the path under banger's RuntimeDir. Covers the legacy direct
    (no-jailer) launch path, and gives daemon reconcile a way to
    clean up the rare orphan that lands outside the service cgroup
    after a hard helper crash.

Tried /proc/<pid>/root first — pivot_root semantics make jailer'd
firecracker read its root as "/" from any namespace, so the symlink
is useless as a banger-managed fingerprint. Cgroup is the right
signal.

Also added a signal allowlist: priv.signal_process now rejects
anything outside {TERM, KILL, INT, HUP, QUIT, USR1, USR2, ABRT}
(case-insensitive, with or without SIG prefix). STOP/CONT, real-time
signals, and numeric forms are refused — the helper running as root
must not be a generic "send arbitrary signal to my pid" primitive.
priv.kill_process is unaffected (it always sends KILL).
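
In sketch form (map literal as the allowlist; the real validator may
differ in shape):

  var allowedSignals = map[string]bool{
      "TERM": true, "KILL": true, "INT": true, "HUP": true,
      "QUIT": true, "USR1": true, "USR2": true, "ABRT": true,
  }

  func validateSignalName(name string) error {
      s := strings.ToUpper(strings.TrimSpace(name))
      s = strings.TrimPrefix(s, "SIG")
      if !allowedSignals[s] {
          // numeric forms, STOP/CONT, and RT signals all land here
          return fmt.Errorf("signal %q not allowed", name)
      }
      return nil
  }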

Tests: validateSignalName covers allowlist + numeric/STOP/RTMIN
rejection; extractFirecrackerAPISock pins the three flag forms
(--api-sock VAL, --api-sock=VAL, -a VAL); pathIsUnder gets a small
table; existing TestValidateFirecrackerPID still rejects PID 0,
PID 1, and the test process itself. Doctor's non-system-mode test
gained a t.TempDir-backed install path so it stops being
environment-dependent on machines that happen to have
/etc/banger/install.toml.

Smoke at JOBS=4 still green — every banger-launched firecracker
sails through the cgroup match.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 16:00:41 -03:00
4a56e6c7d6
roothelper: walk validateManagedPath components, reject symlinks
validateManagedPath was textual-only: filepath.Clean + dest-prefix
match. That stopped `..` escapes but not the symlink-bypass attack
that motivated this fix — a daemon-UID attacker can write into
StateDir/RuntimeDir (it's their UID), so they can plant
`<StateDir>/redirect -> /etc` and any helper RPC that then operates
on `<StateDir>/redirect/...` resolves through the symlink at the
kernel and lands at /etc/... on the host.

Concretely the leaks this closed:
  * priv.create_dm_snapshot: rootfs/cow paths fed to losetup —
    losetup follows the symlink and attaches a host block device.
  * priv.launch_firecracker: kernel/initrd paths hard-linked into
    the chroot via `ln -f` — link(2) on Linux follows source
    symlinks, hard-linking host files into the jail.
  * priv.read_ext4_file / priv.write_ext4_files: image paths fed
    to debugfs / e2cp as root.
  * validateLaunchDrivePath: drive paths mknod'd or hard-linked.
  * validateJailerOpts: chroot base.

Fix: after the existing prefix match, walk every component below
the matched root with Lstat. Any existing symlink — leaf or
intermediate — fails the validator. ENOENT is tolerated because
several callers pass paths firecracker/the helper materialise
later (sockets, log files, kernel hard-link targets); whoever
materialises them goes through the same validation when the
helper-side primitive runs.
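
The walk boils down to something like this (a sketch; the prefix
match and error wrapping are elided):

  // rejectSymlinkComponents Lstats every component of path below
  // root, failing on any existing symlink and tolerating ENOENT.
  func rejectSymlinkComponents(root, path string) error {
      rel, err := filepath.Rel(root, path)
      if err != nil {
          return err
      }
      cur := root
      for _, part := range strings.Split(rel, string(filepath.Separator)) {
          if part == "." || part == "" {
              continue
          }
          cur = filepath.Join(cur, part)
          fi, err := os.Lstat(cur)
          if errors.Is(err, os.ErrNotExist) {
              return nil // not materialised yet; nothing below can exist
          }
          if err != nil {
              return err
          }
          if fi.Mode()&os.ModeSymlink != 0 {
              return fmt.Errorf("%s is a symlink", cur)
          }
      }
      return nil
  }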

Subsumes most of validateNotSymlink's coverage but the explicit
call sites (methodEnsureSocketAccess, methodCleanupJailerChroot)
keep their belt-and-braces check — those paths must EXIST and
not be symlinks, which validateNotSymlink enforces strictly while
the broadened validateManagedPath tolerates ENOENT.

Race-free in practice: helper RPCs are short and the validator
fires on the same kernel state the next syscall sees. The helper
loop processes RPCs serially per-connection, and the validator
plus the syscall both run as root within microseconds of each
other.

Four new tests cover symlink leaf, symlink intermediate, missing
leaf (must pass), and the plain happy path. Smoke at JOBS=4 still
green — every legitimate daemon-supplied path passes the walk.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:26:56 -03:00
0a079277ef
imagepull: reject symlink ancestors during OCI flatten
safeJoin previously did textual cleaning + dest-prefix check only.
That's enough to catch `../escape`, but not the symlink-ancestor
attack: a malicious OCI layer plants `etc -> /tmp/probe`, a later
layer writes/deletes/hardlinks against `etc/anything`, and the kernel
silently dereferences the symlink so the operation lands at
`/tmp/probe/anything` on the host.

The daemon runs flatten as the owner UID, so anywhere that UID can
write becomes a write target; anywhere it can delete (e.g. its own
home) becomes a delete target. Whiteouts and hardlinks make this
worse — a whiteout for `etc/.wh.victim` would `RemoveAll` the host
file `/tmp/probe/victim`, and a TypeLink would expose host files
inside the extracted rootfs.

safeJoin now Lstat-walks every intermediate component of the joined
path against the already-extracted tree, refusing if any ancestor is
a symlink. Walking is race-free against the extraction loop because
we process tar entries serially. Leaf components stay caller-owned
(TypeSymlink writes legitimately want a symlink leaf; TypeReg
RemoveAll's any prior leaf before opening; etc.).

Three new tests pin the protection: write through a symlinked
ancestor, whiteout through a symlinked ancestor, and hardlink target
through a symlinked ancestor — each must fail and leave the host
probe path untouched.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:20:46 -03:00
8bfa525568
test: cover imagemgr + dmsnap helpers
Both packages had zero tests before this change. The helpers in them
are pure (imagemgr) or scripted-runner-friendly (dmsnap), so they're
cheap to pin and worth catching regressions on.

imagemgr/paths_test.go:
  * DebianBasePackages returns a defensive copy (mutating the result
    can't poison subsequent calls — important because hashPackages
    digests this list).
  * BuildMetadataPackages stays in lockstep with DebianBasePackages.
  * hashPackages is order-sensitive and includes a trailing newline
    in its canonical join (regression guard for any future "sort the
    list before hashing" temptation that would invalidate every
    on-disk hash).
  * StageOptionalArtifactPath returns "" for empty/whitespace input
    and joins by name otherwise.
  * WritePackagesMetadata writes <rootfs>.packages.sha256 with the
    expected hash, no-ops on empty rootfs path or empty package list.
  * DebianBasePackages contains the small critical-package floor
    (ca-certificates, curl, git) so a future apt-list trim can't
    silently drop them.

dmsnap/dmsnap_test.go:
  * Create runs losetup base, losetup cow, blockdev getsz, dmsetup
    create in that order, with a snapshot table referencing the loops
    in (base, cow) order — a swap would corrupt every VM.
  * Create's failure path unwinds with losetup -d on cow then base.
  * Cleanup tears down dmsetup before losetup (otherwise dmsetup sees
    EBUSY against vanished backing devices).
  * Cleanup falls back to DMDev when DMName is empty.
  * Cleanup tolerates "No such device" on losetup -d (idempotent
    re-run after a partial cleanup).
  * Cleanup surfaces non-missing losetup errors (the tolerance is
    narrow on purpose).
  * Remove returns nil on a missing target and surfaces non-retryable
    errors immediately.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:13:49 -03:00
45826f0db0
docs: add config.md reference for the daemon TOML schema
README previously punted on the config schema with a "full key list in
internal/config/config.go" pointer. New docs/config.md walks every
TOML key the daemon reads — top-level, [vm_defaults], [[file_sync]] —
with type, default, and a one-sentence description per row, plus a
copy-pasteable example at the bottom.

Sourced 1:1 from internal/config/config.go's fileConfig (and the
defaults in load() + internal/model/types.go), so it stays accurate
as long as those structs are the schema source of truth.

README's existing config section now points at docs/config.md, and
the "Further reading" list gets it as the first bullet.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:11:18 -03:00
7d7c15a370
docs: fix config-file path in privileges.md
The filesystem-mutations table referred to `~/.config/banger/banger.toml`,
but the daemon reads `~/.config/banger/config.toml` (per
internal/config/config.go and README.md). Bring privileges.md in line.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:11:06 -03:00
0c77b042ed
build: add pre-commit hook gating lint + test + build
`.githooks/pre-commit` runs `make lint test build` on every commit,
catching unformatted Go (`gofmt -l`), `go vet` regressions, shellcheck
errors on scripts/, broken unit tests, and broken builds before they
reach the index. Activate per-clone with `make install-hooks`, which
points `core.hooksPath` at `.githooks/`. Bypass for in-flight WIP
commits with `git commit --no-verify`.

The hook directory is tracked in git (unlike .git/hooks/) so a clone
+ `make install-hooks` is enough to opt in; no per-machine
hand-installation. .PHONY and the help line both list the new
target.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:08:41 -03:00
6b4e1922b0
model: gofmt VMRecord struct alignment
Stats and Workspace fields landed in 6b543cb with column alignment
that gofmt wants to pull tighter; rerun gofmt so the new pre-commit
hook's `gofmt -l` gate passes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:08:12 -03:00
3e6d0cee89
doctor: surface security-posture drift in banger doctor
`docs/privileges.md` now documents what the install promises (helper +
daemon services active, sockets at 0600 ownerUID, units carrying the
hardening directives, firecracker root-owned + non-writable). Doctor
verifies the running install matches: drift between the doc and the
filesystem would silently weaken the trust model otherwise.

In system mode (install.toml present):
  * helper service / owner daemon service: `systemctl is-active`.
  * helper socket / daemon socket: stat-and-compare mode + uid against
    the registered owner.
  * helper unit hardening / daemon unit hardening: scan the rendered
    unit for NoNewPrivileges, ProtectSystem=strict, ProtectHome
    (=yes for the helper, =read-only for the daemon), RestrictSUIDSGID,
    LockPersonality, and the helper's CapabilityBoundingSet line. The
    daemon unit also pins User=<registered owner>.
  * firecracker binary ownership: regular file, not a symlink, mode
    not group/world writable, executable, owned by uid 0 — same
    constraints validateRootExecutable enforces at launch, surfaced
    once at doctor time so a misconfigured binary fails fast with a
    clearer error than the helper's open-time rejection.

In non-system mode (no /etc/banger/install.toml) doctor emits a single
WARN row pointing at docs/privileges.md > 'Running outside the system
install'. A PASS would imply guarantees the install isn't actually
providing.

Tests cover both branches: the non-system warn pins its message
substrings; system-mode pins that every check name shows up; and the
helpers (socket-perms, unit-hardening, executable-ownership) have
direct table-style negative tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 14:58:34 -03:00
853249dec2
roothelper: tighten input validation across privileged RPCs
Defence-in-depth pass over every helper method that touches the host
as root. Each fix narrows what a compromised owner-uid daemon could
ask the helper to do; many close concrete file-ownership and DoS
primitives that the previous validators didn't reach.

Path / identifier validation:
  * priv.fsck_snapshot now requires /dev/mapper/fc-rootfs-* (was
    "is the string non-empty"). e2fsck -fy on /dev/sda1 was the
    motivating exploit.
  * priv.kill_process and priv.signal_process now read
    /proc/<pid>/cmdline and require a "firecracker" substring before
    sending the signal. Killing arbitrary host PIDs (sshd, init, …)
    is no longer a one-RPC primitive.
  * priv.read_ext4_file and priv.write_ext4_files now require the
    image path to live under StateDir or be /dev/mapper/fc-rootfs-*.
  * priv.cleanup_dm_snapshot validates every non-empty Handles field:
    DM name fc-rootfs-*, DM device /dev/mapper/fc-rootfs-*, loops
    /dev/loopN.
  * priv.remove_dm_snapshot accepts only fc-rootfs-* names or
    /dev/mapper/fc-rootfs-* paths.
  * priv.ensure_nat now requires a parsable IPv4 address and a
    banger-prefixed tap.
  * priv.sync_resolver_routing and priv.clear_resolver_routing now
    require a Linux iface-name-shaped bridge name (1–15 chars, no
    whitespace/'/'/':') and, for sync, a parsable resolver address.

Symlink defence:
  * priv.ensure_socket_access now validates the socket path is under
    RuntimeDir and not a symlink. The fcproc layer's chown/chmod
    moves to unix.Open(O_PATH|O_NOFOLLOW) + Fchownat(AT_EMPTY_PATH)
    + Fchmodat via /proc/self/fd, so even a swap of the leaf into a
    symlink between validation and the syscall is refused. The
    local-priv (non-root) fallback uses `chown -h`.
  * priv.cleanup_jailer_chroot rejects symlinks at both the leaf
    (os.Lstat) and intermediate path components (filepath.EvalSymlinks
    + clean-equality). The umount sweep was rewritten from shell
    `umount --recursive --lazy` to direct unix.Unmount(MNT_DETACH |
    UMOUNT_NOFOLLOW) per child mount, deepest-first; the findmnt
    guard remains as the rm-rf safety net. Local-priv mode falls
    back to `sudo umount --lazy`.

Binary validation:
  * validateRootExecutable now opens with O_PATH|O_NOFOLLOW and
    Fstats through the resulting fd. Rejects path-level symlinks and
    narrows the TOCTOU window between validation and the SDK's exec
    to fork+exec time on a healthy host.

Daemon socket:
  * The owner daemon now reads SO_PEERCRED on every accepted
    connection and refuses any UID that isn't 0 or the registered
    owner. Filesystem perms (0600 + ownerUID) already enforced this;
    the check is belt-and-braces in case the socket FD is ever
    leaked to a non-owner process.
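
The peer check is essentially this, assuming golang.org/x/sys/unix
(the daemon's actual wiring may differ):

  func authorizePeer(conn *net.UnixConn, ownerUID uint32) error {
      raw, err := conn.SyscallConn()
      if err != nil {
          return err
      }
      var cred *unix.Ucred
      var credErr error
      if err := raw.Control(func(fd uintptr) {
          cred, credErr = unix.GetsockoptUcred(int(fd),
              unix.SOL_SOCKET, unix.SO_PEERCRED)
      }); err != nil {
          return err
      }
      if credErr != nil {
          return credErr
      }
      if cred.Uid != 0 && cred.Uid != ownerUID {
          return fmt.Errorf("peer uid %d is neither root nor the owner",
              cred.Uid)
      }
      return nil
  }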

Docs:
  * docs/privileges.md walked end-to-end. Each helper RPC's
    Validation gate row reflects what the code actually enforces.
    New section "Running outside the system install" calls out the
    looser dev-mode trust model (NOPASSWD sudoers, helper hardening
    bypassed) so users don't deploy that path on shared hosts.
    Trust list updated to include every new validator.

Tests added: validators (DM-loop, DM-remove-target, DM-handles,
ext4-image-path, iface-name, IPv4, resolver-addr, not-symlink,
firecracker-PID, root-executable variants), the daemon's authorize
path (non-unix conn rejection + unix conn happy path), the umount2
ordering contract (deepest-first + --lazy on the sudo branch), and
positive/negative cases for the chown-no-follow fallback.

Verified end-to-end via `make smoke JOBS=4` on a KVM host.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 14:39:41 -03:00
6b543cb17f
firecracker: adopt firecracker-jailer for VM launch (Phase B)
Each VM's firecracker now runs inside a per-VM chroot dropped to the
registered owner UID via firecracker-jailer. Closes the broad ambient-
sudo escalation surface that survived Phase A: the helper still needs
caps for tap/bridge/dm/loop/iptables, but the VMM itself no longer
runs as root in the host root filesystem.

The host helper stages each chroot up front: hard-links the kernel
and (optional) initrd, mknods block-device drives + /dev/vhost-vsock,
copies in the firecracker binary (jailer opens it O_RDWR so a ro bind
fails with EROFS), and bind-mounts /usr/lib + /lib trees read-only so
the dynamic linker can resolve. Self-binds the chroot first so the
findmnt-guarded cleanup can recurse safely.

AF_UNIX sun_path is 108 bytes; the chroot path easily blows past that.
Daemon-side launch pre-symlinks the short request socket path to the
long chroot socket before Machine.Start so the SDK's poll/connect
sees the short path while the kernel resolves to the chroot socket.
--new-pid-ns is intentionally disabled — jailer's PID-namespace fork
makes the SDK see the parent exit and tear the API socket down too
early.

CapabilityBoundingSet for the helper expands to add CAP_FOWNER,
CAP_KILL, CAP_MKNOD, CAP_SETGID, CAP_SETUID, CAP_SYS_CHROOT alongside
the existing CAP_CHOWN/CAP_DAC_OVERRIDE/CAP_NET_ADMIN/CAP_NET_RAW/
CAP_SYS_ADMIN.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 14:38:07 -03:00
d73efe6fbc
firecracker: drop sudo sh -c, race chown against SDK probe in Go
Replace the shell-string launcher in buildProcessRunner with a direct
exec.Command. The previous sh -c wrapper relied on shellQuote escaping
for every MachineConfig field that flowed into the launch script; any
future field that ever carried an attacker-controlled value would have
become RCE-as-root. The new path passes binary path and flags as
separate argv entries, so there is no shell to interpret anything.

The wrapper also did two things the shell can no longer do for us:

  1. umask 077 — moved to syscall.Umask in cmd/bangerd/main.go so every
     firecracker child (and any other file the daemon creates) inherits
     0600 by default. Single-user dev sandbox state should be private.

  2. chown_watcher — the SDK's HTTP probe inside Machine.Start connects
     to the API socket the moment it appears. Under sudo the socket is
     created root-owned and the daemon's connect(2) gets EACCES, so the
     post-Start EnsureSocketAccess never runs. The shell papered over
     this with a backgrounded chown loop. Replaced by
     fcproc.EnsureSocketAccessForAsync: same race-window guarantee, in
     pure Go, kicked off in LaunchFirecracker right before Start and
     awaited right after.

Tests updated: shell-substring assertions replaced with cmd-arg
assertions, plus a new fcproc test pinning the async chown sequence.
Smoke (full systemd two-service install + KVM scenarios) passes.
2026-04-27 20:14:01 -03:00
c4e1cb5953
daemon: tighten concurrency around pulls, cleanup, and handle persistence
Four targeted fixes from a race-condition audit of the daemon package.
None change behaviour on the happy path; each closes a window where a
concurrent or interrupted RPC could strand state on the host.

  - KernelDelete now holds the same per-name lock as KernelPull /
    readOrAutoPullKernel. Without it, a delete racing a concurrent
    pull could remove files mid-write or land between the pull's
    manifest write and its first use.

  - cleanupRuntime no longer early-returns on an inner waitForExit
    failure; DM snapshot, capability, and tap teardown always run and
    every error is folded into the returned errors.Join. EBUSY against
    a still-alive firecracker is benign and surfaces in the joined
    error rather than stranding kernel state across daemon restarts.

  - Per-name image / kernel pull locks switch from *sync.Mutex to a
    1-buffered chan struct{} (see the sketch after this list).
    Acquire is a select on ctx.Done(), so a peer waiting behind a
    pull whose RPC was cancelled can bail out instead of blocking
    forever on a pull nobody is consuming.

  - setVMHandles writes the per-VM scratch file before updating the
    in-memory cache. A daemon crash between the two now leaves disk
    ahead of memory (recoverable: reconcile re-seeds the cache from
    the file on next start) rather than memory ahead of disk (lost
    handles → stranded DM/loops/tap).
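
The cancellable acquire on those channels is roughly this, where
lock is the make(chan struct{}, 1) held in the per-name map (names
illustrative):

  func acquire(ctx context.Context, lock chan struct{}) error {
      select {
      case lock <- struct{}{}: // got the slot; we hold the lock
          return nil
      case <-ctx.Done(): // caller's RPC was cancelled; stop waiting
          return ctx.Err()
      }
  }

  func release(lock chan struct{}) { <-lock }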

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 19:32:43 -03:00
777b597a1e
smoke: smol VMs by default + JOBS auto-detects nproc
Three quality-of-life improvements now that the daemon-side races
that gated parallel mode are fixed:

1. **Smol VMs by default.** Smoke installs a tuned config.toml at
   /etc/banger/config.toml between `system install` and `system
   restart` so the respawned daemon picks up:
       vcpu = 2
       memory_mib = 1024
       disk_size = "2G"
       system_overlay_size = "2G"
   Smoke scenarios assert behavior, not capacity — they don't need
   4 vCPU / 8 GiB / 8 GiB / 8 GiB. Per-VM RAM cost drops from 8 GiB
   to 1 GiB; nominal disk drops from 16 GiB to 4 GiB (sparse, so
   actual use is small either way, but the new ceiling is gentler
   on hosts that can't overcommit). Scenarios that test
   reconfiguration (vm_set's --vcpu 2 → 4) still pass --vcpu
   explicitly, so this default doesn't perturb their assertions.

2. **JOBS defaults to nproc.** The Makefile resolves JOBS to
   `$(shell nproc)` if unset; the smoke script's existing cap of 8
   keeps the parallel pool sane on bigger hosts. The script always
   passes --jobs N now, so behavior is consistent. Override with
   `make smoke JOBS=1` for a fully serial run.

3. **Help text catches up.** --help no longer flags parallelism as
   experimental (the underlying daemon races are fixed) and now
   describes the small-VM default. `make help` mentions the new
   default and how to override.

Verified: `make smoke` (no JOBS) on a 32-core box auto-runs with
JOBS=8, smol VMs, 21/21 PASS in 172s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 17:36:17 -03:00
72882e45d7
daemon: serialise concurrent image/kernel pulls + atomic-rename seed refresh
Three concurrency bugs surfaced by `make smoke JOBS=4` that all stem
from `vm.create` paths assuming single-caller semantics:

1. **Kernel auto-pull manifest race.** Parallel `vm.create` calls that
   each need to auto-pull the same kernel ref both run kernelcat.Fetch
   in parallel against the same /var/lib/banger/kernels/<name>/. Fetch
   writes manifest.json non-atomically (truncate + write); the peer
   reads it back mid-write and trips
   "parse manifest for X: unexpected end of JSON input".

   Fix: per-name `sync.Mutex` map on `ImageService` (kernelPullLock).
   `KernelPull` and `readOrAutoPullKernel` both acquire it and re-check
   `kernelcat.ReadLocal` after the lock so a peer who finished while we
   waited is treated as success — `readOrAutoPullKernel` does NOT call
   `s.KernelPull` because that path errors with "already pulled" on a
   peer-success, which would be wrong for auto-pull. Different kernels
   stay parallel.

2. **Image auto-pull race.** Same shape as the kernel race but on the
   image side: parallel `vm.create` calls both run pullFromBundle /
   pullFromOCI for the missing image (each ~minutes of OCI fetch +
   ext4 build). The publishImage atom under imageOpsMu only protects
   the rename + UpsertImage commit, so the loser does all the work
   only to fail at the recheck with "image already exists".

   Fix: per-name `sync.Mutex` map on `ImageService` (imagePullLock).
   `findOrAutoPullImage` acquires it, re-checks FindImage, and only
   then calls PullImage. Loser short-circuits with the
   freshly-published image instead of redoing minutes of work.
   PullImage's own publishImage recheck stays as defense-in-depth
   for callers that bypass the auto-pull path.

3. **Work-seed refresh race.** When the host's SSH key has rotated
   since an image was last refreshed, `ensureAuthorizedKeyOnWorkDisk`
   triggers `refreshManagedWorkSeedFingerprint`, which rewrote the
   shared work-seed.ext4 in place via e2rm + e2cp. Peer `vm.create`
   calls doing parallel `MaterializeWorkDisk` rdumps observed a torn
   ext4 image — "Superblock checksum does not match superblock".

   Fix: stage the rewrite on a sibling tmpfile (`<seed>.refresh.<pid>-<ns>.tmp`)
   and atomic-rename. Concurrent readers either have the file open
   (kernel keeps the pre-rename inode alive) or open after the rename
   (see the new inode) — never observe a partial state. Two parallel
   refreshes are idempotent (same daemon, same SSH key) so unique tmp
   names are enough; whichever rename lands last wins, with identical
   content. UpsertImage runs after the rename so the recorded
   fingerprint always matches what's on disk.
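
The stage-and-rename shape, as a sketch (helper name and callback
split are illustrative):

  func refreshSeedAtomically(seedPath string,
      rewrite func(tmpPath string) error) error {
      tmp := fmt.Sprintf("%s.refresh.%d-%d.tmp",
          seedPath, os.Getpid(), time.Now().UnixNano())
      // rewrite copies the seed to tmp and applies the e2rm/e2cp
      // edits against the copy, never against the live file.
      if err := rewrite(tmp); err != nil {
          os.Remove(tmp)
          return err
      }
      // Atomic publish: concurrent readers either keep the old inode
      // open or open the new one, never a torn image.
      if err := os.Rename(tmp, seedPath); err != nil {
          os.Remove(tmp)
          return err
      }
      return nil
  }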

Plus one smoke harness fix: reclassify `vm_prune` from `pure` to
`global`. `vm prune -f` removes ALL stopped VMs system-wide, not just
the ones the scenario created — so a parallel peer scenario that
happens to have its VM in `created`/`stopped` momentarily gets wiped.
Moving prune to the post-pool serial phase keeps it from racing with
in-flight scenarios.

After all four fixes, `make smoke JOBS=4` passes 21/21 in 174s
(serial baseline 141s; the small overhead is the buffered-output and
`wait -n` semaphore cost — well worth the parallelism for fast-iter
work on a 32-core box).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 17:24:11 -03:00
115eec8576
smoke: discoverable scenarios + selectable runs + parallel dispatch
`scripts/smoke.sh` was a 600-line linear script: no way to see what it
covers without reading the whole thing, and no way to run a single
scenario when iterating. Every iteration paid the full ~5-10 min suite,
which made fast feedback loops painful enough that the suite tended to
get skipped.

Refactor into a registry + per-scenario functions:

- Top-of-file SMOKE_SCENARIOS (ordered) + SMOKE_DESCS (one-line desc per
  scenario) + SMOKE_CLASS (pure / repodir / global) drive both listing
  and dispatch. The 21 existing scenario blocks become scenario_<name>
  functions. Bodies are the inline blocks verbatim, modulo the workspace
  fixture move described below.
- New CLI: --list (cheap discovery, no install / no env-vars),
  --scenario NAME (or NAME,NAME,...), --jobs N (parallel dispatch),
  -h / --help.
- New setup_fixtures runs once after the install/doctor/restart preamble
  and produces the throwaway git repo at $repodir that 'repodir'-class
  scenarios consume. Lifted out of scenario_workspace_run so single-
  scenario invocations (e.g. --scenario workspace_dryrun) get the
  fixture even when the scenario that historically built it isn't
  selected.
- Wipe ~/.local/state/banger/ssh/known_hosts in the install preamble.
  `system uninstall --purge` clears /var/lib/banger but the user-side
  known_hosts persists by design — and smoke creates VMs that reuse
  guest IPs (172.16.0.2 etc.) with fresh host keys every run, so a
  leftover entry trips StrictHostKeyChecking and the daemon's wait-
  for-ssh sees only timeouts. This was the real cause of the "guest
  ssh did not come up" flakes that surface across smoke iterations.

Parallel dispatch:

- --jobs N opts into a slot-limited pool: 'pure' scenarios fan out as
  individual jobs; 'repodir' scenarios fuse into a single serial chain
  (since they mutate $repodir in registry order); 'global' scenarios
  run serially after the pool, one at a time.
- Cap is min(N, 8) — each parallel slot runs an 8 GiB VM, so RAM is
  the binding constraint.
- Parallel-mode stdout/stderr per scenario buffer to per-scenario
  logs and emit one PASS/FAIL line on completion; on FAIL the buffer
  is dumped. Serial mode (--jobs 1, the default) keeps stdout
  unbuffered exactly as before.
- Parallelism is documented as experimental in --help: it surfaces
  real daemon-side concurrency bugs (image auto-pull manifest race,
  work-seed-refresh race on the shared work-seed.ext4) that don't
  appear in serial mode and that need their own fix in the daemon.
  Serial (--jobs 1) is the reliable path; --jobs N is for fast-
  iteration dev work where occasional re-runs are acceptable.

Exit codes: 0 ok, 1 assertion failed, 2 usage error (unknown
scenario, missing SCENARIO=), 77 explicit selection skipped (NAT
when sudo iptables is unavailable AND nat is the only selected
scenario; soft-skip otherwise).

Makefile additions:

- `make smoke-list` — cheap discovery, no smoke-build dep, no env vars.
- `make smoke-one SCENARIO=name` — single-scenario run, full preamble.
  MAKECMDGOALS guard catches missing SCENARIO= before any rebuild.
- `make smoke JOBS=N` — passes through to the script's --jobs N.
- Help text covers all three.

Verified: serial full suite passes 21/21 in ~140s on this host;
make smoke-one SCENARIO=workspace_restart runs the recently-added
regression test alone in ~50s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 16:56:57 -03:00
c9358ab390
daemon: sync guest over ssh before stop to preserve workspace writes
VM stop has been quietly losing data freshly written via
`vm workspace prepare`: stop+start of a workspace-prepared VM would
come back with /root/repo wiped on the work disk.

Root cause is firecracker + Debian's systemd defaults. FC's
SendCtrlAltDel (the only "graceful shutdown" action FC exposes) just
delivers the keystroke; what the guest does with it is its choice.
Debian routes ctrl-alt-del.target -> reboot.target, so the guest
reboots, FC stays alive, the daemon's 10s wait_for_exit window
expires, and the SIGKILL fallback drops anything still in FC's
userspace I/O path. For an idle VM that's invisible. For one that
just took hundreds of small writes through a workspace prepare,
it's data loss.

Fix is to dial the guest over SSH inside StopVM and run
`sync; systemctl --no-block poweroff || /sbin/poweroff -f &` before
the existing SendCtrlAltDel path. The synchronous `sync` is the
load-bearing piece — it blocks until every dirty page hits virtio-blk
and lands in the on-host root.ext4. Whether poweroff completes
before SIGKILL fires is incidental; sync has already run. SSH
unreachable falls back to the old SendCtrlAltDel behaviour so a
broken-network guest can't make stop hang.

Bounded by a 5s SSH-dial timeout so a half-broken guest can't extend
the overall stop window past gracefulShutdownWait.

Also adds two smoke scenarios:
- `workspace + stop/start`: prepare -> stop -> start -> assert
  marker survives. This is the regression that caught the bug.
- `vm exec`: end-to-end coverage for d59425a — auto-cd into the
  prepared workspace, exit-code propagation, dirty-host warning,
  --auto-prepare resync, refusal on stopped VM.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 15:41:32 -03:00
d59425adb9
feat(vm): add vm exec command with workspace dirty detection
Introduces three interconnected features for persistent VM workflows:

1. `banger vm exec <vm> -- <cmd>`: runs a command in the prepared
   workspace, automatically cd-ing into the guest path and wrapping
   via `mise exec --` so mise-managed tools are on PATH. Falls back
   to a plain exec when mise isn't available. Exit code propagates
   verbatim.

2. Workspace persistence: workspace.prepare now stores the guest path,
   host source path, and HEAD commit into a new `workspace_json` column
   on the vms table (migration 3). This state survives daemon restarts
   and informs both dirty-checking and auto-prepare.

3. Dirty detection: `vm exec` compares the stored HEAD commit against
   the current host repo HEAD. When stale it warns and, with
   --auto-prepare, re-syncs the workspace before running.

Also:
- WORKSPACE column added to `banger ps` / `vm list`
- `banger vm` quick reference updated with `vm exec` entry
2026-04-26 23:53:45 -03:00
c8637b0fe4
daemon: auto-trust mise configs on workspace prepare
vm run ./repo (and the explicit vm workspace prepare) imports the
host user's own checkout. Any .mise.toml that lands in the guest
would otherwise prompt on the first guest command — 'mise trust:
hash mismatch, run "mise trust"' — and stall what should be a
zero-friction sandbox launch. The repo just came from the host,
the guest is single-tenant root@<vm>.vm, the user already trusts
this checkout: auto-trust is the right default here.

After workspaceImportHook succeeds, run
  if command -v mise >/dev/null 2>&1; then
    mise trust --quiet --all <guest_path> || true
  fi
inside the guest. Best effort: a missing mise binary, a non-zero
exit, or a no-op trust all log at debug only and never fail
prepare. The path is shell-quoted via ws.ShellQuote so guest
paths with spaces or quotes don't break the argument.

Tests pin the script shape (command -v guard + --quiet --all flag
+ trailing `|| true`) and assert the script actually fires after
a successful import. A path with an apostrophe round-trips via
ws.ShellQuote without truncation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 23:08:41 -03:00
fa4292756d
daemon: surface previously-swallowed errors at warn
Three recovery-path errors were silently dropped:

- vm_lifecycle.go startVMLocked persisted the VMStateError record
  with `_ = s.store.UpsertVM(...)`. If the persist failed the user
  saw the original start error but operators had no way to find
  out the store had also drifted out of sync.
- vm_lifecycle.go deleteVMLocked killed the firecracker process
  with `_ = s.net.killVMProcess(...)`. cleanupRuntime tears it
  down regardless, so the explicit kill is best-effort, but a
  permission-denied (EPERM) failure was still worth logging.
- capabilities.go cleanupPreparedCapabilities collected per-cap
  errors with errors.Join. Callers get the aggregated value but
  couldn't tell which capability failed when more than one did.

All three now log Warn before the original behaviour continues.
The aggregate return value, control flow, and user-visible error
strings are unchanged — this is purely a "less silence in the
journal" pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 22:30:51 -03:00
71a332a6a1
cli: maturity polish — color, error translation, tabwriter consistency
Adds three small but high-leverage presentation tweaks for v0.1:

1. internal/cli/style is a new ~70 LOC package with Pass/Fail/Warn/
   Dim/Bold helpers. Each is TTY-gated and obeys NO_COLOR. No
   external dep. Wired into the doctor PASS/FAIL/WARN status, the
   "banger:" error prefix on stderr, and the dim 'ready in <elapsed>'
   line.
2. internal/cli/errors translates rpc.ErrorResponse into user-facing
   text. operation_failed becomes invisible (the message wins);
   not_found, already_exists, bad_request, bad_version, unauthorized,
   unknown_method get short labels; unknown codes pass through. The
   daemon-attached op_id lands in dim parens — paste into
   journalctl --grep to find the daemon log line that produced the
   failure.
3. Tabwriter config converges on (0, 8, 2, ' ', 0) across every
   list/table command. The vm prune confirmation table picked up the
   right config; system install + system status switched from bare
   "key: value\n" lines to tabular form. printVMSpecLine drops its
   Unicode middle dot for an ASCII '|' so terminals without UTF-8
   render cleanly.

Tests cover translateRPCError for every code, style helpers no-op
on non-TTY and under NO_COLOR. Smoke status greps switch from
"key: value" to "key   value" to match the new format.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 22:27:07 -03:00
e47b8146dc
daemon: thread per-RPC op_id end-to-end
Today there's no way to correlate a CLI failure with a daemon log
line. operationLog records relative timing but no id, two concurrent
vm.start calls log indistinguishably, and the async
vmCreateOperationState.ID is user-facing yet never reaches the
journal. The root helper logs plain text to stderr while bangerd
logs JSON, so a merged journalctl is hard to grep across the
trust-boundary split.

Mint a per-RPC op id at dispatch entry, store it on context, and
include it as an "op_id" attr on every operationLog record. The
id is stamped onto every error response (including the early
short-circuit paths bad_version and unknown_method). rpc.Call
forwards the context op id on requests so a daemon RPC and the
helper RPCs it triggers all share one id. The helper now logs
JSON to match bangerd, adopts the inbound id, and emits a single
"helper rpc completed" / "helper rpc failed" line per call so
operators can see at a glance how long each privileged op took.
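
The context plumbing is the standard typed-key pattern, roughly
(helper names are illustrative):

  type opIDKey struct{}

  // withOpID stamps the freshly minted id onto the request context
  // at dispatch entry; operationLog and rpc.Call read it back later.
  func withOpID(ctx context.Context, id string) context.Context {
      return context.WithValue(ctx, opIDKey{}, id)
  }

  func opIDFrom(ctx context.Context) string {
      id, _ := ctx.Value(opIDKey{}).(string)
      return id
  }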

vmCreateOperationState.ID is now the same id dispatch generated
for vm.create.begin — one identifier between client status polls,
daemon logs, and helper logs.

The wire format gains two optional fields: rpc.Request.OpID and
rpc.ErrorResponse.OpID, both omitempty so older peers (and the
opposite direction) ignore them. ErrorResponse.Error() now appends
"(op-XXXXXX)" to its string form when set; existing callers that
just print err.Error() get the id for free.

Tests cover: dispatch stamps op_id on unknown_method, bad_version,
and handler-returned errors; rpc.Call exposes the typed
*ErrorResponse via errors.As so the CLI can read code/op_id; ctx
op_id is forwarded to the server in the request envelope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 22:13:44 -03:00
b8c48765fb
daemon: skip fsck_snapshot on freshly-created system overlays
The fsck_snapshot lifecycle step exists to repair stale bitmaps in
a COW file reused from a prior aborted start — without it, the
later e2cp/e2rm calls in patch_root_overlay refuse to touch the
snapshot. On a freshly-created COW there are no stale bitmaps to
repair, so e2fsck -fy is pure overhead.

system_overlay already tracks whether it created the file this run
(sc.systemOverlayCreated, used to drive the rollback path). Reuse
that flag to skip e2fsck entirely on the create-fresh path. The
reused-COW path keeps the fsck for safety. Saves a few hundred ms
per VM create — small absolute win on top of the lazy-mkfs change,
but free.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 21:37:14 -03:00
74a2d064fd
system: mkfs work disks with lazy_itable_init + lazy_journal_init
mkfs.ext4 zeroes the entire inode table and journal at format time
unless told otherwise. On an 8 GiB work disk that zeroing costs
roughly 500-700ms of host CPU/IO per 'banger vm create'; the lazy
variants defer it to a small one-time per-write penalty inside the
guest the first time it touches an unwritten inode, which nobody
can perceive.

Centralise the canonical mkfs -E option list as
system.MkfsExtraOptions and use it everywhere banger calls mkfs.ext4
on a VM-internal image: the no-seed work disk, MaterializeWorkDisk,
BuildWorkSeedImage, and the imagepull rootfs builder. The work-disk
paths feed vm create directly; the others are one-off but still
benefit from the faster format.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 21:32:57 -03:00
74e5a7cedb
cli: wait for the daemon socket to answer ping after install/restart
systemd's Type=simple reports a unit "active" the moment its
ExecStart binary is exec()'d, which for bangerd happens well before
the daemon has read its config and bound /run/banger/bangerd.sock.
'banger system install' and 'banger system restart' both returned
inside that window, so the very next 'banger ...' command would hit
ensureDaemon, miss on a single ping, and exit with "service not
reachable; run sudo banger system restart" — the same restart that
had just succeeded. Smoke tripped over this on every run.

Add waitForDaemonReady: poll daemonPing for up to 15s after the
restart returns. Both the system install and restart paths now
block until the daemon is genuinely accepting RPCs, so the next
CLI invocation can talk to it without retrying.
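
The wait itself is a plain poll loop, roughly (the interval, error
text, and daemonPing's exact signature are assumptions):

  func waitForDaemonReady(timeout time.Duration) error {
      deadline := time.Now().Add(timeout)
      for {
          if err := daemonPing(); err == nil {
              return nil
          }
          if time.Now().After(deadline) {
              return fmt.Errorf("daemon not answering ping after %s",
                  timeout)
          }
          time.Sleep(200 * time.Millisecond)
      }
  }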

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 21:22:31 -03:00
679cf87cfd
cli: log elapsed time after vm create reaches ready
Print '[vm create] ready in <elapsed>' to stderr once the create
operation completes successfully. Surfaces how long the full
create-to-ready cycle took (image resolve + work disk + boot +
guest agents + capability post-start), which the per-stage
progress lines don't add up to in any visible way.

Format adapts to scale: sub-second prints as 'NNNms', sub-minute
keeps one decimal ('4.7s'), longer prints as 'MmSSs'. Always
emitted (not gated on TTY) so logged and CI output carry the
number too.
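
The three ranges map onto something like (helper name illustrative):

  func formatElapsed(d time.Duration) string {
      switch {
      case d < time.Second:
          return fmt.Sprintf("%dms", d.Milliseconds())
      case d < time.Minute:
          return fmt.Sprintf("%.1fs", d.Seconds())
      default:
          return fmt.Sprintf("%dm%02ds",
              int(d.Minutes()), int(d.Seconds())%60)
      }
  }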

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 21:17:47 -03:00
a3a51e06c4
daemon: build the work disk fresh instead of cloning the seed file
Old flow on every 'banger vm run' that hit the seeded path:
CopyFilePreferClone the seed file (FICLONE attempt + io.Copy + fsync
fallback), then e2fsck -fp + resize2fs to grow the FS to the spec
size. On filesystems without reflink support that meant pushing
512+ MiB through the kernel followed by a full filesystem check
and resize, even though the seed only carries a few KB of dotfiles
— minWorkSeedBytes is 512 MiB but the actual payload is tiny.
That is the minute-long stall on the 'cloning work seed' stage
users see today.

Replace the copy with a sized fresh ext4: truncate to
WorkDiskSizeBytes, mkfs.ext4 -F -E root_owner=0:0, debugfs rdump
to extract the seed's contents, then ingest each file via the
sudoless ext4 toolkit (MkdirExt4 / WriteExt4FileOwned, root:root,
mode preserved). Sub-second regardless of seed size or requested
work-disk size; no fsck or resize needed because the FS is created
at its final size from the start.

Also drop the now-implementation-pinned
TestEnsureWorkDiskClonesSeedImageAndResizes — its premise (a
scripted e2fsck/resize2fs sequence) no longer reflects the code,
and smoke covers the new flow end to end. Stage label changed
from 'cloning work seed' to 'applying work seed' to match what
actually happens.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:42:10 -03:00
6c37fec17b
images: remove the docker field
The 'docker' bit on model.Image was unused at runtime — every code
path that branched on it had been removed earlier, leaving only the
field, the SQL column, the --docker flag, and the
#feature:docker sentinel that BuildMetadataPackages emitted into a
hash file. None of those have callers anymore.

Strip the field from the model, the API params, the SQLite column,
the CLI flag, and BuildMetadataPackages's signature. Add migration
2 (drop_images_docker) so existing installs lose the column on next
daemon start. ALTER TABLE ... DROP COLUMN is fine: SQLite has
supported it since 3.35 (2021).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:28:40 -03:00
408ad6756c
system: build work-seed without sudo
BuildWorkSeedImage used to mount the source rootfs and the new seed
image — both via sudo. After the privilege split (59e48e8) the owner
daemon runs without sudo and those mounts fail silently inside the
image-pull pipeline (runBuildWorkSeed swallows errors), so every
freshly pulled image landed in the store with an empty WorkSeedPath
and 'banger doctor' kept warning that /root would be empty.

Rewrite the builder around the existing sudoless toolkit:

  1. RdumpExt4Dir extracts /root from the source rootfs into a host
     tempdir (debugfs, no mount).
  2. truncate + mkfs.ext4 -F -E root_owner=0:0 produces an empty
     user-owned ext4 file.
  3. A Go walk over the staged tree calls MkdirExt4 /
     WriteExt4FileOwned for every dir + regular file, forcing
     root:root and preserving mode bits.

Symlinks and special files in /root are skipped — extremely rare on
a stock distro and not part of what makes a useful seed.

Fix won't retroactively populate already-pulled images: re-pull the
default image (e.g. 'banger image delete debian-bookworm && banger
image pull debian-bookworm') to get a seeded work-seed.ext4.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:18:23 -03:00
3ec357090a
daemon: doctor passes vm dns when banger itself owns the port
The previous check tried to bind 127.0.0.1:42069 and warned on
'address already in use' — which is exactly the state when the
banger daemon is running, the case the user ran 'doctor' to
confirm. The warning was actively misleading.

Now, on 'address already in use', probe the listener with a
*.vm DNS query that only banger's vmdns server answers
authoritatively (NXDOMAIN with Authoritative=true). If the shape
matches we pass; if the port is held by something else we still
warn. Tests cover both branches: a real vmdns server is accepted,
and a silent UDP listener on the same port is rejected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 18:57:27 -03:00
35bfac3f13
cli: rewrite help text for AI-driven discovery
Frontier models tend to discover a CLI by running --help, scanning
the Long description, and inferring the dominant workflow from the
examples. Today's banger help reads like a man page index — every
verb has a one-line Short and nothing else. This rewrites the
groups (banger, vm, vm workspace, image, kernel, system,
ssh-config) so each landing page answers "what is this for, what's
the 80% command, what comes next" in three to ten lines, with
runnable examples.

Also disambiguates the near-twin lifecycle commands so a model
reading the subcommand index can tell stop/kill/delete apart at a
glance:

  start    Start a stopped VM
  stop     Stop a running VM gracefully
  restart  Stop then start a VM
  kill     Force-kill a VM (use when 'vm stop' hangs)
  delete   Stop a VM and remove its disks (irreversible)

vm create / vm ssh / vm logs / vm show pick up Long descriptions
and examples for the same reason. No behaviour changes; help text
only.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 15:02:08 -03:00
41ced66a54
mise: pin go and shellcheck
go 1.25.0 matches go.mod's toolchain. shellcheck is the only non-go
tool make lint hard-requires.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 13:11:51 -03:00
b0b1300314
docs: add the privilege model document
Explain what runs as the owner user vs root, every helper RPC method
and its validation gate, the on-disk paths banger writes, network
mutations, and how install/uninstall work end to end. The aim is to
give a reader enough information to grant or refuse the privileges
banger asks for during system install with their eyes open.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 12:55:18 -03:00
47d83ce4d7
gitignore: exclude the entire build directory
Replace the per-subdir entries with a single /build/ to cover any
new outputs Make or scripts add later (build/manual exists today;
future docs/coverage variants would otherwise need new lines).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 12:55:11 -03:00
59e48e830b
daemon: split owner daemon from root helper
Move the supported systemd path to two services: an owner-user bangerd for
orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop,
and Firecracker ownership. This removes repeated sudo from daily vm and image
flows without leaving the general daemon running as root.

Add install metadata, system install/status/restart/uninstall commands, and a
system-owned runtime layout. Keep user SSH/config material in the owner home,
lock file_sync to the owner home, and move daemon known_hosts handling out of
the old root-owned control path.

Route privileged lifecycle steps through typed privilegedOps calls, harden the
two systemd units, and rewrite smoke plus docs around the supported service
model.

Verified with make build, make test, make lint, and make smoke on the
supported systemd host path.
2026-04-26 12:43:17 -03:00
3edd7c6de7
daemon: build a work-seed during image pull, refresh doctor check
Before this change `banger image pull` (both OCI-direct and bundle
paths) shipped images with an empty WorkSeedPath — the BuildWorkSeedImage
helper existed only behind the hidden `banger internal work-seed` CLI.
Every pulled image hit ensureWorkDisk's no-seed branch, and the guest
booted with a bare /root (no .bashrc, no .profile, none of the distro
defaults).

Pull now calls BuildWorkSeedImage after the rootfs is finalised (OCI)
or fetched (bundle). The builder is behind a new `workSeedBuilder` test
seam so existing pull tests don't accidentally demand sudo mount. The
build failure is non-fatal: any error logs a warning and leaves
WorkSeedPath empty — images stay publishable even if the pulled rootfs
has no /root to extract.

Verified end-to-end by wiping the cached smoke image and re-pulling:
work-seed.ext4 lands in the artifact dir next to rootfs.ext4, and all
21 smoke scenarios pass.

Also refreshes the "feature /root work disk" fallback tooling check —
the no-seed path no longer touches mount/umount/cp after commit
0e28504, so the doctor check now only requires truncate + mkfs.ext4.
The warn copy updates from "new VM creates will be slower" to "guest
/root will be empty", which matches the actual tradeoff post-refactor.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 20:24:10 -03:00
02773c1cf5
daemon: delete flattenNestedWorkHome and normaliseHomeDirPerms
Both helpers are stranded: commit f068536 dropped their last callers
from ensureAuthorizedKeyOnWorkDisk and seedAuthorizedKeyOnExt4Image,
and commit 6ab1a2b dropped the ensureGitIdentity / runFileSync calls
that still held them up. Every on-disk-patch code path now drives the
ext4 image directly via MkdirExt4 / WriteExt4FileOwned /
EnsureExt4RootPerms.

Also drops TestFlattenNestedWorkHomeCopiesEntriesIndividually —
premise gone with the function. The sshd_config_test comment
referencing normaliseHomeDirPerms now points at EnsureExt4RootPerms.

Net sudo reduction across the five-commit series: work-disk creation,
authsync, image seeding, git identity sync, and file_sync all drop
sudo entirely against user-owned ext4 files. Remaining sudo in
internal/daemon is confined to firecracker process launch, tap/dm
device setup, iptables/NAT, and dmsnap/fcproc — things that
legitimately need CAP_SYS_ADMIN or CAP_NET_ADMIN. MountTempDir stays
on exclusively as an image-build helper.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 18:33:06 -03:00
6ab1a2b844
daemon: rewrite git identity sync + file_sync on ext4 toolkit
ensureGitIdentityOnWorkDisk, writeGitIdentity, runFileSync, and
copyHostDir all dropped their mount + sudo install/mkdir/chmod/chown
scaffolding. Every write now goes through MkdirExt4,
WriteExt4FileOwned, ReadExt4File, and the new MkdirAllExt4 helper —
all sudoless against user-owned ext4 images.

Net effect with the prior two commits: ensureWorkDisk, authsync, image
seeding, git identity sync, and file_sync no longer mount the work
disk or spawn sudo mkdir/chmod/chown/cat/install. Only the
image-build path (which legitimately produces root-owned artifacts)
still touches MountTempDir.

The filesystemRunner test harness grew a small debugfs/e2cp/e2rm
emulator so the WorkspaceService tests keep exercising their real
code paths without a live ext4 image. The mock is deliberately
dumb — it only implements the subset runFileSync and writeGitIdentity
drive.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 18:29:30 -03:00
f0685366ec
daemon: rewrite authsync + image seeding on ext4 toolkit
ensureAuthorizedKeyOnWorkDisk and seedAuthorizedKeyOnExt4Image both
drove mount + sudo mkdir/chmod/chown/cat/install to patch
/.ssh/authorized_keys into a work disk or work-seed. Both now delegate
to a shared provisionAuthorizedKey helper that uses the ext4 toolkit
introduced in 7704396 — EnsureExt4RootPerms + MkdirExt4 +
Ext4PathExists/ReadExt4File + WriteExt4FileOwned. No mount, no sudo,
no host-path staging.

Drops ~10 sudo call sites from the VM create and image pull flows
and deletes the TestEnsureAuthorizedKeyOnWorkDiskRepairsNestedRootLayout
premise (flattenNestedWorkHome will disappear entirely in the next
commit — the no-seed path no longer copies /root, and the work-seed
path produces flat seeds).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 18:21:50 -03:00
0e28504892
daemon: rewrite ensureWorkDisk no-seed path to skip the mount + cp
The no-seed branch used to mount the base rootfs read-only, mount
the freshly mkfs'd work disk read-write, sudo-cp /root from one to
the other, then flatten any accidental /root/root/ nesting. Five
sudo call sites packed into a fallback that the common image path
doesn't even exercise.

Replace with: `mkfs.ext4 -F -E root_owner=0:0` and nothing else.
mkfs already stamps inode 2 as root:root:0755 — sshd's StrictModes
walks that dir's ownership when the work disk mounts at /root in
the guest, so getting it right from mkfs means authsync can just
write authorized_keys without any repair pass.

Tradeoff: no-seed VMs lose the base rootfs's default /root dotfiles
(.bashrc, .profile). The no-seed path is explicitly the degraded
fallback — `banger doctor` already warns about it — and users who
want those back have two documented knobs: rebuild the image with
a work-seed, or land them via [[file_sync]].

Sudo call sites removed: 5 (MountTempDir × 2, sudo cp -a,
flattenNestedWorkHome's chmod/cp/rm). flattenNestedWorkHome itself
stays alive for now — authsync + image_seed still call it — and
gets deleted in commit 5 once its last caller goes away.

While here: fix the freshly-added EnsureExt4RootPerms helper.
`set_inode_field <2> mode N` overwrites the full i_mode word
instead of preserving the type nibble, so the initial
implementation that passed just the permission bits (0755) would
reset the fs root to regular-file shape and break the next kernel
mount with "Structure needs cleaning." The corrected call OR's in
S_IFDIR (0o040000) explicitly. Test updated to match.
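
That is, the debugfs line has to carry the full i_mode word (sketch;
helper name illustrative):

  func rootPermsLine() string {
      // S_IFDIR (0o040000) stays OR'd in: set_inode_field writes the
      // whole i_mode word, and 0o755 alone would leave inode <2>
      // without its directory type bits.
      const mode = 0o040000 | 0o755
      return fmt.Sprintf("set_inode_field <2> mode 0%o", mode)
  }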

Smoke: 21/21 scenarios green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 18:09:32 -03:00
77043966d4
system: add ext4 toolkit for non-sudo work-disk writes
The daemon mounts every VM's work disk on the host via sudo, copies
files in as root, chmods+chowns them, and unmounts. That's ~18 of
banger's runtime RunSudo calls. The ext4 image is a regular file the
daemon user owns; e2cp / debugfs can write to it directly and bake
uid/gid/mode into the filesystem metadata without the caller being
root. `imagepull.ApplyOwnership` already proves this works in
production (OCI layer flattening writes 0/0/root-owned inodes from
an unprivileged daemon).

This commit adds the toolkit layer. Callers land in the next four
commits:

  - MkdirExt4 — idempotent directory create + metadata reset, single
    debugfs batch
  - WriteExt4FileOwned — e2cp + debugfs-driven uid/gid/mode, auto-
    cleans the host tempfile
  - SetExt4Ownership — sif + set_inode_field batch for existing
    inodes (no mkdir implied)
  - EnsureExt4RootPerms — fixes inode <2> (the fs root, which is
    `/root` once the work disk is mounted inside the guest), the
    thing sshd's StrictModes walks
  - Ext4PathExists — yes/no probe via `debugfs -R "stat ..."` with
    "File not found" detection
  - ReadExt4File — bytes-returning wrapper around the existing
    ReadDebugFSText with the same path rejection

Design notes:

  - extfsRun auto-switches Run ↔ RunSudo on imagePath's type: regular
    files get the unprivileged path, block devices (dm-snapshot,
    loops) get sudo. The same helper works for both patchRootOverlay
    (dm device) and work-disk writes (user-owned file). No caller
    flag needed — os.Stat tells us.
  - debugfsScript batches set_inode_field + sif + mkdir lines into
    one `debugfs -w -f -` stdin invocation on any Runner that
    implements StdinRunner (production's system.Runner does). Matches
    imagepull.ApplyOwnership's existing pattern; dramatically cheaper
    than per-call subprocesses.
  - Paths are escaped for debugfs on the way in: spaces get double-
    quoted, double-quote/backslash/newline are rejected outright
    (debugfs's hand-rolled parser doesn't reliably escape those and
    we'd rather fail fast than silently scribble over the wrong
    inode).
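
Roughly the policy in Go (hedged sketch; the real
rejectDebugfsUnsafePath may differ in detail):

  package sketch

  import (
      "fmt"
      "strings"
  )

  // escapeDebugfsPath quotes what debugfs can handle and refuses what it can't.
  func escapeDebugfsPath(p string) (string, error) {
      if strings.ContainsAny(p, "\"\\\n") {
          return "", fmt.Errorf("path %q contains characters debugfs cannot escape reliably", p)
      }
      if strings.Contains(p, " ") {
          return `"` + p + `"`, nil // spaces are fine once the token is double-quoted
      }
      return p, nil
  }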

Tests: seven behaviour assertions via scripted + stdin-scripted
runners — existence probe (found + missing + rejection), read
passthrough, mkdir batch contents (new vs. pre-existing path), write
tempfile cleanup + mode line shape, root-inode addressing, and the
full rejectDebugfsUnsafePath matrix.

No production wiring change in this commit — the helpers land
unused. `make smoke` stays green (21/21) because nothing else
shifted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 16:31:50 -03:00
d743a8ba4b
daemon: persist teardown fallbacks and reject unsafe import paths
Preserve cleanup after daemon restarts and harden OCI and tar imports
against filenames that debugfs cannot encode safely.

Mirror tap, loop, and dm teardown identity onto VM.Runtime, teach
cleanup and reconcile to fall back to those persisted fields when
handles.json is missing or corrupt, and clear the recovery state on
stop, error, and delete paths.

Reject debugfs-hostile entry names during flattening and in
ApplyOwnership itself, then add regression coverage for corrupt
handles.json recovery and unsafe import paths.

Verified with targeted go tests, make lint-go, make lint-shell, and
make build.
2026-04-23 16:21:59 -03:00
86a56fedb3
daemon: extract StatsService sibling; shrink VMService's surface
Closes commit 3 of the god-service decomposition. VMService still
owned 45+ methods after the startVMLocked extraction and RPC table
landed in commits 1 and 2. Stats / ports / health / vsock-ping sit
in a corner of that surface that doesn't share any state with
lifecycle orchestration — nothing about "what's this VM's CPU
doing" belongs in the same service as Create/Start/Stop/Delete/Set.

New StatsService owns:

  - GetVMStats / getVMStatsLocked / collectStats (stats collection)
  - HealthVM / PingVM (vsock-agent health probe)
  - PortsVM + buildVMPorts + probeWebListener + probeHTTPScheme +
    dedupeVMPorts (listening-port enumeration)
  - pollStats (background ticker refresh)
  - stopStaleVMs (auto-stop sweep past config.AutoStopStaleAfter)

The three VMService touch-points stats genuinely needs — vmAlive,
vmHandles, the per-VM lock helpers, plus cleanupRuntime for the
stale-sweep tear-down — come in as function-typed closures, not a
*VMService pointer. StatsService has no back-reference to its
sibling. Mirrors the dependency-struct pattern WorkspaceService
already uses.
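
The shape, sketched with illustrative names (the real deps struct
carries the tree's actual types):

  package sketch

  import "sync"

  // statsDeps carries the VMService touch-points as function values,
  // so StatsService never holds a *VMService back-reference.
  type statsDeps struct {
      vmAlive        func(id string) bool
      vmHandles      func(id string) (any, bool)
      lockVM         func(id string) *sync.Mutex
      cleanupRuntime func(id string) error
  }

  type StatsService struct {
      deps statsDeps
  }

  func newStatsService(d statsDeps) *StatsService { return &StatsService{deps: d} }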

Wiring: d.stats is populated in wireServices AFTER d.vm (closures
must see a non-nil d.vm at call time). Dispatch table's four
entries (vm.stats / vm.health / vm.ping / vm.ports) now resolve
through d.stats. Background loop's pollStats / stopStaleVMs
tickers do the same. Dispatch surface from the RPC client's
perspective is byte-identical.

After this commit:

  - vm_stats.go and ports.go are deleted; their content (plus the
    stats-specific fields) lives in stats_service.go.
  - VMService loses 12 methods. It's still the biggest service
    (~30 methods, all lifecycle-supporting: handle cache, disk
    provisioning, preflight, create-ops registry, lock helpers,
    the lifecycle verbs themselves) but it's finally one coherent
    concern instead of five.

Tests:
  - TestWireServicesInstantiatesStatsService — pins that the
    wiring order puts d.stats non-nil + its five closures all
    populated. Prevents a silent background-loop regression.
  - All existing tests that called d.vm.HealthVM / d.vm.PingVM /
    d.vm.PortsVM / d.vm.collectStats were re-pointed at d.stats.

Smoke: all 21 scenarios green, including vm ports (exercises the
new PortsVM entry end-to-end) and the long-running workspace
scenarios (exercise the background stats poller implicitly).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:46:59 -03:00
366e1560c9
daemon: replace RPC switch with generic method-to-handler table
The dispatch method was a single ~240-line switch of 34 cases, each
following the same pattern: decode params into some type P, call a
service method returning (R, error), wrap R in a result struct and
either marshalResultOrError-encode or return a raw rpc.NewError.
Adding a method was a 4-line ceremony per site, and grepping for
"methods banger speaks" meant reading the full switch.

New shape, in internal/daemon/dispatch.go:

  - handler is the uniform `func(ctx, d, req) rpc.Response` type
    every method dispatches through.
  - paramHandler[P, R] is the generic wrapper that absorbs 28 of
    the 34 cases (decode, call, marshal). No reflection — P and R
    are deduced from the service-call literal, so each map entry
    is a one-liner referencing a small adapter func.
  - noParamHandler[R] is the decode-free variant for 6 methods
    that don't carry params.
  - rpcHandlers is the single source of truth for which methods
    exist and which adapter they dispatch to.
  - Four specials (ping, shutdown, vm.logs, vm.ssh) stay as named
    `handler`-typed functions: ping/shutdown encode with raw
    rpc.NewResult, vm.logs/vm.ssh need pre-service validation to
    emit distinct error codes (not_found, not_running) that the
    generic wrapper maps uniformly to operation_failed.
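
A compact sketch of the adapter shapes — request/response/daemon here
are stand-ins for the real rpc and daemon types:

  package sketch

  import (
      "context"
      "encoding/json"
  )

  type request struct {
      Method string
      Params json.RawMessage
  }

  type response struct {
      Result any
      Err    error
  }

  type daemon struct{}

  // handler is the uniform shape every method dispatches through.
  type handler func(ctx context.Context, d *daemon, req request) response

  // paramHandler adapts a typed service call: decode P, invoke, wrap R.
  // P and R are inferred from the call literal — no reflection.
  func paramHandler[P, R any](call func(ctx context.Context, d *daemon, p P) (R, error)) handler {
      return func(ctx context.Context, d *daemon, req request) response {
          var p P
          if err := json.Unmarshal(req.Params, &p); err != nil {
              return response{Err: err}
          }
          r, err := call(ctx, d, p)
          return response{Result: r, Err: err}
      }
  }

  // noParamHandler is the decode-free variant for methods without params.
  func noParamHandler[R any](call func(ctx context.Context, d *daemon) (R, error)) handler {
      return func(ctx context.Context, d *daemon, _ request) response {
          r, err := call(ctx, d)
          return response{Result: r, Err: err}
      }
  }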

Daemon.dispatch shrinks from a 240-line switch to 11 lines:
version check, test-only handler short-circuit, table lookup,
invoke-or-unknown.

Tests:

  - TestRPCHandlersMatchDocumentedMethods — keyset guard. Adding
    or removing a method without updating the expected slice is a
    red flag the test surfaces.
  - TestRPCHandlersAllNonNil — catches nil-function registrations.

All pre-existing dispatch tests (param decode, error codes, etc.)
keep passing unchanged — the handler contract for any given
method is byte-identical from the RPC client's perspective. Smoke
(all 21 scenarios) exercises every code path end-to-end.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:40:08 -03:00
11a33604c0
daemon: extract startVMLocked into step runner with per-step rollback
startVMLocked was a ~260-line method running 18 sequential phases
with one lumped error path: on any failure, cleanupOnErr called
cleanupRuntime — a catch-all teardown that didn't distinguish
"this phase acquired resources we should undo" from "this phase is
idempotent." The blast radius was the entire VM lifecycle. Every
tweak to boot, NAT, disk, or auth-sync orchestration had to reason
about a closure that could fire at any of 18 points.

This commit extracts the phases into a data-driven pipeline:

  - startContext threads the mutable state (vm, live, apiSock,
    dmName, tapName, etc.) through every step by pointer so step
    bodies mutate in place without returning copies.
  - startStep carries the op.stage name, optional vmCreateStage
    progress ping, optional log attrs, a run closure, and an
    optional undo closure.
  - runStartSteps walks steps in order, adding each step — including
    the one that fails — to the rollback set (so partial-acquire
    failures like machine.Start's post-spawn HTTP config still get
    their undo fired), then iterates the rollback set in reverse and
    joins errors via errors.Join.
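
The driver contract, sketched (the real startStep and startContext
carry far more state):

  package sketch

  import "errors"

  type startStep struct {
      stage string
      run   func() error
      undo  func() error // nil when the step has nothing to tear down
  }

  func runStartSteps(steps []startStep) error {
      var done []startStep
      for _, st := range steps {
          done = append(done, st) // include the failing step so its partial acquisitions roll back too
          if err := st.run(); err != nil {
              errs := []error{err}
              for i := len(done) - 1; i >= 0; i-- { // undo in reverse order
                  if done[i].undo == nil {
                      continue
                  }
                  if uerr := done[i].undo(); uerr != nil {
                      errs = append(errs, uerr) // undo failures must not hide the root cause
                  }
              }
              return errors.Join(errs...)
          }
      }
      return nil
  }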

Each phase that acquires a resource now owns its own undo:
system_overlay removes a file it created, dm_snapshot cleans up
the loop + DM handles it set, prepare_host_features delegates to
capHooks.cleanupState, tap releases via releaseTap, metrics_file
removes the file, firecracker_launch kills the spawned PID and
drops the sockets, post_start_features calls capHooks.cleanupState
again (capability Cleanup hooks are idempotent — safe to call
whether PostStart reached every cap or not). The 11 phases with
no teardown obligation leave `undo` nil and the driver silently
skips them on rollback.

cleanupRuntime is retired from the start-failure path. It stays
intact for reconcile, stopVMLocked, killVMLocked, deleteVMLocked,
stopStaleVMs — the crash-recovery / lifecycle-teardown contract
those paths rely on is unchanged.

startVMLocked shrinks from ~225 lines of sequential-phase code
plus a cleanupOnErr closure to ~45 lines: compute derived paths,
build the step list, drive it, persist ERROR state on failure.
Stage names preserved 1:1 so existing log grep + the async-create
progress stream stay compatible.

Tests:

  - TestRunStartSteps_RollsBackInReverseOnFailure — the contract
    is pinned: succeeded-before-failing run, all their undos in
    reverse, failing step's undo also fires, original err still
    visible via errors.Is.
  - TestRunStartSteps_SkipsNilUndos — optional-undo contract.
  - TestRunStartSteps_JoinsRollbackErrors — undo failures don't
    hide the root cause.
  - TestRunStartSteps_HappyPathNoRollback — success path never
    fires any undo.

Smoke: all 21 scenarios pass, including the start-path ones
(bare vm run, workspace vm run, vm restart, vm lifecycle, vm set
reconfig) that exercise real firecracker boots end-to-end.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:34:34 -03:00
2ebd2b64bb
imagepull: update stale package + BuildExt4 docs
The package doc in internal/imagepull/imagepull.go still described
a two-step Pull + Flatten + BuildExt4 pipeline and warned that the
resulting image was "suitable as input to `image build` but not
directly bootable" because ownership preservation was deferred.
That's been wrong for a while: ApplyOwnership
(internal/imagepull/ownership.go) restores tar-header uid/gid/mode
via a debugfs set_inode_field batch, and InjectGuestAgents
(internal/imagepull/inject.go) writes banger's guest-side assets
into the image. `image pull` now produces a directly bootable
rootfs end-to-end.

Updated:
  - imagepull.go package doc — describes the full
    Pull → Flatten → BuildExt4 → ApplyOwnership → InjectGuestAgents
    pipeline and drops the "Phase A limitations" list that spoke
    of deferred ownership.
  - ext4.go BuildExt4 doc — notes that the filesystem is root-owned
    via `-E root_owner=0:0` and points at ApplyOwnership as the
    step that handles per-file ownership, instead of the previous
    "see the package doc for the implications" handwave.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 14:34:25 -03:00
5eceebe49f
daemon: persist tap device on VM.Runtime so NAT teardown survives handle-cache loss
Cleanup identity for kernel objects was split across two sources of
truth: vm.Runtime (DB-backed, durable) held paths and the guest IP,
but the TAP name lived only in the in-process handle cache + the
best-effort handles.json scratch file next to the VM dir. Every
other cleanup-identifying datum has a fallback — firecracker PID
can be rediscovered via `pgrep -f <apiSock>`, loops via losetup, dm
name from the deterministic ShortID(vm.ID). The tap is the one
truly cache-only datum (allocated from a pool, not derivable).

That made NAT teardown fragile:

  - daemon crash between `acquireTap` and the handles.json write
  - handles.json corrupt on the next daemon start
  - partial cleanup that already zeroed the cache

In any of those cases natCapability.Cleanup short-circuited
("skipping nat cleanup without runtime network handles") and the
per-VM POSTROUTING MASQUERADE + the two FORWARD rules keyed off
the tap would leak. The VM row in the DB still existed, so a retry
couldn't close the loop — the tap name was simply gone.

Fix: mirror TapDevice onto model.VMRuntime (serialised via the
existing runtime_json column, omitempty so existing rows upgrade
cleanly). Set it in startVMLocked right next to the
s.setVMHandles call that seeds the in-memory cache; clear it at
every post-cleanup reset site (stop normal path + stop stale
branch, kill normal path + kill stale branch, cleanupOnErr in
start, reconcile's stale-vm branch, the stats poller's auto-stop
path).

Fallbacks now cascade:

  - natCapability.Cleanup: handles cache → Runtime.TapDevice
  - cleanupRuntime (releaseTap): handles cache → Runtime.TapDevice

Both surfaces refuse gracefully (old behaviour) only when neither
source has a value, which really does mean "no tap was ever
allocated for this VM" rather than "we lost track of it."
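
The cascade, sketched with stand-in types (illustrative, not the
tree's code):

  package sketch

  type vmRuntime struct{ TapDevice string }

  type handleCache interface {
      TapFor(vmID string) (string, bool)
  }

  // tapForCleanup prefers the in-memory handle cache and falls back to
  // the persisted runtime field; "" means no tap was ever allocated.
  func tapForCleanup(cache handleCache, vmID string, rt vmRuntime) string {
      if tap, ok := cache.TapFor(vmID); ok && tap != "" {
          return tap
      }
      return rt.TapDevice
  }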

Test: TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice clears
the handle cache, sets vm.Runtime.TapDevice, and asserts Cleanup
reaches the runner — the exact scenario the review flagged as a
plausible leak and the exact code path that now guarantees it
doesn't.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 14:21:13 -03:00
1850904d9c
file_sync: skip nested symlinks during recursive copy
A user who sets `[[file_sync]] host = "~/.aws"` (per the README's
own example) can unintentionally copy files from outside that
directory if .aws contains symlinks. copyHostDir used os.Stat
during recursion, which transparently follows: a symlink to a
credential dir elsewhere would be recursed into, materialising
unrelated secrets inside the guest. For credential trees that's
an avoidable sprawl vector.

Switched copyHostDir's per-entry probe from os.Stat to os.Lstat
and added a default skip-with-warning branch for ModeSymlink.
Files and dirs at the SAME level copy as before; symlinks (both
file and directory flavours) surface a "file_sync skipped
symlink (would escape the requested tree)" warn log and are
otherwise omitted.

Top-level entry paths still follow — the Stat in runFileSync is
unchanged. The user explicitly named that path, so resolving
"~/.aws" through a symlink out of $HOME is on them.

Tests:
- TestRunFileSyncSkipsNestedSymlinks — builds a synced dir with
  both a file symlink and a directory symlink pointing outside
  the tree; asserts real files copy, symlinks do not materialise
  anywhere in the guest mount, and each skipped symlink surfaces
  a warn log entry.

README updated with a one-line note about the skip behaviour so
users know to expect it rather than chasing "why didn't my file
show up."

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 14:11:58 -03:00
caa6a2b996
model: validate VM names as DNS labels at CLI + daemon
A VM name flows into five places that all have narrower grammars
than "arbitrary string":

  - the guest's /etc/hostname  (vm_disk.patchRootOverlay)
  - the guest's /etc/hosts      (same)
  - the <name>.vm DNS record    (vmdns.RecordName)
  - the kernel command line     (system.BuildBootArgs*)
  - VM-dir file-path fragments  (layout.VMsDir/<id>, etc.)

Nothing in the chain was validating the input. A name with
whitespace, newline, dot, slash, colon, or = would produce broken
hostnames, weird DNS labels, smuggled kernel cmdline tokens, or
(in the worst case) surprising traversal through the on-disk
layout. Not host shell injection — we already avoid shelling out
with the raw name — but a real correctness and supportability bug.

New: model.ValidateVMName. Rules:

  - 1..63 chars (DNS label max per RFC 1123; also a comfortable
    /etc/hostname cap)
  - lowercase ASCII letters, digits, '-' only
  - no leading or trailing '-'
  - no normalization — the name is the user-visible identifier
    (store key, `ssh <name>.vm`, `vm show`); silently rewriting
    "MyVM" → "myvm" would hand the user back something different
    than they typed
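
The rules above, as an illustrative sketch (model.ValidateVMName may
phrase its errors differently):

  package sketch

  import "fmt"

  func validateVMName(name string) error {
      if len(name) < 1 || len(name) > 63 {
          return fmt.Errorf("vm name must be 1-63 characters, got %d", len(name))
      }
      if name[0] == '-' || name[len(name)-1] == '-' {
          return fmt.Errorf("vm name must not start or end with '-'")
      }
      for _, c := range name {
          switch {
          case c >= 'a' && c <= 'z', c >= '0' && c <= '9', c == '-':
          default:
              return fmt.Errorf("vm name may only contain lowercase letters, digits, and '-'")
          }
      }
      return nil
  }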

Called from two places:

  - internal/cli/commands_vm.go vmCreateParamsFromFlags — rejects
    bad `--name` values before any RPC. Empty name still passes
    through so the daemon can generate one.
  - internal/daemon/vm_create.go reserveVM — defense in depth for
    any non-CLI RPC caller (SDK, direct JSON over the socket).

Tests:

  - internal/model/vm_name_test.go — exhaustive character-class
    matrix (space, newline, tab, dot, slash, colon, equals, quote,
    control chars, unicode letters, uppercase, leading/trailing
    hyphen, over-length, max-length-exact, digits-only).
  - internal/cli TestVMCreateParamsFromFlagsRejectsInvalidName —
    CLI wire-through + empty-name passthrough.
  - internal/daemon TestReserveVMRejectsInvalidName — daemon
    defense-in-depth (including `box/../evil` path-traversal).
  - scripts/smoke.sh — end-to-end rejection + no-leaked-row
    assertion.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 14:06:40 -03:00
700a1e6e60
cleanup: drop pre-v0.1 migration scaffolding + legacy-behavior refs
banger hasn't shipped a public release — every "legacy", "pre-opt-in",
"previously", "migration note", "no longer" reference in the tree is
pinning against a state no real user's install has ever been in.
That scaffolding has weight: it's a coordinate system future readers
have to decode, and it keeps dead code alive.

Removed (code):
- internal/daemon/ssh_client_config.go
    - vmSSHConfigIncludeBegin / vmSSHConfigIncludeEnd constants and
      every `removeManagedBlock(existing, vm...)` call they enabled
      (legacy inline `Host *.vm` block scrub)
    - cleanupLegacySSHConfigDir (+ its caller in syncVMSSHClientConfig)
      — wiped a pre-opt-in sibling file under $ConfigDir/ssh
    - sameDirOrParent + resolvePathForComparison — only ever used
      by cleanupLegacySSHConfigDir
    - the "also check legacy marker" fallback in
      UserSSHIncludeInstalled / UninstallUserSSHInclude
- internal/store/migrations.go
    - migrateDropDeadImageColumns (migration 2) + its slice entry
    - dropColumnIfExists (orphaned after the above)
    - addColumnIfMissing + the whole "columns added across the pre-
      versioning lifetime" block at the end of migrateBaseline —
      subsumed into the baseline CREATE TABLE
    - `packages_path TEXT` column on the images table (the
      throwaway migration 2 dropped it, but there was never any
      reader)
- internal/daemon/vm.go
    - vmDNSRecordName local wrapper — was justified as "avoid
      pulling vmdns into every file"; three of four callers already
      imported vmdns directly, so inline the one stray call
- internal/cli/cli_test.go
    - TestLegacyRemovedCommandIsRejected (`tui` subcommand never
      shipped)

Removed / simplified (tests):
- ssh_client_config_test.go: dropped TestSameDirOrParentHandlesSymlinks,
  TestSyncVMSSHClientConfigPreservesUserKeyInLegacyDir,
  TestSyncVMSSHClientConfigNarrowsCleanupToLegacyFile,
  TestSyncVMSSHClientConfigLeavesUnexpectedLegacyContents,
  TestInstallUserSSHIncludeMigratesLegacyInlineBlock, plus the
  "legacy posture" regression strings in the remaining happy-path
  test; TestUninstallUserSSHIncludeRemovesBothMarkerBlocks collapsed
  to a single-block test
- migrations_test.go: dropped TestMigrateDropDeadImageColumns_AcrossInstallPaths,
  TestDropColumnIfExistsIsIdempotent; TestOpenReadOnlyDoesNotRunMigrations
  simplified to test against the baseline marker

Removed (docs):
- README.md "**Migration note.**" blockquote about the SSH-key path move
- docs/advanced.md parenthetical "(the old behaviour)"

Reworded (comments):
- Dropped "Previously this file also contained LogLevel DEBUG3..."
  history from vm_disk.go's sshdGuestConfig doc
- Dropped "Call sites that previously read vm.Runtime.{PID,...}"
  from vm_handles.go; now documents the current contract
- Dropped "Pre-v0.1 the defaults are" scaffolding in doctor_test.go
- Dropped "no longer does its own git inspection" phrasing in vm_run.go
- Dropped the "(also cleans up legacy inline block from pre-opt-in
  builds)" aside on the `ssh-config` CLI docstring
- Renamed test var `legacyKey` → `existingKey` in vm_test.go; its
  purpose was "pre-existing authorized_keys line," not banger-legacy

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 13:56:32 -03:00
5791466498
make: coverage-combined — merge unit-test and smoke covdata
Unit tests and the smoke suite cover different halves of the
codebase: unit for pure-Go branching (error paths, parsers, handler
wiring); smoke for the sudo / firecracker / dm-snap / real-KVM paths
unit tests physically can't reach. Separate reports each tell half
the story.

`make coverage-combined` runs the unit suite with
`-test.gocoverdir` pointed at a fresh binary-format dir, then
merges it with the existing smoke covdata via `go tool covdata
merge`. Modes must match; smoke uses the default 'set', so the
unit run aligns by NOT passing -covermode=atomic.

Output matches the existing `make coverage` layout (per-package
list + total) so the two targets read the same in CI.

`make coverage-combined-html` also emits an HTML report at
build/combined.cover.html for clicking through the uncovered
lines that neither suite touches.

Combined total right now: 72.7% (vs 37.7% unit-only / 49% daemon
via smoke).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 13:17:17 -03:00
235758e5b2
workspace: drop --readonly flag — advisory only against root guests
--readonly ran `chmod -R a-w` over the workspace after copying, but
every banger guest boots as root, and root bypasses DAC mode checks.
So a user running `vm workspace prepare ... --readonly` got the
mode bits set to 0444 but `echo x >> file` in the guest still
succeeded. The flag promised enforcement it couldn't deliver.

The feature also doesn't match the product model: workspaces are
prepared precisely so the guest CAN edit them, and `workspace
export` exists to pull those edits back as a patch. A
"read-only workspace" contradicts that loop.

Removed:
  - CLI flag `--readonly` on `vm workspace prepare`
  - api.VMWorkspacePrepareParams.ReadOnly field
  - model.WorkspacePrepareResult.ReadOnly field
  - daemon chmod dispatch in prepareVMWorkspaceGuestIO
  - smoke scenario pinning the (advisory) mode-bit behavior
  - misleading "exportbox-readonly" VM name in an unrelated export
    test (the test is about not mutating the real git index;
    renamed to exportbox-noindex-mutation)

If real enforcement becomes a user need later, the right primitive
is `chattr +i` (immutable bit — root CAN'T write) or a ro bind-mount.
Reintroducing a new flag is cheaper than debugging what the current
one actually guarantees.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 13:04:33 -03:00
bafe816fc7
smoke: cover the gaps — NAT, vm ports/restart/kill/prune, workspace variants, ssh-config
Audit of banger's advertised CLI surface vs. what smoke was exercising
turned up several gaps where a regression would have shipped silently.
New scenarios:

- NAT: asserts the per-VM POSTROUTING MASQUERADE rule is installed
  with --nat (scoped to the guest /32), idempotent across stop/start,
  and torn down on delete. End-to-end curl tests don't work here
  because the bridge IP and uplink IP both belong to the host — a
  guest reaching the uplink lands on host-local loopback whether
  MASQUERADE is set up or not — so the test pins the iptables rule
  itself. Skipped if passwordless `sudo iptables` isn't available.

- vm ports: sshd :22 must be visible with the <name>.vm endpoint
  (not localhost, not the raw guest IP — the daemon prefers the
  DNS record when one exists).

- vm restart: dedicated verb, not a stop+start alias. Asserts a
  fresh boot_id to prove the kernel actually recycled.

- vm kill --signal KILL: forceful termination path (distinct from
  `vm stop`'s graceful Ctrl-Alt-Del). Post-kill state must be
  'stopped' (not 'error') and the dm-snapshot must be cleaned up.

- vm prune -f: batch delete of non-running VMs while preserving any
  that are still running. Regression guard for the case where prune
  could wipe a live session.

- workspace prepare --readonly: mode bits on /root/repo/<file>
  must drop all write bits. Enforcement is advisory against a root
  guest, so the test asserts the bits, not EACCES.

- workspace prepare --mode full_copy: alternate transfer path
  (tarred into rootfs, no overlay) still lands the repo contents
  at /root/repo.

- workspace export --base-commit: guest-side commits captured in
  the patch when the pre-commit SHA is pinned. The feature's whole
  reason for existing; it had zero coverage. Includes a control
  assertion that the plain (no --base-commit) export does NOT see
  the committed file.

- ssh-config --install / --uninstall: HOME-isolated to a smoke
  tempdir so we don't touch the invoking user's ~/.ssh/config.
  Seeds a pre-existing config to catch any regression where
  install clobbers instead of appending. Asserts idempotency
  (second install doesn't duplicate the Include line) and clean
  round-trip (uninstall leaves the user's own content intact).

Coverage deltas from smoke (vs the last run):
  internal/hostnat          14.1% → 64.1%  (+50pp — NAT rule dance)
  internal/daemon/opstate   56.2% → 87.5%  (+31pp)
  internal/daemon           43.4% → 49.4%  (+6pp)
  internal/cli              36.1% → 40.4%  (+4pp)
  internal/daemon/workspace 64.1% → 67.5%  (+3pp)

Scenario count: 12 → 21.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 12:50:39 -03:00
b4afe13b2a
daemon: fix vm start (on a stopped VM) + regression coverage
Two defects compounded to make `vm create X` → `vm stop X` → `vm start X`
→ `vm ssh X` fail with `not_running: vm X is not running` even though
`vm show` reports `state=running`.

1. firecracker-go-sdk's startVMM spawns a goroutine that SIGTERMs
   firecracker when the ctx passed to Machine.Start cancels — and
   retains that ctx for the lifetime of the VMM, not just the boot
   phase. Our Machine.Start wrapper was plumbing the caller's ctx
   through, which on `vm.start` is the RPC request ctx. daemon.go's
   handleConn cancels reqCtx via `defer cancel()` right after
   writing the response. Net effect: firecracker is killed ~150ms
   after the `vm start` RPC "completes", invisibly, and the next
   `vm ssh` sees a dead PID. `vm.create` side-stepped the bug
   because BeginVMCreate detaches to context.Background() before
   calling startVMLocked; `vm.start` used the RPC ctx directly.
   Fix: Machine.Start now passes context.Background() to the SDK
   (a minimal sketch of the detach follows item 2). We own firecracker
   lifecycle explicitly (StopVM / KillVM / cleanupRuntime), so
   ctx-driven cancellation here was never actually wired into anything
   useful.

2. With (1) fixed, the same scenario exposed a second defect:
   patchRootOverlay's e2cp/e2rm refuses to touch the dm-snapshot
   with "Inode bitmap checksum does not match bitmap" on a restart,
   because the COW holds stale free-block/free-inode counters from
   the previous guest boot. Kernel ext4 is fine with this; e2fsprogs
   is not. Fix: run `e2fsck -fy` on the snapshot between the
   dm_snapshot and patch_root_overlay stages. Idempotent on a fresh
   snapshot, reconciles the bitmaps on a reused COW.
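
Sketch of the detach from (1) — only the ctx argument to Machine.Start
is the point; the wrapper shape is illustrative:

  package sketch

  import (
      "context"

      firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
  )

  func startMachine(m *firecracker.Machine) error {
      // Deliberately NOT the RPC request ctx: the SDK retains this
      // context for the VMM's lifetime and SIGTERMs firecracker when
      // it cancels.
      return m.Start(context.Background())
  }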

Regression coverage:
  - scripts/repro-restart-bug.sh — minimal create→stop→start→ssh
    reproducer with rich on-failure diagnostics (daemon log trace,
    firecracker.log tail, handles.json, pgrep-by-apiSock, apiSock
    stat). Exits non-zero if the bug returns.
  - scripts/smoke.sh — lifecycle scenario (create/ssh/stop/start/
    ssh/delete) and vm-set scenario (--vcpu 2 → stop → set --vcpu 4
    → start → assert nproc=4). Both were pulled when the bug was
    first found; now restored.

Supporting:
  - internal/system/system.ExitCode — extracts exec.ExitError's
    code without forcing callers to import os/exec. Needed by the
    e2fsck caller (policy test pins os/exec to the shell-out
    packages).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 12:01:46 -03:00
e94e7c4dcc
smoke: workspace export scenario + smoke-fresh target + fix the export bug it caught
The export round-trip (`vm create` → `workspace prepare` → guest edit →
`workspace export`) exposed a reproducible failure on Debian bookworm
guests: `git read-tree HEAD --index-output=/tmp/...` returns exit 128
"unable to write new index file" when the target lives on tmpfs while
`.git` is on the workspace overlay. Move the temp index into
`$(git rev-parse --git-dir)` so it shares a filesystem with `.git/index`
and the lockfile + rename + hardlink dance git does internally works.

Alongside:
- new workspace-export smoke scenario that would have caught this at
  the boundary between daemon and guest git
- `make smoke-fresh` = `smoke-clean && smoke` for release-time runs
  that want first-install paths (migrations, image pull) stamped into
  the coverage report

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:34:55 -03:00
672d7151e9
smoke: five more scenarios + fix exit-code propagation bug the new ones caught
Five new smoke scenarios layered on top of the existing bare + workspace
vm-run tests:

  - exit-code propagation: `sh -c 'exit 42'` must rc=42
  - workspace dry-run: --dry-run lists tracked files without a VM
  - workspace --include-untracked: opt-in ships files outside the git
    index (regression guard on the security-default flip from review 4)
  - concurrent vm runs: two --rm invocations in parallel both succeed
    (stresses per-VM locks, createVMMu reservation window, tap pool)
  - invalid spec rejection: --vcpu 0 must fail with no VM row left
    behind (the "cleanup on partial failure" path the review flagged)

The exit-code scenario caught a real bug on first run:

  `banger vm run --rm -- sh -c 'exit 42'` returned rc=0, not 42.

Root cause in internal/cli/ssh.go's sshCommandArgs: extra args were
appended to the ssh argv verbatim, relying on ssh(1)'s implicit
space-join to deliver the remote command. That works for single
tokens (echo hello) but re-tokenises multi-word commands on the
remote side: `ssh host sh -c 'exit 42'` becomes remote
`sh -c exit 42`, where `42` is $0 for the already-completed `exit`,
and the exit code the user asked for is lost.

Fix: shell-quote every element of extra (`'sh'` `'-c'` `'exit 42'`)
and join them into a single trailing argv entry. ssh's space-join
then produces exactly the command the user typed on the remote
shell. TestSSHCommandArgs was updated to pin the quoting; the
existing TestRunVMRunCommandModePropagatesExitCode test needed a
one-word quote tweak (`false` → `'false'`).
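
The quoting, sketched (illustrative helper names; sshCommandArgs in
the tree does more):

  package sketch

  import "strings"

  // shellQuote wraps a token in single quotes, escaping embedded quotes
  // with the standard '\'' dance.
  func shellQuote(s string) string {
      return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
  }

  // sshRemoteCommand joins the quoted tokens into one trailing argv
  // entry, so ssh's remote space-join reproduces the original command.
  func sshRemoteCommand(extra []string) string {
      quoted := make([]string, len(extra))
      for i, a := range extra {
          quoted[i] = shellQuote(a)
      }
      return strings.Join(quoted, " ") // e.g. 'sh' '-c' 'exit 42'
  }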

Smoke run after fix passes all seven scenarios in ~2 min on warm
state. cmd/banger coverage jumped to 100% (the invalid-spec
scenario hits the error-reporting path that wasn't covered
before).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 19:37:07 -03:00
5f81332b0a
make smoke: end-to-end boot suite with coverage from real VM runs
The unit + integration tests can't cross machine.Start — the SDK
boundary would need a fake firecracker that reimplements the
control-plane HTTP API, and the ongoing maintenance cost of keeping
that fake honest with upstream kills the value. Instead, add a
pre-release smoke target that drives REAL Firecracker + real KVM,
captures coverage from the -cover-instrumented binaries, and
surfaces per-package deltas so regressions in the boot path don't
ship silently.

scripts/smoke.sh:
  - Isolated XDG_{CONFIG,STATE,CACHE,RUNTIME} so the smoke run can't
    touch real user state (state/cache persist under build/smoke/xdg
    for fast reruns; runtime is mktemp'd fresh per-run because
    sockets can't be reused)
  - Preflight: `banger doctor` must pass; UDP :42069 must be free
    (otherwise the user's real daemon is up and the smoke daemon
    can't bind its DNS listener — fail with an actionable message)
  - Scenario 1 — bare: `banger vm run --rm -- echo smoke-bare-ok`
    exercises create → start → socket ownership chown → machine.Start
    → SDK waitForSocket race → vsock agent readiness → guest SSH
    wait → exec → cleanup → delete
  - Scenario 2 — workspace: creates a throwaway git repo, runs
    `banger vm run --rm <repo> -- cat /root/repo/smoke-file.txt`,
    verifies the tracked file reached the guest (exercises
    workDisk capability PrepareHost + workspace.prepare)
  - `banger daemon stop` at the end so instrumented binaries flush
    GOCOVERDIR pods before the script exits

Makefile additions:
  - smoke-build: builds banger/bangerd under build/smoke/bin/ with
    `go build -cover`
  - smoke: runs the script with GOCOVERDIR set, reports per-package
    coverage via `go tool covdata percent`
  - smoke-coverage-html: textfmt + go tool cover for a browsable
    report
  - smoke-clean: nukes build/smoke/ including the persisted XDG
    state

Bonus fix uncovered during the first smoke run: doctor treated a
missing state.db as a FAIL ("out of memory" from SQLite
SQLITE_CANTOPEN), which red-flagged every fresh install. Split
the store check: DB file absent → PASS with "will be created on
first daemon start" detail; DB present but unreadable → FAIL as
before. New TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall
pins the behaviour.

Concrete coverage delta from the first successful smoke run
(compared to `make coverage-total`'s unit-test-only 37.8%):

  internal/firecracker        43.6% → 75.0%
  internal/daemon/workspace   33.8% → 60.8%
  internal/store              40.1% → 56.3%
  internal/guest              63.7% → 57.4%  (different mix: smoke
                                              exercises real SSH;
                                              unit tests cover more
                                              error branches)

The packages the review flagged are the ones that moved most —
which is the point.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 18:59:57 -03:00
52612b516b
README: describe the SSH-key migration as a VM restart, not workspace sync
The migration note said existing VMs needed a "fresh workspace
sync" to pick up a new host SSH key fingerprint. That's wrong:
workspace.prepare (vm workspace prepare) touches the git checkout,
not authorized_keys. The authorized_keys rewrite happens on the
vm start path — specifically in workDiskCapability.PrepareHost
calling WorkspaceService.ensureAuthorizedKeyOnWorkDisk, which runs
during start, not during an explicit workspace sync.

Rewrite the note to name the actual recovery action: stop-and-start
(or vm restart). Leave the --rm caveat — those flows always boot
fresh and don't carry the problem.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 18:06:52 -03:00
e2ac70631b
test: end-to-end VMService lifecycle flow harness
Uses the newTestDaemon harness added earlier in this series to
drive VMService.CreateVM → FindVM → (duplicate CreateVM reject) →
DeleteVM through the real code path. Pins contracts the
single-responsibility tests can't:

  - image resolution finds a locally-upserted row
  - reserveVM allocates an IP, mkdirs VMDir, persists the Stopped
    row
  - FindVM round-trips the record
  - duplicate-name CreateVM fails without persisting a second row
  - DeleteVM runs cleanupRuntime with zero handles (no sudo calls
    needed), removes the store row, removes the VMDir

Plus TestVMCreateWithUnknownImageFails for the error branch: if
CreateVM can't resolve an image, it must error before mutating
any state.

Scope: everything except firecracker boot. NoStart: true skips
machine.Start, which is the upstream SDK boundary we can't cross
without a real firecracker binary. The integration we GET
exercises name/IP reservation, per-VM lock lifecycle, store
round-trip, VMDir lifecycle, and the never-started delete path —
all of which were only indirectly covered by unit tests before.

-race clean across the two tests; nothing touches goroutines but
the harness does initialize the tap pool background goroutine on
wireServices, which the race detector validated.

Coverage is flat at the global level (37.8% → 37.8%) because
this slice tests integration of already-covered units, not new
branches. That's expected — the slice's value is as a regression
bedrock for future refactors, not a line-count bump.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:55:04 -03:00
7820960706
store: edge-path tests for migrations and Open
Three gaps from the coverage plan, none of which were covered before.

internal/store/migrations_test.go:

  TestRunMigrationsIgnoresUnknownAppliedIDs — simulates a DB
  written by a newer banger opened by an older one: schema_migrations
  carries an id (9001) the current binary doesn't know about. The
  runner must leave the alien row alone AND still apply its own
  known migrations. Without this, forward-then-backward upgrades or
  running two daemon versions against the same state dir would
  either fail or start destructively reinterpreting rows.

  TestDropColumnIfExistsIsIdempotent — pins the "run twice, no harm"
  property. A daemon restart after migration 2 succeeded on a fresh
  install must not fail because the column is already gone.
  dropColumnIfExists is what makes that idempotent.

internal/store/store_test.go:

  TestOpenRejectsCorruptDB — writes garbage to state.db, Open must
  error cleanly (not panic, not silently overwrite). Also verifies
  the garbage bytes are untouched so the operator can hand the
  file to a recovery tool.

  TestOpenReadOnlyRejectsMissingDB — the doctor path must not
  silently create an empty DB when none exists; that would make
  "no VMs yet" and "your state is missing" indistinguishable.

Package function coverage nudged 39.1% → 40.1%.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:51:52 -03:00
2f3db9b104
fcproc: targeted tests for waitForPath + EnsureSocketAccess error paths
Every non-happy branch in fcproc was zero-covered before this. Given
that EnsureSocketAccess gates the firecracker control plane on the
daemon's ability to chown the API + vsock sockets off root, those
failure paths are exactly the ones we need pinned.

New file internal/daemon/fcproc/fcproc_test.go adds a local scripted
Runner (fcproc is a leaf package — can't pull the daemon's
scriptedRunner in) and six tests:

waitForPath:
  - TestWaitForPathReturnsDeadlineExceededWhenSocketNeverAppears —
    timeout branch wraps context.DeadlineExceeded with the label,
    and waits at least one poll tick before giving up
  - TestWaitForPathReturnsOnceSocketAppears — happy path with a
    mid-wait file creation via goroutine
  - TestWaitForPathRespectsContextCancellation — ctx.Done() beats
    the poll interval so a cancelled request doesn't stall

EnsureSocketAccess:
  - TestEnsureSocketAccessChownFailureBubbles — chown error surfaces
    untouched; chmod not attempted when chown fails
  - TestEnsureSocketAccessChmodFailureBubbles — chmod error surfaces
    after chown succeeds
  - TestEnsureSocketAccessTimesOutBeforeTouchingRunner — ordering
    contract: no sudo calls when the socket never materialises

Package function coverage moved 55.2% → 62.1%.

Integration-level chown-race test was considered (run a real shell
that exercises buildProcessRunner's script with a fake firecracker
binary) but skipped — requires `sudo -n` in the test env and makes
CI fragile. The socket-ownership regression this slice is meant to
guard against is covered at the unit level here; the
manual-smoke in the plan's verification section remains the
end-to-end check.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:49:42 -03:00
cef9bf92a5
ssh-config: harden sameDirOrParent against symlinks + add edge tests
The symlink test in this commit catches a real bug: sameDirOrParent
used filepath.Abs for both sides of the "is the key inside the
legacy dir?" check, but filepath.Abs doesn't resolve symlinks. A
user whose ssh_key_path pointed into ConfigDir/ssh via a symlinked
spelling (e.g. ConfigDir itself is a symlink, or the user
maintains an alias tree) would have their key silently deleted by
the legacy-dir scrub — the gate thought the key lived elsewhere
because the two spellings didn't match lexically.

Fix: resolvePathForComparison tries filepath.EvalSymlinks first,
falls back to filepath.Abs when the path doesn't exist yet (new
install, pre-first-Open). Both sides of the sameDirOrParent
comparison now use this helper, so a symlinked key + canonical
dir (or the reverse) lands in the same physical path before the
Rel check.

Tests added in this commit:

internal/daemon/ssh_client_config_test.go
  TestSameDirOrParentHandlesSymlinks — symlinked-key + canonical-dir
  and the reverse are both reported "inside"; unrelated paths stay
  out. Skips if the filesystem doesn't support symlinks.

internal/config/config_test.go
  TestLoadNormalizesAbsoluteSSHKeyPath — trailing slash, duplicate
  slashes, dot segments all collapse via filepath.Clean, so two
  spellings of the same path compare equal downstream.
  TestEnsureDefaultSSHKeyRejectsCorruptExistingFile — regression
  guard against a future "regenerate if invalid" patch that would
  silently nuke a real user key.
  TestResolveSSHKeyPathRejectsEmptySSHDirAndStateDir — pins the
  absolute-path guard that stops a bad layout from scribbling
  into cwd (this was the test that caught the stray
  internal/config/ssh/ a few commits back).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:48:06 -03:00
b2756f5e7e
test: add newTestDaemon harness + options
No Daemon test in this package has a shared constructor. Every file
re-derives the same pattern — &Daemon{...}, wireServices(d), maybe
override a field — which means new lifecycle / integration tests
spend half their length standing up infrastructure instead of
exercising behaviour.

Consolidate into internal/daemon/daemon_testing_test.go:

  func newTestDaemon(t *testing.T, opts ...testDaemonOption) *Daemon

Defaults: tempdir layout (distinct StateDir/ConfigDir/SSHDir/...),
fresh store.Store with migrations auto-run, permissiveRunner,
io.Discard logger, empty vmCaps (so default workDisk/dns/nat
capabilities don't fire real side effects in tests that just want
to exercise VMService plumbing).

Options so far:
  - withRunner(system.CommandRunner)
  - withConfig(model.DaemonConfig)
  - withStore(*store.Store)
  - withLogger(*slog.Logger)
  - withLayout(paths.Layout)
  - withVMCaps(caps ...vmCapability)
  - withVsockHostDevice(string)

withVMCaps tracks a vmCapsSet flag so tests that explicitly pass no
caps (i.e. the default) still get the empty-slice behaviour — the
reset after wireServices only fires when the caller didn't opt in.
That keeps wireServices's production semantics unchanged: if you
construct a real Daemon without pre-populating vmCaps, you still
get the default three.
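
The option pattern, reduced to a sketch (stand-in types; the real
harness also builds the store, layout, and runner):

  package sketch

  type fakeDaemon struct {
      vmCaps    []string
      vmCapsSet bool
  }

  type testDaemonOption func(*fakeDaemon)

  func withVMCaps(caps ...string) testDaemonOption {
      return func(d *fakeDaemon) {
          d.vmCaps = caps
          d.vmCapsSet = true // explicit opt-in: keep whatever the caller passed
      }
  }

  func newTestDaemon(opts ...testDaemonOption) *fakeDaemon {
      d := &fakeDaemon{}
      for _, o := range opts {
          o(d)
      }
      if !d.vmCapsSet {
          d.vmCaps = nil // default: no capabilities, so tests fire no real side effects
      }
      return d
  }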

Two smoke tests pin:
  - zero-option call wires every service, gives an empty-vmCaps
    daemon with the default vsock device, store non-nil
  - each option actually lands on the resulting Daemon (guards
    against silent rename)

Existing tests unchanged — this is purely additive. Later slices
(Firecracker error-path tests, store migration edges, lifecycle
flow harness) will adopt the helper.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:45:43 -03:00
e2885060dc
README: add ssh_key_path migration note + document normalization rules
Docs pointing at the new state-dir default were updated in b1fbf69;
what was still missing is the migration guidance the review asked for.

Add a short note under the ssh_key_path bullet covering:

- what moved (~/.config/banger/ssh/id_ed25519 →
  ~/.local/state/banger/ssh/id_ed25519)
- that users with the old path hardcoded in config.toml are safe
  (the narrowed legacy-dir cleanup preserves the enclosing dir when
  ssh_key_path points inside it)
- that unsetting the key and letting banger manage the new default
  is also fine — the only caveat is existing VMs need a
  stop-and-start to re-sync authorized_keys

Also document the new normalization rules (~/ expansion, absolute
required) on the ssh_key_path bullet itself so users know what's
accepted before they hit a load error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:14:00 -03:00
617008e8f1
config: normalize ssh_key_path — expand ~/, reject non-absolute
Bug: resolveSSHKeyPath returned a configured ssh_key_path verbatim.
That meant:

- ssh_key_path = "~/.ssh/id_ed25519" kept the literal "~" — downstream
  readers (internal/guest/ssh.go, internal/daemon/image_seed.go,
  internal/daemon/vm_authsync.go, internal/cli/ssh.go) do raw
  os.ReadFile on the path and fail at runtime with a path that looks
  fine but isn't.
- ssh_key_path = "id_ed25519" (relative) silently worked or didn't
  depending on the daemon's cwd — the daemon process's cwd is not
  the user's shell cwd, so behavior was non-obvious.

Fix: add normalizeSSHKeyPath() run over configured values. It:

  - expands "~/..." against $HOME
  - rejects bare "~" (ambiguous)
  - rejects "~user/..." (we don't do user-tilde)
  - rejects relative paths outright
  - returns filepath.Clean'd absolute paths
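
The rules as an illustrative sketch (the tree's normalizeSSHKeyPath
may differ in wording and error text):

  package sketch

  import (
      "fmt"
      "os"
      "path/filepath"
      "strings"
  )

  func normalizeSSHKeyPath(p string) (string, error) {
      switch {
      case p == "~":
          return "", fmt.Errorf("ssh_key_path %q is ambiguous; spell out the full path", p)
      case strings.HasPrefix(p, "~/"):
          home, err := os.UserHomeDir()
          if err != nil {
              return "", err
          }
          p = filepath.Join(home, p[2:])
      case strings.HasPrefix(p, "~"):
          return "", fmt.Errorf("ssh_key_path %q: ~user expansion is not supported", p)
      }
      if !filepath.IsAbs(p) {
          return "", fmt.Errorf("ssh_key_path %q must be absolute", p)
      }
      return filepath.Clean(p), nil
  }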

Tests cover the accepting case (home-anchored expansion) and every
rejection branch via table-driven subtests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 17:00:34 -03:00
b1fbf695ca
ssh-config: narrow the legacy-dir cleanup so it can't delete a user key
Bug: syncVMSSHClientConfig did os.RemoveAll on $ConfigDir/ssh every
daemon Open. The intent was to migrate off the pre-opt-in layout,
where banger used to write $ConfigDir/ssh/ssh_config. But a user who
sets ssh_key_path = "~/.config/banger/ssh/id_ed25519" in config.toml
has their key live exactly in that dir — and the scrub deletes it
along with every other file in the tree.

This is the same class of bug that cost the default key until
ebe6517 moved it to StateDir, but that fix was scoped to the default
path. A configured ssh_key_path pointed under the legacy dir still
dies.

Fix: replace os.RemoveAll with a narrow two-step cleanup:

 1. Skip the cleanup entirely when the configured ssh_key_path
    resolves under the legacy dir. A user who pointed banger at a
    key there must keep the enclosing directory.
 2. Otherwise, os.Remove the specific legacy file ($ConfigDir/ssh/
    ssh_config) and then os.Remove the directory. The second
    os.Remove fails with ENOTEMPTY if the dir still holds anything
    (e.g. a user-managed sibling file we don't own). Both errors
    are swallowed — this is best-effort migration, not a hard
    failure.

Tests pin all three paths: user key under legacy dir survives,
legacy dir empties and is removed when the user moved on, and a
user-managed sibling file in the legacy dir is preserved.

Also fix stale doc claims in README.md and AGENTS.md — both still
pointed at the old ~/.config/banger/ssh/id_ed25519 default, which
moved to ~/.local/state/banger/ssh/id_ed25519 in ebe6517.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 16:31:07 -03:00
fba30f26d4
firecracker: chown API + vsock sockets inside the sudo shell
Bug: Firecracker creates its API and vsock sockets as root:root 0700
(enforced by the intentional umask 077 in buildProcessRunner). The
daemon, running as the invoking user, then can't connect(2) to
either — AF_UNIX connect needs write permission on the socket file
and 0700 root-owned leaves thales without any.

firecracker-go-sdk's Machine.Start() blocks on waitForSocket, which
probes the socket with both os.Stat (succeeds — parent dir is the
user's XDG_RUNTIME_DIR) and an HTTP GET over the socket (fails —
EACCES on connect). The SDK loops for 3 seconds then fails with
"Firecracker did not create API socket ... context deadline exceeded".

The daemon's EnsureSocketAccess chown was meant to fix permissions,
but it runs *after* Machine.Start returns — and Start never returns
because it's still looping on the SDK's probe. Chicken-and-egg.

Fix: inside the sudo'd shell that launches firecracker, spawn a
background subshell that polls for each expected socket (API + vsock,
when configured) and chowns it to $SUDO_UID:$SUDO_GID as soon as it
appears. The background polling is bounded at 1s (20 × 50ms) so a
broken firecracker invocation doesn't leak a waiting shell.

Post-fix: socket appears root-owned 0600 briefly, is chowned to the
invoking user within ~50ms, SDK's HTTP probe succeeds, Machine.Start
returns normally. EnsureSocketAccess's later chmod 600 remains the
belt-and-braces guarantee on final mode.

Verified: manual repro of the shell script produces a socket owned
by thales:thales that a non-root python socket.connect() accepts.
Without the fix the same setup gives "PermissionError: [Errno 13]
Permission denied".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 16:09:02 -03:00
60f90eb8be
config: harden resolveSSHKeyPath against relative paths + drop stray test keys
Prior commit's test (TestLoadDefaultsResolveFirecrackerAndGenerateSSHKey)
was contained, but every OTHER config test called
Load(paths.Layout{ConfigDir: ...}) without SSHDir or StateDir set.
Under the new code path that meant resolveSSHKeyPath produced a
relative target ("ssh/id_ed25519") which go test happily wrote
against the package's own source directory — and I caught that in
the commit after the fact, in the form of internal/config/ssh/
showing up as tracked files.

Two changes:
- resolveSSHKeyPath now errors if the resolved path is not absolute.
  paths.Resolve always produces an absolute SSHDir in production;
  this just stops a fumbled layout from silently scribbling into
  cwd.
- Every existing config test that was relying on the old
  ConfigDir/ssh path gets an explicit SSHDir: t.TempDir() added,
  restoring the key-generation surface under tempdir isolation.

Delete internal/config/ssh/ — those files were test-generated by the
prior commit and have no business in the repo.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 14:31:11 -03:00
ebe651762f
config: put the default SSH key under the state dir, not ConfigDir/ssh
Bug: every daemon Open deleted the freshly-generated default SSH key
before returning, so the next VM create failed reading it.

Sequence:
  1. Open → config.Load → resolveSSHKeyPath generates
     ~/.config/banger/ssh/id_ed25519
  2. Open → ensureVMSSHClientConfig → syncVMSSHClientConfig scrubs
     ~/.config/banger/ssh entirely as a migration step for the
     pre-opt-in layout (commit 108f7a0)

The scrub was added for a file that used to live at
ConfigDir/ssh/ssh_config, but it os.RemoveAll'd the whole
ConfigDir/ssh dir — including the id_ed25519 the key generator had
just put there.

Fix: point the default key at layout.SSHDir (a StateDir-rooted path
that paths.Ensure already creates). The scrub can keep cleaning up
ConfigDir/ssh because nothing banger writes under it anymore.

Users whose ssh_key_path is explicitly set in config.toml are
unaffected — configured wins. Users on the default path will get a
fresh key at StateDir/ssh/id_ed25519 on their next daemon Open;
existing VMs' authorized_keys re-sync on next start/create through
ensureAuthorizedKeyOnWorkDisk, so no manual intervention is needed
beyond restarting the daemon.

Regression test pins the new placement and asserts the legacy path
stays empty.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 14:29:34 -03:00
80ae4d6667
docs: resync package docs, AGENTS, and kernel-catalog with current code
Four drift fixes from a doc sweep.

internal/daemon/doc.go
  Replace the capability-hook description that still said "Hook
  methods take *Daemon; VMService reaches them through a
  capabilityHooks seam." Current reality: every capability is a
  plain struct carrying its own service pointers
  (workDiskCapability{vm,ws,store}, dnsCapability{net},
  natCapability{vm,net,logger}); wireServices builds the default
  list; no hook reaches *Daemon.

internal/daemon/ARCHITECTURE.md
  The VMService field list still claimed guestWaitForSSH and
  guestDial were "per-instance fields." Those were deleted as
  refactor residue. Update the note to say the seams live on
  *Daemon (reached by WorkspaceService via closures wired at
  construction) and document the vsockHostDevice field that
  replaced the old package-global vsockHostDevicePath.

AGENTS.md
  Drop the "experimental web UI" mention (removed) and the
  `session` subpackage (removed). Mention banger-vsock-agent as
  the third cmd/ binary while we're here — AGENTS hadn't listed
  it.

docs/kernel-catalog.md
  The trust-model section still read as if upstream kernel sources
  were fetched by HTTPS alone. Add a paragraph covering the PGP
  verification make-generic-kernel.sh now does against the
  detached .tar.sign and the three kernel.org release signing keys.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 13:01:11 -03:00
88bc466d58
tests: targeted coverage for doctor, workspace rejections, and nat capability
Three thematic test files pinning behavior surfaces that had none
before, following the review's recommendation to plug concrete
error/cleanup branches rather than chase a coverage percentage.

doctor_test.go
  Covers Daemon.doctorReport end-to-end with a permissive runner +
  fake executables on PATH. Pins: store error surfaces as fail,
  store success as pass, missing firecracker kills the host-runtime
  check, the three default capability feature checks (work disk,
  vm dns, nat) are emitted, vm-defaults is always-pass with
  provenance. Previously 0% — now the Doctor() command's contract
  with the CLI is under guard.

workspace_rejection_test.go
  Covers the four early-exit branches of PrepareVMWorkspace that
  the existing happy-path + lock-release tests never hit: malformed
  mode, --from without --branch, VM not running, VM not found.
  Each one returns before any SSH I/O, so the fake-firecracker
  infra the happy-path test needs is unnecessary — a bare wired
  daemon with a stored VMRecord suffices.

nat_capability_test.go
  Covers natCapability.ApplyConfigChange (unchanged flag → no-op,
  VM not alive → no-op, toggle on live VM → runner reached) and
  natCapability.Cleanup (NAT disabled → no-op, runtime handles
  missing → defensive no-op, full wiring → ensureNAT(false)). A
  countingRunner + startFakeFirecracker fixture stands in for the
  real host plumbing, with waitForVMAlive polling past the
  exec -a race window that startFakeFirecracker exposes on
  loaded CI boxes.

make coverage-total 37.8% → 38.6%. The number isn't the point —
these tests exist so the next refactor in this area has to
break an explicit assertion to drift.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:58:12 -03:00
3aec590deb
vmservice: delete dead guestWaitForSSH + guestDial seams
VMService carried guestWaitForSSH and guestDial fields + matching
constructor args ever since the daemon split, but nothing on
VMService ever read them. The live guest-SSH path runs on *Daemon
(d.waitForGuestSSH / d.dialGuest in guest_ssh.go); WorkspaceService
reaches those through closures wired in wireServices.

So the VMService copies were refactor residue: they made the
service look more decoupled than it actually is, and any future
test that stubbed VMService.guestDial would be stubbing nothing.
Delete the fields, the deps entries, the newVMService assignments,
and the wireServices passes.

Real seams on *Daemon are unchanged — those are the ones tests
(e.g. workspace_test.go) already set directly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:45:27 -03:00
bbd187391e
noteUntrackedSkipped: fix subdir underreport + be best-effort everywhere
Two bugs in the untracked-skipped warning, both surfaced by review.

1. Wrong scope for subdir inputs. The helper accepted any path the
   caller had (sourcePath, which may be a user-supplied subdirectory)
   and ran `git -C <path> ls-files --others --exclude-standard`. Git
   scopes that output to the cwd, so pointing `vm run ./repo/sub` at
   a subdir silently underreported untracked files living elsewhere
   in the repo — exactly the files the warning exists to surface.
   Fix: resolve sourcePath to the repo root inside the helper via
   `rev-parse --show-toplevel` before counting.

2. Inconsistent failure handling. The comment said the helper should
   be silent when the count can't be determined; the body returned
   the error. vm_run.go treated the error as non-fatal (logged a
   warning, continued); workspace prepare and --dry-run aborted the
   whole operation on the same helper failure. A courtesy notice
   shouldn't kill the operation.
   Fix: make the helper best-effort in signature and body — no error
   return, swallows rev-parse + count failures, emits nothing when
   there's nothing to say. All three callers lose their error
   branches.

Regression tests:
- subdir input reports the root-level untracked file (the bug case)
- non-repo path produces silence, not a fatal error
- inspector whose runner errors on every call produces silence
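
For reference, a minimal standalone sketch of the fixed shape (the real
helper goes through the workspace Inspector's Runner; here the git calls
are inlined so it runs on its own):

```go
package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// Illustrative sketch only: resolve subdirectory inputs to the repo root
// before counting, and swallow every failure instead of returning it.
func noteUntrackedSkipped(sourcePath string, warn func(string)) {
	gitOut := func(dir string, args ...string) (string, bool) {
		out, err := exec.Command("git", append([]string{"-C", dir}, args...)...).Output()
		if err != nil {
			return "", false
		}
		return strings.TrimSpace(string(out)), true
	}
	// Resolve to the repo root so the count covers the whole repo, not just
	// the subdirectory git would otherwise scope ls-files to.
	root, ok := gitOut(sourcePath, "rev-parse", "--show-toplevel")
	if !ok || root == "" {
		return // best-effort: not a repo or git unavailable, stay silent
	}
	list, ok := gitOut(root, "ls-files", "--others", "--exclude-standard")
	if !ok || list == "" {
		return // nothing untracked, or the count couldn't be determined
	}
	n := len(strings.Split(list, "\n"))
	warn(fmt.Sprintf("skipping %d untracked file(s); pass --include-untracked to ship them", n))
}

func main() {
	noteUntrackedSkipped(".", func(msg string) { fmt.Println("warning:", msg) })
}
```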

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:42:33 -03:00
ecb18ce6ca
seams: move the last four package globals onto instance fields
Four test seams were still package-level mutable vars, which tests
had to swap before use. That's the classic path to flaky parallel
tests — two goroutines fighting over the same global fake. Push each
down to the struct that owns the behaviour.

internal/daemon/dns_routing.go
  lookupExecutableFunc + vmDNSAddrFunc → fields on *HostNetwork,
  defaulted at newHostNetwork time. dns_routing_test builds
  HostNetwork{..., lookupExecutable: stub, vmDNSAddr: stub} inline,
  no more t.Cleanup dance around package-level vars.

internal/daemon/preflight.go + doctor.go
  vsockHostDevicePath (mutable string) → vsockHostDevice field on
  *VMService, defaulted via defaultVsockHostDevice constant in
  newVMService. Preflight reads s.vsockHostDevice; doctor reads
  d.vm.vsockHostDevice. Logger test sets d.vm.vsockHostDevice = tmp
  after wireServices.

internal/daemon/workspace/workspace.go
  HostCommandOutputFunc → *Inspector struct with a Runner field.
  Every git-using helper (GitOutput, GitTrimmedOutput,
  GitResolvedConfigValue, RunHostCommand, ListSubmodules,
  ListOverlayPaths, CountUntrackedPaths, InspectRepo,
  ImportRepoToGuest, PrepareRepoCopy) is now a method on *Inspector.
  NewInspector() wraps the real host runner for production;
  WorkspaceService holds one via repoInspector, CLI deps holds one
  too. cli_test.go's submodule-rejection test builds its own
  Inspector with a scripted Runner instead of patching a global.
  Pure helpers (FinalizeScript, ResolveSourcePath, ParsePrepareMode,
  ShellQuote, FormatStepError, GitFileURL, ParseNullSeparatedOutput)
  stay free functions since they don't touch the host.

Sentinel: grep for HostCommandOutputFunc, lookupExecutableFunc,
vmDNSAddrFunc, vsockHostDevicePath is now empty across internal/.
make lint test green.
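
The pattern, reduced to a compilable sketch (package name and the default
DNS address are assumptions, not the real values):

```go
package daemon

import "os/exec"

// HostNetwork carries its seams as instance fields; newHostNetwork fills in
// the production defaults, and tests build the literal with stubs inline.
type HostNetwork struct {
	lookupExecutable func(name string) (string, error)
	vmDNSAddr        func() string
}

func newHostNetwork() *HostNetwork {
	return &HostNetwork{
		lookupExecutable: exec.LookPath, // production default
		vmDNSAddr: func() string {
			return "127.0.0.1:42069" // assumed default, for illustration only
		},
	}
}
```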

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:07:14 -03:00
2685bc73f8
doctor: open the state DB read-only so inspection never mutates it
`banger doctor` used to call store.Open, which unconditionally runs
migrations on the way up. Diagnostics mutating persistent state is a
surprise — particularly now that migration 2 drops a column, so a
plain `doctor` invocation against an old DB would silently schema-
evolve it.

Add store.OpenReadOnly: separate DSN builder with mode=ro and a
minimal pragma set (foreign_keys, busy_timeout — no journal_mode=WAL,
no wal_autocheckpoint), skips runMigrations, and pings on open so a
missing DB fails up front rather than at first query. doctor.go now
uses OpenReadOnly; the existing storeErr fallback path surfaces any
failure as a failing check, unchanged.

Tests pin two invariants:
- OpenReadOnly against a DB whose migration 2 marker was removed and
  packages_path re-added must leave both alone (i.e. no drift is
  applied behind the user's back).
- Any write attempted through the read-only handle is rejected at
  the driver layer (belt-and-braces for future refactors).
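
A sketch of the read-only open, assuming the mattn/go-sqlite3 driver and
its DSN parameter spelling (neither is pinned by this commit):

```go
package store

import (
	"context"
	"database/sql"
	"fmt"

	_ "github.com/mattn/go-sqlite3" // driver choice is an assumption here
)

// OpenReadOnly: mode=ro DSN, minimal pragmas, no migrations, and a ping up
// front so a missing DB fails at open time rather than at first query.
func OpenReadOnly(ctx context.Context, path string) (*sql.DB, error) {
	dsn := fmt.Sprintf("file:%s?mode=ro&_foreign_keys=on&_busy_timeout=5000", path)
	db, err := sql.Open("sqlite3", dsn)
	if err != nil {
		return nil, err
	}
	if err := db.PingContext(ctx); err != nil {
		db.Close()
		return nil, fmt.Errorf("open state DB read-only: %w", err)
	}
	return db, nil
}
```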

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 11:05:23 -03:00
129475be20
config + store: remove dead knobs and stale schema
Three drift items surfaced in review, each of them dead code, and each
removal makes the surface a little easier to trust at v0.1.0.

config: drop MetricsPollInterval. The field was parsed from TOML
(metrics_poll_interval), stored on DaemonConfig, and ignored by every
consumer — only StatsPollInterval drives the background poll loop.
Users setting it in config.toml saw zero effect. Removed from the TOML
surface, the model constant, and the config test.

daemon: delete ensureDefaultImage. No callers, body was `_ = ctx;
return nil`. Dead since whatever flow used to call it got removed.

store: drop packages_path from the images table. The column was
carried by the baseline migration but never referenced by UpsertImage
(no INSERT / UPDATE mention) or any Go model field — a ghost from a
build pipeline that no longer exists. Added migration id=2
(drop_dead_image_columns) with an idempotent dropColumnIfExists
helper: fresh installs run baseline (creates the column) + 2 (drops
it); legacy DBs where the column was never added get a no-op. Updated
the direct-INSERT SQL in TestGetImageRejectsMalformedTimestamp to
drop the column reference, and added a migration test covering both
install paths (fresh + legacy).
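
Sketch of the idempotence check, assuming SQLite 3.35+ (which added
ALTER TABLE ... DROP COLUMN); the helper name comes from above, the exact
signature is assumed:

```go
package store

import (
	"database/sql"
	"fmt"
)

// dropColumnIfExists: no-op when the column was never added, real drop when
// it was. table/column are trusted migration constants, never user input.
func dropColumnIfExists(tx *sql.Tx, table, column string) error {
	rows, err := tx.Query(fmt.Sprintf("PRAGMA table_info(%s)", table))
	if err != nil {
		return err
	}
	defer rows.Close()
	found := false
	for rows.Next() {
		var cid, notnull, pk int
		var name, ctype string
		var dflt sql.NullString
		if err := rows.Scan(&cid, &name, &ctype, &notnull, &dflt, &pk); err != nil {
			return err
		}
		if name == column {
			found = true
		}
	}
	if err := rows.Err(); err != nil {
		return err
	}
	if !found {
		return nil // legacy DB that never had the column
	}
	_, err = tx.Exec(fmt.Sprintf("ALTER TABLE %s DROP COLUMN %s", table, column))
	return err
}
```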

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 10:54:01 -03:00
2a7f55f028
vm run: ship tracked files only by default; add --include-untracked + --dry-run
Workspace-mode vm run and vm workspace prepare used to copy both
tracked AND untracked non-ignored files into the guest. That silently
catches local .env files, scratch notes, credentials, and any other
working-tree state a developer hasn't explicitly gitignored — a real
data-exposure footgun given the golden image ships Docker and the
usual dev tooling.

Flip the default to tracked-only. Users who actually want the fuller
set opt in with --include-untracked (documented in both commands'
help). Gitignored files are still always excluded regardless of the
flag.

Add --dry-run to both vm run and vm workspace prepare. Dry-run
inspects the repo CLI-side (no VM created, no daemon RPC needed since
the daemon is always local and the inspection is a pure git read),
prints the exact file list + mode, and exits: a faithful preview of
exactly what would land in the guest.

When running real (non-dry) and untracked files exist in the repo but
are being skipped under the new default, print a one-line notice
pointing to --include-untracked so users aren't surprised when the
guest is missing something they expected.

Signature changes:
- ListOverlayPaths takes an includeUntracked bool (tracked always;
  untracked gated by flag).
- InspectRepo takes the same flag and passes it through.
- VMWorkspacePrepareParams gains IncludeUntracked.
- WorkspaceService.workspaceInspectRepo seam signature widened to
  match (4 callers in tests updated).

New workspace package tests cover both modes and verify that
gitignored files never leak regardless of the flag.
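
The gating reduces to a single git invocation. A standalone sketch, with
the Inspector plumbing elided:

```go
package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// Tracked files always ship; untracked non-ignored files only when asked.
// Gitignored files stay excluded either way because --exclude-standard is
// the only untracked mode ever used.
func listOverlayPaths(dir string, includeUntracked bool) ([]string, error) {
	args := []string{"-C", dir, "ls-files", "-z", "--cached"}
	if includeUntracked {
		args = append(args, "--others", "--exclude-standard")
	}
	out, err := exec.Command("git", args...).Output()
	if err != nil {
		return nil, err
	}
	var paths []string
	for _, p := range strings.Split(string(out), "\x00") {
		if p != "" {
			paths = append(paths, p)
		}
	}
	return paths, nil
}

func main() {
	paths, err := listOverlayPaths(".", false)
	fmt.Println(len(paths), err)
}
```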

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 19:53:17 -03:00
25a1466947
supply chain: verify signatures and pins across image + kernel builds
Three independent hardenings, addressing a review finding that the
kernel and image build pipelines were relying on HTTPS alone for
artifact integrity.

scripts/make-generic-kernel.sh
- Fetch the detached PGP signature (linux-<ver>.tar.sign) alongside
  the tarball and verify it with gpg before extraction. An isolated
  $GNUPGHOME under the tempdir keeps the kernel signers out of the
  invoking user's keyring.
- Import the three kernel.org release signing keys (Greg KH / Linus /
  Sasha Levin) from keyserver.ubuntu.com, falling back to
  keys.openpgp.org. Ubuntu comes first because keys.openpgp.org strips
  unverified UIDs on upload, leaving gpg with UID-less keys it
  refuses to trust.
- Require VALIDSIG (cryptographic proof) rather than GOODSIG
  (printed even for expired keys) before proceeding. Verified
  end-to-end against a clean tarball (accepts) and a byte-flipped
  tampered copy (rejects with BADSIG).
- gpg + gpgv + xz added to the required-tools check.

images/golden/Dockerfile
- Pin Docker's apt signing key by fingerprint. After downloading
  /etc/apt/keyrings/docker.asc we gpg --show-keys --with-colons it,
  extract the fpr, and compare against the expected
  9DC858229FC7DD38854AE2D88D81803C0EBFCD88. A tampered or swapped key
  aborts the build before any apt repo metadata is fetched.
- Replace `curl https://mise.run | sh` with a pinned GitHub release
  binary (mise v2026.4.18, linux-x64) verified against its published
  sha256. Refuses to build on unknown architectures rather than
  silently installing a binary we have no hash for.
- Add gnupg to the ESSENTIAL apt-get install so the fingerprint check
  has gpg available.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 19:38:13 -03:00
011b59a72f
daemon split (8/8): document capability decoupling + wireServices
Update ARCHITECTURE.md's Composition section to reflect the finished
split: capabilities carry explicit service-pointer fields, nothing
reaches *Daemon at dispatch time, and wireServices(d) is the single
entry point that builds services + capabilities eagerly (from Open
in production, from tests after constructing &Daemon{...} literals).

Removes the paragraph admitting capability→*Daemon coupling and the
lazy-init getters justification, neither of which applies anymore.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 15:59:39 -03:00
9c73155e17
daemon split (7/n): narrow capability interfaces, wire deps at construction
Stop passing *Daemon into capability hooks. Each capability
implementation is now a struct with explicit service-pointer fields
populated at wireServices time; the six dynamic-dispatch interfaces
(AddStartPreflight, PrepareHost, PostStart, Cleanup, ApplyConfigChange,
AddDoctorChecks) no longer have a *Daemon parameter. Capability
methods reach their dependencies through struct fields, not through
d.vm / d.ws / d.net.

- workDiskCapability carries {vm, ws, store, defaultImageName}
- dnsCapability carries {net}
- natCapability carries {vm, net, logger}

Daemon.defaultCapabilities() builds the production list from the
already-constructed services and is called from wireServices so
d.vmCaps is populated eagerly. Tests that preinstall d.vmCaps with
stubs still work — wireServices only overwrites an empty slice.

registeredCapabilities() is gone (every dispatch loop now reads
d.vmCaps directly). capabilities_test.go's testCapability fake drops
*Daemon from its method set to match the new interfaces.

This finishes the daemon service split: capability implementations
no longer reach through the composition root, there's no path back
to *Daemon from any service or capability, and test construction
goes through one explicit wireServices call instead of lazy getters.
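
The resulting shape, boiled down to a self-contained toy with stand-in
types (not the real banger interfaces):

```go
package main

import "fmt"

type HostNetwork struct{}

func (n *HostNetwork) ensureNAT(enable bool) { fmt.Println("ensureNAT:", enable) }

// Capability hooks no longer receive the composition root; dependencies are
// struct fields filled in once at wiring time.
type capability interface {
	Cleanup() error
}

type natCapability struct {
	net *HostNetwork
}

func (c *natCapability) Cleanup() error {
	c.net.ensureNAT(false)
	return nil
}

type Daemon struct {
	net    *HostNetwork
	vmCaps []capability
}

// wireServices is idempotent and leaves preinstalled test stubs alone.
func wireServices(d *Daemon) {
	if d.net == nil {
		d.net = &HostNetwork{}
	}
	if len(d.vmCaps) == 0 {
		d.vmCaps = []capability{&natCapability{net: d.net}}
	}
}

func main() {
	d := &Daemon{}
	wireServices(d)
	for _, c := range d.vmCaps {
		_ = c.Cleanup()
	}
}
```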

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 15:59:09 -03:00
16702bd5e1
daemon split (6/n): extract wireServices + drop lazy service getters
Factor the service + capability wiring out of Daemon.Open() into
wireServices(d), an idempotent helper that constructs HostNetwork,
ImageService, WorkspaceService, and VMService from whatever
infrastructure (runner, store, config, layout, logger, closing) is
already set on d. Open() calls it once after filling the composition
root; tests that build &Daemon{...} literals call it to get a working
service graph, preinstalling stubs on the fields they want to fake.

Drops the four lazy-init getters on *Daemon — d.hostNet(),
d.imageSvc(), d.workspaceSvc(), d.vmSvc() — whose sole purpose was
keeping test literals working. Every production call site now reads
d.net / d.img / d.ws / d.vm directly; the services are guaranteed
non-nil once Open returns. No behavior change.

Mechanical: all existing `d.xxxSvc()` calls (production + tests)
rewritten to field access; each `d := &Daemon{...}` in tests gets a
trailing wireServices(d) so the literal + wiring are side-by-side.
Tests that override a pre-built service (e.g. d.img = &ImageService{
bundleFetch: stub}) now set the override before wireServices so the
replacement propagates into VMService's peer pointer.

Also nil-guards HostNetwork.stopVMDNS and d.store in Close() so
partially-initialised daemons (pre-reconcile open failure) still
tear down cleanly — same contract the old lazy getters provided.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 15:55:28 -03:00
0cfd8a5451
daemon split (5/5): document the service composition
Phase 5 of the daemon god-struct refactor. Code motion landed in
phases 1-4; this commit retells the architecture so the docs match
the structure.

ARCHITECTURE.md loses the "deferred v0.2 project" hedge about
splitting services. The Composition section now describes the four
services (HostNetwork, ImageService, WorkspaceService, VMService)
that own behaviour, the consumer-defined seam pattern for
cross-service calls, and the lazy-init getter pattern that keeps
existing test literals compiling.

doc.go inventories which methods live on which service, and the
lock-ordering section gains the service prefixes (e.g.
VMService.vmLocks instead of bare vmLocks) so readers don't have to
guess which type owns which mutex.

No code changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:58:53 -03:00
466a7c30c4
daemon split (4/5): extract *VMService service
Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.

Daemon keeps thin forwarders only for FindVM / TouchVM (dispatch
surface) and is otherwise out of VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
`&Daemon{store: db, runner: r}` still get a functional service
without spelling one out.

Three small cleanups along the way:

  * preflight helpers (validateStartPrereqs / addBaseStartPrereqs
    / addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
    move with the VM methods that call them.
  * cleanupRuntime / rebuildDNS move to *VMService, with
    HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
    killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
    reached through s.net instead of the hostNet() facade.
  * vsockAgentBinary becomes a package-level function so both
    *Daemon (doctor) and *VMService (preflight) call one entry
    point instead of each owning a forwarder method.

WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.

Pure code motion: build + unit tests green, lint clean. No RPC
surface or lock-ordering changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:57:05 -03:00
c0d456e734
daemon split (3/5): extract *WorkspaceService service
Third phase of splitting the daemon god-struct. WorkspaceService now
owns workspace.prepare / workspace.export plus the ssh-key +
git-identity + arbitrary-file sync that runs as part of VM start's
prepare_work_disk capability hook. workspaceLocks (the per-VM tar
serialisation set) lives on the service.

workspace.go and vm_authsync.go flipped receivers from *Daemon to
*WorkspaceService. The workspaceInspectRepo / workspaceImport test
seams moved onto the service as fields.

Peer-service dependencies go through narrow function-typed fields:
vmResolver, aliveChecker, waitGuestSSH, dialGuest, imageResolver,
imageWorkSeed, withVMLockByRef, beginOperation. WorkspaceService
never touches VMService / HostNetwork / ImageService directly —
only the exact operations the Daemon hands it at construction.

Daemon lazy-init helper workspaceSvc() mirrors the Phase 1/2
pattern. Test literals still write `&Daemon{store: db, runner: r}`
and get a wired workspace service for free. Tests that override the
inspect/import seams (workspace_test.go, ~4 sites) assign them on
d.workspaceSvc() instead of on the daemon literal.

Dispatch in daemon.go: vm.workspace.prepare and vm.workspace.export
now forward one-liners to d.workspaceSvc().

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:42:31 -03:00
d7614a3b2b
daemon split (2/5): extract *ImageService service
Second phase of splitting the daemon god-struct. ImageService now owns
all image + kernel registry operations: register/promote/delete/pull
for images (bundle + OCI paths), the six kernel commands, and the
shared SSH-key/work-seed injection helpers. imageOpsMu (the
publication-window lock) lives on the service; so do the three OCI
pull test seams pullAndFlatten / finalizePulledRootfs / bundleFetch.
The four files images.go, images_pull.go, image_seed.go, kernels.go
flipped their receivers from *Daemon to *ImageService.

FindImage moved with the service. Daemon keeps a thin FindImage
forwarder so callers reading the dispatch code see the obvious
facade and tests that pre-date the split still compile.

flattenNestedWorkHome — called from image_seed.go, vm_authsync.go,
and vm_disk.go across future service boundaries — became a
package-level helper taking a CommandRunner explicitly. Daemon keeps
a deprecated forwarder for now; the other services will use the
package form.

Lazy-init helper imageSvc() on Daemon mirrors hostNet() from
Phase 1, so test literals like &Daemon{store: db, runner: r, ...}
that don't spell out an ImageService still get a working one.
Tests that override the image test seams (autopull_test,
concurrency_test, images_pull_test, images_pull_bundle_test) now
assign d.img = &ImageService{...seams...}; the two-statement pattern
matches what Phase 1 established for HostNetwork.

Dispatch in daemon.go is cleaner now: every image/kernel RPC handler
is a single-liner forwarding to d.imageSvc().*. Phase 5 will do the
same for VM lifecycle.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:30:32 -03:00
362009d747
daemon split (1/5): extract *HostNetwork service
First phase of splitting the daemon god-struct into focused services
with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface
pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap /
createTap), bridge + socket dir setup, firecracker process primitives
(find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM
snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle
+ routing setup, and the vsock-agent readiness probe. That's 7 files
whose receivers flipped from *Daemon to *HostNetwork, plus a new
host_network.go that declares the struct, its hostNetworkDeps, and
the factored firecracker + DNS helpers that used to live in vm.go.

Daemon gives up the tapPool and vmDNS fields entirely; they're now
HostNetwork's business. Construction goes through newHostNetwork in
Daemon.Open with an explicit dependency bag (runner, logger, config,
layout, closing). A lazy-init hostNet() helper on Daemon supports
test literals that don't wire net explicitly — production always
populates it eagerly.

Signature tightenings where the old receiver reached into VM-service
state:
 - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable).
   Callers resolve tap from the handle cache themselves.
 - initializeTapPool(ctx) → initializeTapPool(usedTaps []string).
   Daemon.Open enumerates VMs, collects taps from handles, hands the
   slice in.

rebuildDNS stays on *Daemon as the orchestrator — it filters by
vm-alive (a VMService concern; the handles cache moves there in
phase 4), then calls HostNetwork.replaceDNS with the already-filtered
map.

Capability hooks continue to take *Daemon; they now use it as a
facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned
CapabilityHost interface extraction is orthogonal, left for later.

Tests: dns_routing_test.go + fastpath_test.go + nat_test.go +
snapshot_test.go + open_close_test.go were touched to construct
HostNetwork literals where they exercise its methods directly, or
route through d.hostNet() where they exercise the Daemon entry
points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:11:46 -03:00
eba9a553bf
daemon: use exact-name lookup for VM-create uniqueness
reserveVM's duplicate-name guard routed through Daemon.FindVM, which
falls back to prefix-matching on both ids and names when no exact
match is found. That turns the uniqueness check into a correctness
bug: a brand-new VM name can be rejected because it happens to
prefix an existing VM's id, or an existing VM's name. So `vm create
--name beta` fails when `beta-sandbox` already exists.

Swap in a dedicated store.GetVMByName that does a literal `WHERE
name = ?` lookup, and use it from reserveVM. FindVM keeps its
prefix-matching behaviour for user-facing lookup paths (`vm ssh
<partial>`, `vm stop <partial>`) where "did you mean" semantics
are the feature.

Tests:
 - TestReserveVMAllowsNameThatPrefixesExistingVM — seeds a VM whose
   id + name both start with "longname", then reserves two new VMs
   named "longname" and "longname-sandbox". Both must succeed.
   Under the old FindVM-based check, both would fail.
 - TestReserveVMRejectsExactDuplicateName — actual collisions are
   still rejected after the swap.
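
Sketch of the exact-match lookup, with the store and model shapes
simplified; the point is the literal WHERE clause with no prefix fallback:

```go
package store

import (
	"context"
	"database/sql"
	"errors"
)

// Assumed row shape; the real model carries far more fields.
type vmRow struct {
	ID, Name, State string
}

func getVMByName(ctx context.Context, db *sql.DB, name string) (*vmRow, error) {
	row := db.QueryRowContext(ctx,
		`SELECT id, name, state FROM vms WHERE name = ?`, name)
	var vm vmRow
	err := row.Scan(&vm.ID, &vm.Name, &vm.State)
	if errors.Is(err, sql.ErrNoRows) {
		return nil, nil // name is free: reserveVM may proceed
	}
	if err != nil {
		return nil, err
	}
	return &vm, nil
}
```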

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 14:00:33 -03:00
108f7a0600
ssh-config: make the ssh <name>.vm shortcut opt-in
Before this change, every daemon.Open() wrote a Host *.vm stanza into
~/.ssh/config in a marker-fenced block. That's a real footgun for users
who manage their SSH config declaratively (chezmoi, dotfiles, NixOS):
banger was mutating host state outside its own directory on every
daemon start, easy to miss and hard to audit.

New contract: the daemon only ever writes its own ssh_config file at
~/.config/banger/ssh_config. ~/.ssh/config is untouched unless the user
opts in. `banger vm ssh <name>` still works out of the box — the
shortcut only matters for plain `ssh sandbox.vm` from any terminal.

The opt-in surface is `banger ssh-config`:

  banger ssh-config              # prints path + include-line +
                                 # install/uninstall hints
  banger ssh-config --install    # adds `Include <bangerConfig>` to
                                 # ~/.ssh/config inside a marker-fenced
                                 # block; idempotent; migrates any
                                 # legacy inline Host *.vm block from
                                 # pre-opt-in builds
  banger ssh-config --uninstall  # removes the new Include block AND
                                 # any legacy inline block

Doctor gains a gentle warn-level note when banger's ssh_config exists
but the user hasn't wired it in — not a fail, since the shortcut is
convenience and `banger vm ssh` covers the essential case.

Tests cover: daemon writes banger file and does NOT touch ~/.ssh/config,
Install adds the block, Install is idempotent, Install migrates the
legacy inline block cleanly (removing it, preserving unrelated
entries, adding the new Include block), Uninstall removes both marker
variants, Uninstall is a no-op when ~/.ssh/config is absent, and
UserSSHIncludeInstalled detects both marker shapes.

README reframes the feature as optional convenience.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 13:57:26 -03:00
99d0811097
daemon: shrink createVMMu + imageOpsMu to reservation/publication windows
Before: createVMMu was held across the whole of CreateVM — including
image resolution (which could fire a full auto-pull) and startVMLocked
(boot of multiple seconds). imageOpsMu was held across the whole of
PullImage/RegisterImage/PromoteImage/DeleteImage, so any slow OCI pull,
bundle download, or file copy blocked every other image mutation and
every other VM create that needed to auto-pull. The async create API
bought nothing if all creates serialised on the same mutex.

CreateVM is now three phases:

 1. Validate + resolve image (possibly auto-pulling). No global lock.
 2. reserveVM: take createVMMu only long enough to re-check the name
    is free, allocate the next guest IP, and UpsertVM the "created"
    row. Milliseconds.
 3. startVMLocked: run the full boot flow under the per-VM lock only.

Parallel creates of different VMs now overlap on image resolution +
boot; they contend only across the reservation claim.

For the image surface a new publishImage helper isolates the commit
atom (recheck name free, atomic rename stagingDir→finalDir, UpsertImage)
under imageOpsMu. pullFromBundle + pullFromOCI do their network fetch
+ ext4 build + ownership fixup + agent injection outside the lock;
Register moves validation + kernel resolution outside; Promote moves
file copy + SSH-key seeding outside; Delete keeps a brief lock over
the lookup + reference check + store delete and does file cleanup
unlocked.

Two concurrency tests assert the new behaviour:
 - TestPullImageDoesNotSerialiseOnDifferentNames fails the old code
   (second pull blocks on imageOpsMu and never reaches the body).
 - TestPullImageRejectsNameClashAtPublish confirms the publish-window
   recheck is what enforces name uniqueness now that the body runs
   unlocked — exactly one winner.

ARCHITECTURE.md updated to describe the new scope explicitly instead
of calling the locks "narrow".
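
The reservation window, as a sketch with simplified types (the real
reserveVM also records image and spec fields):

```go
package daemon

import (
	"fmt"
	"sync"
)

type reservedVM struct {
	Name    string
	GuestIP string
	State   string
}

type vmCreator struct {
	createVMMu sync.Mutex                    // held across the reservation only
	nextIP     func() string                 // guest IP allocator
	getByName  func(name string) *reservedVM // exact-name store lookup
	upsert     func(vm *reservedVM) error    // persist the "created" row
}

// reserveVM re-checks the name and writes the row in milliseconds; image
// resolution (auto-pull) and startVMLocked (the boot) run outside this lock.
func (c *vmCreator) reserveVM(name string) (*reservedVM, error) {
	c.createVMMu.Lock()
	defer c.createVMMu.Unlock()
	if c.getByName(name) != nil {
		return nil, fmt.Errorf("vm %q already exists", name)
	}
	vm := &reservedVM{Name: name, GuestIP: c.nextIP(), State: "created"}
	return vm, c.upsert(vm)
}
```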

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 13:44:22 -03:00
afe91e805a
drop unused bench-create script + Makefile target
The script carried a python3 dep for one json.dumps on a VM name
that's always alphanumeric-plus-dashes anyway, it was never wired
into CI or docs, and `time banger vm create` covers the same need
ad hoc when anyone wants to measure create latency.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 13:33:09 -03:00
58464ac28c
docs + doctor: be honest about amd64-only support
The README sold the product as "Linux with /dev/kvm"; the deeper docs
admit that the Makefile pins companion builds to GOARCH=amd64, the
kernel catalog ships only x86_64 entries, and OCI import pulls
linux/amd64 layers. arm64 users who show up through the README only
discover that after install fails in non-obvious ways.

Two surface-level fixes:

- README requirements list leads with "x86_64 / amd64 Linux — arm64 is
  not supported today", with a short note on the three places that
  assumption lives so users understand it's not a last-mile gap.
- `banger doctor` now runs an architecture check that passes on amd64
  and FAILS (not warns) on anything else, referencing the three
  downstream assumptions. Hard-fail rather than warn so a user on an
  arm64 machine doesn't waste time chasing unrelated preflight items.
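
The check itself is tiny; roughly this shape, with the result type assumed:

```go
package main

import (
	"fmt"
	"runtime"
)

// doctorCheck is a stand-in for whatever result type doctor actually emits.
type doctorCheck struct {
	Name   string
	Status string // "pass" | "fail"
	Detail string
}

func archCheck() doctorCheck {
	if runtime.GOARCH == "amd64" {
		return doctorCheck{Name: "architecture", Status: "pass", Detail: "x86_64/amd64"}
	}
	return doctorCheck{
		Name:   "architecture",
		Status: "fail", // hard fail: companion builds, kernel catalog, OCI pulls are amd64-only
		Detail: fmt.Sprintf("%s is not supported; banger requires x86_64/amd64", runtime.GOARCH),
	}
}

func main() {
	fmt.Printf("%+v\n", archCheck())
}
```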

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 13:03:50 -03:00
e69810610a
daemon: correct ARCHITECTURE doc to match actual package shape + lock scope
Two promises the doc was making that the code doesn't keep:

1. "Helpers moved out so the package stays focused on orchestration."
   The package still has ~29 files and ~130 func (d *Daemon) methods
   wiring VM lifecycle, image management, host networking, background
   reconciliation, and JSON-RPC dispatch. Calling it "just orchestration"
   sets readers up for surprise. Rewrite the subpackages preamble to
   say so, and flag the service split as a post-v0.1.0 project.

2. "vmLocks[id] is held only across short synchronous state validation
   and DB mutations." That's what workspace.prepare does; regular
   lifecycle ops (start/stop/delete/set) go through withVMLockByRef
   and hold the lock across the whole callback body, which for `start`
   means preflight + bridge + firecracker spawn + post-boot wiring.
   Rewrite the vmLocks bullet and the lock-ordering section to say
   that explicitly, so readers don't build "surely my long flow under
   the lock can't be what the doc means" reasoning on top of a false
   premise.

Doc-only change. Code behaviour is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 13:02:36 -03:00
34dd7644d8
store: introduce versioned migrations with ordered runner + atomic apply
The old migrate() helper only knew how to re-run a fixed slab of CREATE
TABLE IF NOT EXISTS plus per-column ensureColumnExists calls. That worked
while every schema change was a benign additive column; it falls apart
as soon as we need a data backfill, an index, a rename, or anything that
has to happen exactly once in a known order.

Replaces it with a schema_migrations table + ordered []migration slice.
Each migration has a unique id, a human-readable name, and a func(*Tx)
body; the runner opens a transaction per migration so DDL and any data
changes either both land and get recorded or both roll back together,
leaving the DB in a state where retrying on next Open() reapplies from
the same point.

Migration 1 ("baseline") collapses the current schema into one entry:
fresh databases apply it in one shot; existing dev databases see
idempotent `CREATE TABLE IF NOT EXISTS` + `ALTER TABLE … ADD COLUMN`
statements that succeed as no-ops, and the only net effect is the
schema_migrations row that brings them into the versioned system.

Tests cover fresh apply, idempotent re-open, skipping already-applied
ids, rollback on body error (the transient table the migration created
must not survive), and duplicate-id rejection.
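
Sketch of the runner's core loop (names assumed, duplicate-id rejection and
error wrapping simplified):

```go
package store

import (
	"database/sql"
	"fmt"
)

type migration struct {
	id   int
	name string
	fn   func(*sql.Tx) error
}

func runMigrations(db *sql.DB, migrations []migration) error {
	if _, err := db.Exec(`CREATE TABLE IF NOT EXISTS schema_migrations (
		id INTEGER PRIMARY KEY,
		name TEXT NOT NULL,
		applied_at TEXT NOT NULL)`); err != nil {
		return err
	}
	for _, m := range migrations {
		var applied int
		if err := db.QueryRow(
			`SELECT COUNT(*) FROM schema_migrations WHERE id = ?`, m.id).Scan(&applied); err != nil {
			return err
		}
		if applied > 0 {
			continue // already recorded: skip
		}
		tx, err := db.Begin()
		if err != nil {
			return err
		}
		if err := m.fn(tx); err != nil {
			tx.Rollback() // DDL and data changes roll back together
			return fmt.Errorf("migration %d (%s): %w", m.id, m.name, err)
		}
		if _, err := tx.Exec(
			`INSERT INTO schema_migrations (id, name, applied_at) VALUES (?, ?, datetime('now'))`,
			m.id, m.name); err != nil {
			tx.Rollback()
			return err
		}
		if err := tx.Commit(); err != nil {
			return err
		}
	}
	return nil
}
```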

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 12:59:42 -03:00
b930c51990
runtime sockets: close the local-user race window around control-plane creation
Previously the daemon socket, per-VM firecracker API socket, and vsock
socket were transiently world-exposed on hosts without XDG_RUNTIME_DIR:
the runtime directory landed in /tmp at 0755, Firecracker ran with
umask 000 (mode 0666 sockets), and only a follow-up chown/chmod in
EnsureSocketAccess tightened them. A local attacker could race into
bangerd.sock or the firecracker API socket during that window.

Three changes:

- internal/paths/paths.go: RuntimeDir is now created (and re-chmod'd if
  stale) at 0700 unconditionally. When XDG_RUNTIME_DIR is unset and we
  fall back to /tmp/banger-runtime-<uid>, Ensure() now verifies the
  parent dir is owned by the current uid and 0700 mode — refusing to
  place sockets inside a directory someone else created. Symlink swaps
  rejected via Lstat.

- internal/firecracker/client.go: launch firecracker with umask 077
  instead of umask 000 so the API socket is mode 0600 from birth. The
  chown in fcproc.EnsureSocketAccess still transfers ownership from
  root to the invoking user afterwards.

- internal/daemon/fcproc/fcproc.go: EnsureSocketDir now creates (and
  re-chmod's) the runtime socket directory at 0700.

Tests cover the tightening path — an existing 0755 RuntimeDir is
re-chmod'd on Ensure.
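
A sketch of the fallback-directory checks (Linux-only; function and package
names assumed):

```go
package paths

import (
	"fmt"
	"os"
	"syscall"
)

// ensureFallbackRuntimeDir sketches the checks described above for the
// /tmp/banger-runtime-<uid> fallback: 0700, owned by us, never a symlink.
func ensureFallbackRuntimeDir(dir string) error {
	if err := os.MkdirAll(dir, 0o700); err != nil {
		return err
	}
	fi, err := os.Lstat(dir)
	if err != nil {
		return err
	}
	if fi.Mode()&os.ModeSymlink != 0 {
		return fmt.Errorf("%s is a symlink; refusing to place sockets behind it", dir)
	}
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok || int(st.Uid) != os.Getuid() {
		return fmt.Errorf("%s is owned by another user; refusing to use it", dir)
	}
	if fi.Mode().Perm() != 0o700 {
		return os.Chmod(dir, 0o700) // tighten a stale or too-open directory
	}
	return nil
}
```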

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 12:53:47 -03:00
2b6437d1b4
remove vm session feature
Cuts the daemon-managed guest-session machinery (start/list/show/
logs/stop/kill/attach/send). The feature shipped aimed at agent-
orchestration workflows (programmatic stdin piping into a long-lived
guest process) that aren't driving any concrete user today, and the
~2.3K LOC of daemon surface area — attach bridge, FIFO keepalive,
controller registry, sessionstream framing, SQLite persistence — was
locking in an API we'd have to keep through v0.1.0.

Anything session-flavoured that people actually need today can be
done with `vm ssh + tmux` or `vm run -- cmd`.

Deleted:
- internal/cli/commands_vm_session.go
- internal/daemon/{guest_sessions,session_lifecycle,session_attach,session_stream,session_controller}.go
- internal/daemon/session/ (guest-session helpers package)
- internal/sessionstream/ (framing package)
- internal/daemon/guest_sessions_test.go
- internal/store/guest_session_test.go
- GuestSession* types from internal/{api,model}
- Store UpsertGuestSession/GetGuestSession/ListGuestSessionsByVM/DeleteGuestSession + scanner helpers
- guest.session.* RPC dispatch entries
- 5 CLI session tests, 2 completion tests, 2 printer tests

Extracted:
- ShellQuote + FormatStepError lifted to internal/daemon/workspace/util.go
  (only non-session consumer); workspace package now self-contained
- internal/daemon/guest_ssh.go keeps guestSSHClient + dialGuest +
  waitForGuestSSH — still used by workspace prepare/export
- internal/daemon/fake_firecracker_test.go preserves the test helper
  that used to live in guest_sessions_test.go

Store schema: CREATE TABLE guest_sessions and its column migrations
removed. Existing dev DBs keep an orphan table (harmless, pre-v0.1.0).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 12:47:58 -03:00
c42fcbe012
cli + daemon: move test seams off package globals onto injected structs
CLI: introduce internal/cli.deps which owns every RPC/SSH/host-command
seam the tree used to reach through mutable package vars. Command
builders, orchestrators, and the completion helpers become methods on
*deps. Tests construct their own deps per case, so fakes no longer leak
across cases and tests are free to run in parallel.

Daemon: move workspaceInspectRepoFunc + workspaceImportFunc onto the
Daemon struct (workspaceInspectRepo / workspaceImport), mirroring the
existing guestWaitForSSH / guestDial pattern. Workspace-prepare tests
drop t.Parallel() guards now that they no longer mutate process-wide
state.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 19:03:55 -03:00
d38f580e00
doctor: surface state store open failure as failing check
Previously store.Open errors were silently swallowed, so `banger
doctor` could report green while the default-image check (and any
other store-dependent diagnostic) was silently skipped because
d.store was nil.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 17:35:27 -03:00
3f6ecb4376
cli: split banger.go god file into focused files
Pure code motion — banger.go 3508→240 LOC, same-package
decomposition keeps all identifiers visible without export changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 17:34:32 -03:00
3a5f4cd40d
cli: delete vm run's dead import path + duplicated git inspection
The CLI carried a full second copy of the workspace import
implementation that `vm run` never actually used:

  - importVMRunRepoToGuest (no callers — the live flow calls the
    daemon's PrepareVMWorkspace RPC instead)
  - prepareVMRunRepoCopy, vmRunCheckoutCommit, vmRunCheckoutScript,
    gitFileURL, runHostCommand (all reachable only from the dead
    importVMRunRepoToGuest)

Plus a duplicated repo-inspection surface that shadowed the
daemon's:

  - inspectVMRunRepo ran every git query the daemon re-ran during
    workspace.prepare (HEAD, branch, identity, origin, overlay list)
  - gitOutput / gitTrimmedOutput / gitResolvedConfigValue /
    parseNullSeparatedOutput / listSubmodules / listOverlayPaths /
    resolveVMRunSourcePath — all identical to the exported
    workspace.* versions
  - vmRunRepoSpec — same fields as workspace.RepoSpec

Replaced with a single minimal preflight:

  func vmRunPreflightRepo(ctx, rawPath) (absPath, err error)

The preflight only checks what the user can fix locally before
banger creates a VM (path exists, sits in a non-bare git repo, no
submodules). The daemon's workspace.prepare RPC does the full
inspection — and returns RepoRoot + RepoName in the response, which
the CLI now threads into the tooling harness instead of computing
them a second time.

Signature changes:

  runVMRun(ctx, ..., *vmRunRepo, ...)   // was: *vmRunRepoSpec
  startVMRunToolingHarness(ctx, client, repoRoot, repoName, progress)
                                        // was: (ctx, client, spec, progress)
  vmRunToolingHarnessScript(plan)       // was: (spec, plan)
  vmRunToolingHarnessLaunchScript(repoName)  // was: (spec)

Tests: the CLI-side git-inspection tests are replaced by a single
TestVMRunPreflightRejectsSubmodules that exercises the preflight.
Everything else (tooling harness script, progress renderer, SSH args,
runVMRun flows) keeps working. The shallow-copy / checkout-script
tests are gone — that code now lives only in
internal/daemon/workspace and is tested there.

Also fixed a latent bug the refactor exposed: vm run's --from flag
defaults to "HEAD", which the daemon reads as "from without branch"
and rejects. CLI now scrubs fromRef when branchName is empty.

Live verified: `banger vm run --name X . -- cmd` boots, workspace
materialises at /root/repo with matching HEAD, exit code propagates.
2026-04-19 17:01:26 -03:00
ae14b9499d
ssh: trust-on-first-use host key pinning everywhere
Guest host-key verification was off in all three SSH paths:

  * Go SSH (internal/guest/ssh.go) used ssh.InsecureIgnoreHostKey
  * `banger vm ssh` passed StrictHostKeyChecking=no
    + UserKnownHostsFile=/dev/null
  * `~/.ssh/config` Host *.vm shipped the same posture into the
    user's global config

Now each path verifies against a banger-owned known_hosts file at
`~/.local/state/banger/ssh/known_hosts` with TOFU semantics:

  * First dial to a VM pins the key.
  * Subsequent dials require an exact match. A mismatch fails with
    an explicit "possible MITM" error.
  * `vm delete` removes the entries so a future VM reusing the IP
    or name re-pins cleanly.
  * The user's `~/.ssh/known_hosts` is untouched.

Changes:

  internal/guest/known_hosts.go (new) — OpenSSH-compatible parser,
    TOFUHostKeyCallback, RemoveKnownHosts. Process-wide mutex
    around the file.
  internal/guest/ssh.go — Dial and WaitForSSH grew a knownHostsPath
    parameter threaded through the callback. Empty path keeps the
    insecure callback (tests + throwaway tools only; documented).
  internal/daemon/{guest_sessions,session_attach,session_lifecycle,
    session_stream}.go — call sites pass d.layout.KnownHostsPath.
  internal/daemon/ssh_client_config.go — the ~/.ssh/config Host *.vm
    block now points at banger's known_hosts and uses
    StrictHostKeyChecking=accept-new. Missing path → fail closed.
  internal/daemon/vm_lifecycle.go — deleteVMLocked drops known_hosts
    entries for the VM's IP and DNS name via removeVMKnownHosts.
  internal/cli/banger.go — sshCommandArgs swaps StrictHostKeyChecking
    no + /dev/null for banger's file + accept-new. Path resolution
    failure falls through to StrictHostKeyChecking=yes.
  internal/paths/paths.go — Layout gains SSHDir + KnownHostsPath;
    Ensure creates SSHDir at 0700.

Tests (internal/guest/known_hosts_test.go): pin on first use, accept
matching key on second dial, reject mismatch, empty path skips
checking, RemoveKnownHosts drops the entry, re-pin works after
remove. Existing daemon + cli tests updated to assert the new
posture and regression-guard against the old flags.

Live verified: vm run writes the pin to banger's known_hosts at 0600
inside a 0700 dir; banger vm ssh + ssh root@<vm>.vm both succeed
using the pin; vm delete clears it.
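
For orientation, a minimal TOFU callback built on
golang.org/x/crypto/ssh/knownhosts. The in-tree implementation is a
hand-rolled parser with its own locking, so treat this as an approximation
of the semantics, not the code:

```go
package guest

import (
	"errors"
	"fmt"
	"net"
	"os"
	"path/filepath"

	"golang.org/x/crypto/ssh"
	"golang.org/x/crypto/ssh/knownhosts"
)

func tofuHostKeyCallback(path string) (ssh.HostKeyCallback, error) {
	// Make sure the file exists so the first dial has somewhere to pin into.
	if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
		return nil, err
	}
	f, err := os.OpenFile(path, os.O_CREATE, 0o600)
	if err != nil {
		return nil, err
	}
	f.Close()
	check, err := knownhosts.New(path)
	if err != nil {
		return nil, err
	}
	return func(hostname string, remote net.Addr, key ssh.PublicKey) error {
		err := check(hostname, remote, key)
		if err == nil {
			return nil // pinned key matches
		}
		var keyErr *knownhosts.KeyError
		if errors.As(err, &keyErr) && len(keyErr.Want) == 0 {
			// Unknown host: trust on first use and append the pin.
			// (The real code serialises this with a process-wide mutex.)
			out, ferr := os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0o600)
			if ferr != nil {
				return ferr
			}
			defer out.Close()
			_, ferr = fmt.Fprintln(out, knownhosts.Line([]string{hostname}, key))
			return ferr
		}
		return fmt.Errorf("host key mismatch for %s (possible MITM): %w", hostname, err)
	}, nil
}
```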
2026-04-19 16:46:03 -03:00
a59958d4f5
daemon: roll back host state on any Open() failure
Open() touched several pieces of host state before hitting the step
that returned the error:

  * SQLite handle (store.Open)
  * managed SSH client config block (ensureVMSSHClientConfig)
  * vm-DNS UDP listener goroutine (startVMDNS)
  * systemd-resolved per-interface routing (ensureVMDNSResolverRouting)

The only deferred cleanup guarded stopVMDNS. A reconcile() or
initializeTapPool() failure therefore left the listener running, the
resolver wiring in place, and the SQLite handle open. A subsequent
startup attempt ran into "port 42069 already in use" or silently
published stale state.

Fix: once `d` exists, defer `d.Close()` on `err != nil`. Close is
idempotent (sync.Once) and every teardown step (listener close, DNS
listener close, resolver revert, session registry close, store close)
is nil-guarded, so calling it on a daemon that never got past the
first startup step is safe.

Tests (internal/daemon/open_close_test.go):

  - TestCloseOnPartiallyInitialisedDaemon: Close survives a daemon
    with only store + closing channel, and with a vmDNS listener but
    nothing else. Catches regressions where a teardown step forgets
    to nil-check.
  - TestCloseIdempotentUnderConcurrency: 5 goroutines racing on
    Close() never panic (sync.Once + close(d.closing) survive).
  - TestOpenFailureRunsCloseCleanup: structural check that the
    `defer cleanup() if err != nil` pattern actually fires.

Live: `banger daemon stop` cleanly, `banger vm ls` restarts daemon
without a residual listener on port 42069.
2026-04-19 16:36:29 -03:00
d1b9a8c102
remove experimental web UI
The web UI shipped as "experimental" and was never finished — no nav
off the dashboard, no live updates, no settled design, never a
supported surface. It was already opt-in (off by default); leaving the code
in the tree for v0.1.0 only invited "does this work?" questions and
kept HostSummary/BangerSummary/SudoStatus types on the public RPC
surface that nothing else uses.

Removed:

  internal/webui/                         (all Go + templates + assets)
  internal/daemon/web.go                  (server start / Layout / Config / ListVMs / ListImages)
  internal/daemon/dashboard.go            (DashboardSummary aggregator)

Simplified:

  internal/api/types.go                   drop WebURL on PingResult, drop
                                          HostSummary / SudoStatus / BangerSummary /
                                          DashboardSummary / DashboardSummaryResult
  internal/model/types.go                 drop DaemonConfig.WebListenAddr
  internal/config/config.go               drop web_listen_addr from fileConfig + Load
  internal/daemon/daemon.go               drop webListener / webServer / webURL fields +
                                          startWebServer() call + ping WebURL population
  internal/cli/banger.go                  `daemon status` output no longer branches on web
  internal/daemon/{doc.go,ARCHITECTURE.md} drop web UI sections
  README.md                               drop web_listen_addr config bullet + security paragraph

Tests updated to reflect the new shape. Coverage 57.3 -> 58.9% (the
webui package was largely untested; its removal lifts the ratio
without moving the numerator). `banger daemon status` output and
--help are web-free. Lint + full suite green.
2026-04-19 14:28:08 -03:00
687fcf0b59
vm state: split transient kernel/process handles off the durable schema
Separates what a VM IS (durable intent + identity + deterministic
derived paths — `VMRuntime`) from what is CURRENTLY TRUE about it
(firecracker PID, tap device, loop devices, dm-snapshot target — new
`VMHandles`). The durable state lives in the SQLite `vms` row; the
transient state lives in an in-memory cache on the daemon plus a
per-VM `handles.json` scratch file inside VMDir, rebuilt at startup
from OS inspection. Nothing kernel-level rides the SQLite schema
anymore.

Why:

  Persisting ephemeral process handles to SQLite forced reconcile to
  treat "running with a stale PID" as a first-class case and mix it
  with real state transitions. The schema described what we last
  observed, not what the VM is. Every time the observation model
  shifted (tap pool, DM naming, pgrep fallback) the reconcile logic
  grew a new branch. Splitting lets each layer own what it's good at:
  durable records describe intent, in-memory cache + scratch file
  describe momentary reality.

Shape:

  - `model.VMHandles` = PID, TapDevice, BaseLoop, COWLoop, DMName,
    DMDev. Never in SQLite.
  - `VMRuntime` keeps: State, GuestIP, APISockPath, VSockPath,
    VSockCID, LogPath, MetricsPath, DNSName, VMDir, SystemOverlay,
    WorkDiskPath, LastError. All durable or deterministic.
  - `handleCache` on `*Daemon` — mutex-guarded map + scratch-file
    plumbing (`writeHandlesFile` / `readHandlesFile` /
    `rediscoverHandles`). See `internal/daemon/vm_handles.go`.
  - `d.vmAlive(vm)` replaces the 20+ inline
    `vm.State==Running && ProcessRunning(vm.Runtime.PID, apiSock)`
    spreads. Single source of truth for liveness.
  - Startup reconcile: per running VM, load the scratch file, pgrep
    the api sock, either keep (cache seeded from scratch) or demote
    to stopped (scratch handles passed to cleanupRuntime first so DM
    / loops / tap actually get torn down).

Verification:

  - `go test ./...` green.
  - Live: `banger vm run --name handles-test -- cat /etc/hostname`
    starts; `handles.json` appears in VMDir with the expected PID,
    tap, loops, DM.
  - `kill -9 $(pgrep bangerd)` while the VM is running, re-invoke the
    CLI, daemon auto-starts, reconcile recognises the VM as alive,
    `banger vm ssh` still connects, `banger vm delete` cleans up.

Tests added:

  - vm_handles_test.go: scratch-file roundtrip, missing/corrupt file
    behaviour, cache concurrency, rediscoverHandles prefers pgrep
    over scratch, returns scratch contents even when process is
    dead (so cleanup can tear down kernel state).
  - vm_test.go: reconcile test rewritten to exercise the new flow
    (write scratch → reconcile reads it → verifies process is gone →
    issues dmsetup/losetup teardown).

ARCHITECTURE.md updated; `handles` added to Daemon field docs.
2026-04-19 14:18:13 -03:00
2e6e64bc04
guest sshd: drop DEBUG3 + StrictModes no; normalise /root perms
Previously /etc/ssh/sshd_config.d/99-banger.conf landed with:

  LogLevel DEBUG3
  PermitRootLogin yes
  PubkeyAuthentication yes
  AuthorizedKeysFile /root/.ssh/authorized_keys
  StrictModes no

DEBUG3 was debug leftover that floods journald in normal use.
StrictModes no was a workaround for /root perm drift on the work
disk — the real fix is to make those perms correct at provisioning
time.

New drop-in:

  PermitRootLogin prohibit-password
  PubkeyAuthentication yes
  PasswordAuthentication no
  KbdInteractiveAuthentication no
  AuthorizedKeysFile /root/.ssh/authorized_keys

prohibit-password blocks password root login even if PasswordAuth
gets flipped on elsewhere; KbdInteractiveAuth no closes the last
interactive fallback; StrictModes is now on (sshd's default).

normaliseHomeDirPerms chown/chmods /root to 0755 root:root at every
work-disk mount (ensureAuthorizedKeyOnWorkDisk,
seedAuthorizedKeyOnExt4Image); the .ssh dir also explicitly
chown'd root:root. Verified end-to-end against a real VM:
`sshd -T` reports strictmodes yes and all five directives match.

Regression test (sshd_config_test.go) pins the allow-list and the
deny-list (DEBUG3, StrictModes no, bare `PermitRootLogin yes`) so
the next accidental reintroduction fails fast.

README's Security section updated to reflect the new posture.
2026-04-19 13:40:40 -03:00
6cd52d12f4
workspace prepare: release VM mutex before guest I/O
Previously withVMLockByRef held the per-VM mutex across InspectRepo,
waitForGuestSSH, dialGuest, ImportRepoToGuest (the tar stream!), and
the readonly chmod. A large repo could block `vm stop` / `vm delete`
/ `vm restart` on the same VM for however long the import took.

Split into two phases:

  1. VM mutex held briefly to validate state (running + PID alive)
     and snapshot the fields needed for SSH (guest IP, api sock).
  2. VM mutex released. Acquire workspaceLocks[id] — a separate
     per-VM mutex scoped to workspace.prepare / workspace.export —
     for the guest I/O phase.

Lifecycle ops (stop/delete/restart/set) only take vmLocks, so they
no longer queue behind a slow import. Two concurrent prepares on the
same VM still serialise via workspaceLocks so tar streams don't
interleave. ExportVMWorkspace also acquires workspaceLocks to avoid
snapshotting a half-streamed import.

Two regression tests (sequential — they swap package-level seams):

  ReleasesVMLockDuringGuestIO: stall the import fake, assert the VM
  mutex is acquirable from another goroutine during the stall.

  SerialisesConcurrentPreparesOnSameVM: 3 concurrent prepares, assert
  Import is only ever invoked 1-at-a-time per VM.

ARCHITECTURE.md documents the split + updated lock ordering.
2026-04-19 13:32:42 -03:00
99de42385f
workspace export: stop mutating the guest repo index
Previously `banger vm workspace export` ran `git add -A` against the
guest's real `.git/index`, so the observation step left staged
changes behind that users never asked for. Reconnecting later (ssh,
another export) surfaced them and looked like phantom work.

Route `git add -A` through a throwaway index file instead:

  tmp_idx=$(mktemp ...)
  trap 'rm -f "$tmp_idx"' EXIT
  git read-tree <ref> --index-output="$tmp_idx"
  GIT_INDEX_FILE="$tmp_idx" git add -A
  GIT_INDEX_FILE="$tmp_idx" git diff --cached <ref> --binary|--name-only

The real .git/index, working tree, and refs stay exactly as the user
left them. Same diff content — commits past <ref>, uncommitted edits,
and untracked files (minus .gitignore) all captured.

Regression test locks the invariant: every export script must route
add -A through GIT_INDEX_FILE and clean the temp index on exit. CLI
help text updated to say "non-mutating".
2026-04-19 13:20:56 -03:00
21b74639d8
vm defaults: host-aware sizing + spec line on spawn + doctor check
Replaces the static model.Default* constants that drove --vcpu / --memory
/ --disk-size with a three-layer resolver:

  1. [vm_defaults] in ~/.config/banger/config.toml (if set)
  2. host-derived heuristics (cpus/4 capped at 4; ram/8 capped at 8 GiB)
  3. baked-in constants (floor)

Visibility:

- Every `vm run` / `vm create` prints a `spec:` line before progress
  begins: `spec: 4 vcpu · 8192 MiB · 8G disk`. Matches the VM that
  actually gets created because the CLI is now the single source of
  truth — it resolves, populates the flag defaults, and forwards the
  explicit values to the daemon.
- `banger doctor` adds a "vm defaults" check showing per-field
  provenance (config|auto|builtin) and the config file path for
  overrides.
- `--help` shows the resolved defaults (e.g. `--vcpu int (default 4)`
  on an 8-core host).

No `banger config init` command, no first-run side effects, no writes
to the user's filesystem behind their back. Users who want explicit
control set the keys; everyone else gets sensible numbers that track
their hardware.
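
The vcpu layer, reduced to a sketch; memory and disk follow the same
pattern with their own caps, and the floor constant here is illustrative,
not the real model.Default value:

```go
// configured comes from [vm_defaults] (0 when unset); hostCPUs from runtime.NumCPU().
func resolveVCPU(configured, hostCPUs int) int {
	if configured > 0 {
		return configured // layer 1: config.toml wins outright
	}
	auto := hostCPUs / 4 // layer 2: host-derived heuristic
	if auto > 4 {
		auto = 4 // cap
	}
	const builtinFloor = 2 // layer 3: baked-in constant (illustrative value)
	if auto < builtinFloor {
		return builtinFloor
	}
	return auto
}
```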
2026-04-19 13:06:51 -03:00
78ff482bfa
release prep: opt-in web UI, make uninstall, fix stale kernel-catalog docs
- WebListenAddr default is now "" (empty). The experimental web UI was
  running on 127.0.0.1:7777 by default, which surprises users who never
  opted in. Users who want it set `web_listen_addr = "127.0.0.1:7777"`
  in config.toml.
- `make uninstall` stops the daemon (if any) and removes the installed
  binaries. Preserves user data on disk but prints the paths so `rm -rf`
  can follow for a full purge. Documented in README next to install.
- docs/kernel-catalog.md: replace the `void-6.12` and `alpine-3.23`
  examples (never published) with `generic-6.12` (the only cataloged
  kernel today). Updates the versioning-convention example too.
2026-04-19 12:43:58 -03:00
221fb03d68
cli QoL: vm prune, list→ls aliases, delete→rm aliases
- `banger vm prune` sweeps every non-running VM (stopped, created,
  error) with an interactive confirmation; -f/--force skips the prompt.
  Partial failures report which VM failed and exit non-zero.
- list commands gain `ls` alias: vm list already had it; added to image
  list, kernel list, and vm session list.
- delete commands gain `rm` alias: vm delete and image delete. kernel
  rm already aliased delete/remove.

Uses new test seams (vmListFunc) plus the existing vmDeleteFunc so
prune unit-tests without touching the daemon socket.
2026-04-19 12:17:46 -03:00
e3eaa0c797
cli: shell completion via cobra + dynamic resource name lookups
Re-enable cobra's default `completion` subcommand (`banger completion
bash|zsh|fish|powershell`). Plus live resource-name suggestions that
hit the running daemon via the same RPC the real commands use:

  vm start/stop/restart/delete/kill/set         → completeVMNames (variadic)
  vm ssh/show/logs/stats/ports/...              → completeVMNameOnlyAtPos0
  vm session list/start                         → completeVMNameOnlyAtPos0
  vm session show/logs/stop/kill/attach/send    → completeSessionNames (vm + session)
  image show/delete/promote                     → completeImageNameOnlyAtPos0
  kernel show/rm                                → completeKernelNameOnlyAtPos0
  vm run/create --image, image pull/register --kernel-ref → flag-value completion

Design notes in internal/cli/completion.go: completers never auto-start
the daemon (ping-check, bail with NoFileComp on miss), so tab-completion
stays a zero-cost probe. Variadic completers exclude already-entered
args to avoid duplicate suggestions.

README: install recipes for bash / zsh / fish.
2026-04-19 12:12:40 -03:00
346eaba673
coverage: medium batch — hostnat runner, store guest-sessions, daemon helpers
Reuses existing fixtures (CommandRunner fakes, SQLite tempfile store,
pure-Go seams). No new infra needed.

  hostnat                  50% -> 98%   (iptables orchestration via fake runner)
  store                    78% -> 91%   (guest_sessions CRUD roundtrip)
  daemon/session           57% -> 95%   (script gen, state parse, snapshot apply)
  daemon/opstate           67% -> 100%  (Registry Insert/Get/Prune)
  daemon (firstNonEmpty)   slight bump

Total 54.0% -> 56.5%.
2026-04-18 18:03:37 -03:00
f8979de58a
coverage: easy-wins batch across cli, system, paths, vmdns, toolingplan
Pure-Go tests for formatters, layout resolution, and validators — no
fixtures, no external processes. Targets previously-zero functions the
triage scan flagged as low-hanging fruit.

  cli          55% -> 65%
  paths        64% -> 91%
  system       65% -> 75%
  vmdns        72% -> 86%
  toolingplan  73% -> 78%

Total 52.6% -> 54.0%.
2026-04-18 17:57:05 -03:00
a3cc296523
guest: tests for fingerprint, shellQuote, tar-entries edge cases, nil receivers
Pure-Go additions (no SSH server fixture): AuthorizedPublicKeyFingerprint,
shellQuote escaping, writeTarEntriesArchive error paths (.., ., missing,
duplicates, blank entries) and symlink handling, StreamSession/Client
nil-receiver safety, WaitForSSH context cancellation.

internal/guest coverage 17.8% -> 47.6%. Total 52.1% -> 52.6%. The
remaining uncovered paths need a real in-process SSH server; skip.
2026-04-18 17:47:24 -03:00
18bf89eae9
coverage: make targets + close zero-cov gaps (namegen, sessionstream)
Adds `make coverage` (per-package + total via -coverpkg=./...),
`make coverage-html`, and `make coverage-total` (CI-friendly). Wires
coverage.out/coverage.html through `make clean` and .gitignore.

Closes the two easy zero-coverage packages: namegen (77.8%) and
sessionstream (93.5%). Total statement coverage 51.7% -> 52.1%.
2026-04-18 17:44:37 -03:00
88425fb857
docs: DNS routing guide; README aimed at common users
Adds docs/dns-routing.md covering how `<vm>.vm` resolution works:
auto-configuration on systemd-resolved hosts (what the daemon
already does), and per-resolver recipes for dnsmasq /
NetworkManager+dnsmasq / /etc/resolv.conf / macOS `/etc/resolver/`
/ WSL. Plus verification via `dig @127.0.0.1 -p 42069` and
troubleshooting for the common failure modes.

README reshape: lead with the three things a common user needs —
quick start, what `vm run` does, where to put hostnames + image +
config — and push the rest to docs. `vm create` / OCI `image pull`
/ `image register` / workspace-and-session primitives are all still
documented, just under docs/advanced.md where they're not in the
first-time reader's way. Web UI and unnecessary implementation
notes dropped; the "further reading" section at the bottom
enumerates the five docs pages so nothing becomes hard to find.

README shrinks from 208 → 158 lines.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:24:50 -03:00
2584f94828
image/kernel pull: heartbeat dots so slow pulls look alive
Bundle downloads can take 20–60s on a typical connection and the
CLI was going silent between "resolving daemon" and the final image
summary. Users wondered whether banger had wedged.

New `withHeartbeat` helper wraps an RPC call with a dot-every-2s
ticker on stderr. No-op when stderr isn't a terminal, so piped or
scripted invocations stay quiet. Wired into `image pull` and `kernel
pull`, the two commands that actually download bytes.

Example:

    $ banger image pull debian-bookworm
    [image pull] ..........
    id  name             managed  ...

Tests cover the non-TTY short-circuit and error propagation.
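
Roughly the shape of the helper (a sketch; the real signature may differ):

```go
package cli

import (
	"context"
	"fmt"
	"os"
	"time"

	"golang.org/x/term"
)

// withHeartbeat runs call while printing a dot to stderr every 2s so long
// downloads visibly make progress. Piped/scripted runs (stderr not a TTY)
// stay silent. Sketch of the idea, not the exact banger implementation.
func withHeartbeat(ctx context.Context, label string, call func(context.Context) error) error {
	if !term.IsTerminal(int(os.Stderr.Fd())) {
		return call(ctx) // non-TTY short-circuit
	}
	done := make(chan struct{})
	stopped := make(chan struct{})
	go func() {
		defer close(stopped)
		fmt.Fprintf(os.Stderr, "[%s] ", label)
		t := time.NewTicker(2 * time.Second)
		defer t.Stop()
		for {
			select {
			case <-done:
				fmt.Fprintln(os.Stderr)
				return
			case <-t.C:
				fmt.Fprint(os.Stderr, ".")
			}
		}
	}()
	err := call(ctx)
	close(done)
	<-stopped
	return err
}
```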

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:08:30 -03:00
b5c13e3938
Remove opencode package + vm acp command (dead code)
The `internal/opencode` package and the `opencodeCapability` that
consumed it were hard-wired to wait for opencode on guest port 4096
when an image shipped an initrd. After the prune commits (void /
alpine / customize.sh / image build all removed), nothing banger
produces today carries an initrd, so the capability's wait path was
unreachable: every startup short-circuited to the "direct-boot, skip
opencode" branch.

Same logic for `banger vm acp`: it SSHes to `opencode acp --cwd
<path>`, a binary the golden image no longer ships. Users who run
their own image with opencode can still invoke
`ssh vm -- opencode acp --cwd /root/repo` directly — no banger
scaffolding required.

Removed:
- internal/opencode/ (whole package, 255 LOC incl. tests)
- internal/daemon/opencode.go (opencodeCapability)
- cli `vm acp` command + its helpers (runVMACP, sshACPCommandArgs,
  vmACPRemoteCommand) + their tests
- The opencodeCapability{} entry in registeredCapabilities() plus
  the test that pinned its presence
- `wait_opencode` progress-stage label from the vm-create renderer
- Stale mentions in daemon/doc.go, README, and webui test fixtures

~480 lines gone, 12 added. `banger/internal` is now 25 packages
instead of 26.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:54:37 -03:00
0933deaeb1
file_sync: config-driven replacement for hardcoded auth sync
Replace the three hardcoded host→guest credential syncs (opencode,
claude, pi) with a generic `[[file_sync]]` config list. Default is
empty — users opt in to exactly what they want synced, with no
surprise about which tools banger "supports".

```toml
[[file_sync]]
host = "~/.local/share/opencode/auth.json"
guest = "~/.local/share/opencode/auth.json"

[[file_sync]]
host = "~/.aws"          # directories are copied recursively
guest = "~/.aws"

[[file_sync]]
host = "~/bin/my-script"
guest = "~/bin/my-script"
mode = "0755"            # optional; default 0600 for files
```

Semantics:
- Host `~/...` expands against the host user's $HOME. Absolute host
  paths are used as-is.
- Guest must live under `~/` or `/root/...` — banger's work disk is
  mounted at /root in the guest, so that's the syncable namespace.
  Anything outside is rejected at config load.
- Validation at config load: reject empty paths, relative paths,
  `..` traversal, `~user/...`, malformed mode strings. Errors name
  the offending entry index.
- Missing host paths are a soft skip with a warn log (existing
  behaviour). Other errors (read, mkdir, install) abort VM create.
- File entries: `install -o 0 -g 0 -m <mode>` (default 0600).
- Directory entries: walked in Go; each source file is installed
  with its own source permissions preserved. The entry's `mode` is
  ignored for directories.
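
A sketch of the guest-path rule above (illustrative helper, not the actual validator, which also checks host paths and mode strings):

```go
package config

import (
	"fmt"
	"strings"
)

// validateGuestPath enforces the syncable namespace: the work disk mounts at
// /root in the guest, so only ~/... (root's home) or /root/... destinations
// make sense. Errors name the offending entry index.
func validateGuestPath(i int, guest string) error {
	switch {
	case guest == "":
		return fmt.Errorf("file_sync[%d]: guest path is empty", i)
	case strings.HasPrefix(guest, "~/"):
		// ok: expands to /root/... in the guest
	case guest == "/root" || strings.HasPrefix(guest, "/root/"):
		// ok: already inside the work-disk mount
	case strings.HasPrefix(guest, "~"):
		return fmt.Errorf("file_sync[%d]: ~user paths are not supported: %q", i, guest)
	default:
		return fmt.Errorf("file_sync[%d]: guest path must live under ~/ or /root/: %q", i, guest)
	}
	for _, part := range strings.Split(guest, "/") {
		if part == ".." {
			return fmt.Errorf("file_sync[%d]: guest path must not contain '..': %q", i, guest)
		}
	}
	return nil
}
```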

Removed (all dead after this):
- `ensureOpencodeAuthOnWorkDisk`, `ensureClaudeAuthOnWorkDisk`,
  `ensurePiAuthOnWorkDisk`, the shared `ensureAuthFileOnWorkDisk`,
  their `warn*Skipped` helpers, `resolveHost{Opencode,Claude,Pi}AuthPath`,
  and the work-disk relative-path + default display-path constants.
- The capability hook that registered the three syncs; it now calls the
  generic `runFileSync` once instead.

Seven tests exercising the old codepath deleted; six new tests cover
the new runFileSync (no-op on empty config, file copy, custom mode,
missing-host-skip, overwrite, recursive directory). Config-layer
test adds happy-path parsing and a case-per-shape table of invalid
entries (empty, relative host, guest outside /root, '..' traversal,
`~user`, bad mode).

README updated: replaces the "Credential sync" section with a
"File sync" section showing the new config shape.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:40:11 -03:00
843314be5e
vm_authsync: s/repairing/provisioning/ in SSH work-disk stage
The "repairing SSH access on work disk" stage detail sounded
remedial, like something had gone wrong. It's just writing banger's
SSH key to /root/.ssh/authorized_keys on the work disk for the first
time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:29:18 -03:00
cdd857b288
vm run --rm: suppress the still-running reminder
The deferred --rm delete fires AFTER runSSHSession returns, but
runSSHSession prints "vm X is still running (stop with ...)" before
returning. Net effect: the user sees the reminder, then the VM gets
deleted behind it — misleading.

Thread a skipReminder bool into runSSHSession. `vm run` passes the
same value as removeOnExit; other callers (`vm ssh`) pass false.
Reinforced by a new assertion in the --rm happy-path test that the
reminder string never appears in stderr.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:10:29 -03:00
b33f24865c
vm run --rm: ephemeral sandboxes
New `--rm` flag deletes the VM once the ssh session or `-- cmd`
exits, making `vm run` one-shot. Exit code from command mode still
propagates correctly.

Semantics:
- Create fails → no VM to delete, nothing to do.
- SSH-wait timeout → VM intentionally kept alive so `vm logs <name>`
  shows why; the timeout error already pointed users at that. Even
  with --rm, this path skips delete — a wedged sshd is exactly when
  you want post-mortem access.
- Session/command ends (any exit code, any reason) → VM is deleted
  via `vm.delete` RPC. Uses a fresh 10s context so Ctrl-C during the
  session doesn't abort the cleanup.
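
The cleanup shape, roughly (a sketch; only `vm.delete` and the 10s detached context come from the description above):

```go
package cli

import (
	"context"
	"fmt"
	"os"
	"time"
)

// cleanupEphemeralVM is an illustrative sketch of the --rm teardown: it runs
// after the ssh session or command returns, on a fresh 10s context so a
// Ctrl-C that ended the session cannot also cancel the delete RPC.
func cleanupEphemeralVM(vmRef string, sshWaitTimedOut bool,
	deleteVM func(context.Context, string) error) {
	if sshWaitTimedOut {
		return // keep the VM alive so `vm logs <name>` can show why sshd wedged
	}
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := deleteVM(ctx, vmRef); err != nil {
		fmt.Fprintf(os.Stderr, "warning: --rm cleanup failed: %v\n", err)
	}
}
```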

New vmDeleteFunc seam at the top of banger.go alongside the other
RPC seams. Two tests cover the happy path (session ends cleanly →
delete fires with correct ref) and the skip-on-timeout path (ssh
wait errors → delete does NOT fire).

README updated with an ephemeral example and a note about the
timeout-skip behaviour.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:06:46 -03:00
3aa64a63c1
vm run: bound the ssh wait and give a useful error on timeout
Before: `guestWaitForSSHFunc` loops forever bounded only by context
cancellation, so if sshd fails to start in the guest `vm run` hangs
indefinitely — which burned a long debugging session during the
golden-image bring-up.

After: the ssh wait gets its own 90s deadline. On guest-side timeout
the error names the VM, explains sshd is the likely suspect, points
at `banger vm logs <name>` for the console output, and notes the VM
is still alive for inspection (or `vm delete` to clean up). Parent
context cancellation (Ctrl-C, caller timeout) still surfaces as-is
without the hint.
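
The bounded-wait shape, sketched (illustrative names; only `vmRunSSHTimeout` and the 90s bound come from the change itself):

```go
package cli

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// vmRunSSHTimeout is a var (not a const) so tests can shrink it.
var vmRunSSHTimeout = 90 * time.Second

// waitForGuestSSH decorates only the guest-side timeout; parent cancellation
// (Ctrl-C, caller deadline) surfaces unchanged. waitSSH stands in for
// guestWaitForSSHFunc.
func waitForGuestSSH(ctx context.Context, name string,
	waitSSH func(context.Context) error) error {
	sshCtx, cancel := context.WithTimeout(ctx, vmRunSSHTimeout)
	defer cancel()
	err := waitSSH(sshCtx)
	if err == nil {
		return nil
	}
	// Only our own deadline earns the actionable hint.
	if errors.Is(sshCtx.Err(), context.DeadlineExceeded) && ctx.Err() == nil {
		return fmt.Errorf("vm %s: sshd did not come up within %s; "+
			"check `banger vm logs %s` (the VM is still running; `banger vm delete %s` to clean up): %w",
			name, vmRunSSHTimeout, name, name, err)
	}
	return err
}
```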

`vmRunSSHTimeout` is a var rather than a const so tests can shrink
it; the new TestRunVMRunSSHTimeoutReturnsActionableError sets it to
50ms and asserts the error message contains the actionable bits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:59:27 -03:00
ac7974f5b9
Remove image build --from-image; doctor treats catalog images as OK
The `image build` flow spun up a transient Firecracker VM, SSHed in,
and ran a large bash provisioning script to derive a new managed
image from an existing one. It overlapped heavily with the golden-
image Dockerfile flow (same mise/docker/tmux/opencode install logic
duplicated in Go as `imagemgr.BuildProvisionScript`) and had far more
machinery: async op state, RPC begin/status/cancel, webui form +
operation page, preflight checks, API types, tests. For custom
images, writing a Dockerfile is simpler and more reproducible.

Removed end-to-end:
- CLI `image build` subcommand + `absolutizeImageBuildPaths`.
- Daemon: BuildImage method, imagebuild.go (transient-VM orchestration),
  image_build_ops.go (async begin/status/cancel), imagemgr/build.go
  (the 247-line provisioning script generator and all its append*
  helpers), validateImageBuildPrereqs + addImageBuildPrereqs.
- RPC dispatches for image.build / .begin / .status / .cancel.
- opstate registry `imageBuildOps`, daemon seam `imageBuild`,
  background pruner call.
- API types: ImageBuildParams, ImageBuildOperation, ImageBuildBeginResult,
  ImageBuildStatusParams, ImageBuildStatusResult; model type
  ImageBuildRequest.
- Web UI: Backend interface methods, handlers, form, routes, template
  branches (images.html build form, operation.html build branch,
  dashboard.html Build button).
- Tests that directly exercised BuildImage.

Doctor polish (task C):
- Drop the "image build" preflight section entirely (its raison d'être
  is gone).
- Default-image check now accepts "not local but in imagecat" as OK:
  vm create auto-pulls on first use. Only flag when the image is
  neither locally registered nor in the catalog.

Net: 24 files touched, 1,373 lines deleted, 25 added.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:54:29 -03:00
ace4782fce
Untrack todos scratch file; ignore it
Accidentally staged into the prior prune commit by `git add -A`.
It's a local scratch file the maintainer keeps in the repo root.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:40:46 -03:00
6083e2dde5
Prune legacy void/alpine + customize.sh flows
The golden-image Dockerfile + catalog pipeline replaces the entire
manual rootfs-build stack. With that shipped, the per-distro shell
flows are dead code.

Removed:
- scripts/customize.sh, scripts/interactive.sh, scripts/verify.sh
- scripts/make-rootfs{,-void,-alpine}.sh
- scripts/register-{void,alpine}-image.sh
- scripts/make-{void,alpine}-kernel.sh
- internal/imagepreset/ (only consumer was `banger internal packages`,
  which fed customize.sh)
- examples/{void,alpine}.config.toml
- Makefile targets: rootfs, rootfs-void, rootfs-alpine, void-kernel,
  alpine-kernel, void-register, alpine-register, void-vm, alpine-vm,
  verify-void, verify-alpine, plus the ALPINE_RELEASE / *_IMAGE_NAME
  / *_VM_NAME variables

The void-6.12 kernel catalog entry is also gone — golden image pairs
with generic-6.12 and nothing else in the catalog depended on it.

Consolidated: imagemgr now holds the small DebianBasePackages list +
package-hash helper inline, so the `image build --from-image` flow
(still supported) no longer pulls from a separate imagepreset package.

Net: 3,815 lines deleted, 59 added. No runtime functionality removed
beyond the `banger internal packages` subcommand (hidden, used only
by the deleted customize.sh).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:39:53 -03:00
8029b2e1bc
docs: promote vm run + image catalog as the happy path
Lead the README with `banger vm run` (one command, auto-pull default
image + kernel from the catalogs), move `image register` / `image
build` / OCI-pull to a "power-user flows" section. Golden-image
content from customize.sh moves to the golden-image Dockerfile story.

New `docs/image-catalog.md` mirrors `docs/kernel-catalog.md` — the
bundle format, content-addressed filenames, publish flow, trust
model, R2 hosting. Cross-links with oci-import.md.

`docs/oci-import.md` refactored to document the OCI-pull path as the
fallthrough for arbitrary registry refs (it's the secondary path now
that the catalog covers the headline debian-bookworm case). Phase A
caveats removed — ownership fixup, agent injection, and first-boot
sshd install all landed.

AGENTS.md: promotes `vm run` as the smoke-test primitive, notes the
default-image auto-pull behaviour, and points at both catalog docs.

README shrinks 330 → 198 lines, mostly by removing the experimental
void/alpine sections (those flows still work as advanced scripts but
the README no longer advertises them).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:33:30 -03:00
75baf2e415
publish-golden-image: content-addressed tarball names
Embed the sha256 prefix in the uploaded filename so every rebuild
lives at a unique URL. Cloudflare's edge cache (and any similar CDN
in front of R2) can never serve stale bytes for the URL the catalog
points at. The R2 console offers no per-URL purge for this bucket
layout, so making the URL itself content-addressed is the only
durable fix.

Also republishes the debian-bookworm catalog entry with the new
filename.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:26:57 -03:00
e0894376ea
vm create: auto-pull image and kernel from catalogs if missing
One-command sandbox: `banger vm run` on a fresh host now Just Works.
No prior `banger image pull` or `banger kernel pull` needed.

Changes:

- Default `default_image_name` flips from "default" to "debian-bookworm"
  so the golden image is the implicit target when `--image` is omitted.
- `CreateVM` resolves the image via a new `findOrAutoPullImage`: try
  the local store first, and on miss fall back to the embedded imagecat
  catalog + auto-pull. Emits a vm-create progress stage so the user
  sees "pulling from image catalog" in the create output.
- `resolveKernelInputs` gains context + the same pattern via
  `readOrAutoPullKernel`: try the local kernelcat, and on miss look up
  the embedded kernelcat and auto-pull. Fires whenever a bundle's
  manifest references a kernel the user hasn't pulled yet, not just
  during image pull — any CreateVM with an image that needs a kernel
  not yet local will resolve it.
- `--image` help text updated on both `vm run` and `vm create`.
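
The resolution order above, as a sketch (store/catalog calls are illustrative stand-ins for the real helpers):

```go
package daemon

import (
	"context"
	"errors"
	"fmt"
	"os"
)

// Image is a stand-in for model.Image.
type Image struct{ Name string }

// findOrAutoPullImage: local store first (a hit never touches the network),
// embedded catalog + auto-pull on miss, with a progress stage so the create
// output shows what is happening. A non-ENOENT store error is surfaced as-is
// rather than misreported as "not in catalog".
func findOrAutoPullImage(ctx context.Context, name string,
	findLocal func(string) (*Image, error),
	inCatalog func(string) bool,
	pull func(context.Context, string) (*Image, error),
	progress func(stage string)) (*Image, error) {

	img, err := findLocal(name)
	if err == nil {
		return img, nil
	}
	if !errors.Is(err, os.ErrNotExist) {
		return nil, err
	}
	if !inCatalog(name) {
		return nil, fmt.Errorf("image %q is neither registered locally nor in the catalog", name)
	}
	progress("pulling from image catalog")
	return pull(ctx, name)
}
```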

Six tests cover local-hit-no-pull, auto-pull-on-miss, not-in-catalog
error propagation, and that a non-ENOENT kernel read error does NOT
trigger a misleading "not in catalog" claim.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 15:10:26 -03:00
81a27d6648
imagecat: publish debian-bookworm bundle with boot fixes
End-to-end verified:
  banger image pull debian-bookworm
  banger vm run --image debian-bookworm --name goldenvm
boots through multi-user.target, sshd starts, and vm run drops into
an interactive ssh session.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:59:01 -03:00
49c5c862b2
golden image: fix systemd boot + sshd startup
Three fixes discovered during end-to-end boot testing on Firecracker:

- Install udev + dbus alongside systemd. Both are Recommends of the
  systemd package, skipped by --no-install-recommends. Without udev,
  systemd never activates device units (dev-vdb.device stays inactive
  even after the kernel enumerates /dev/vdb) and the work-disk mount
  hangs forever. dbus is required by a growing set of services
  (logind, systemd-resolved shim, etc.).

- Ship /usr/lib/tmpfiles.d/sshd.conf creating /run/sshd. Debian's
  openssh-server package doesn't ship one, and ssh.service's own
  RuntimeDirectory=sshd fires too late for the ExecStartPre config
  check, which blows up with 'Missing privilege separation directory'.
  The tmpfiles entry runs in systemd-tmpfiles-setup.service well
  before ssh.service starts.

- Rewrite the ssh.service drop-in to reset the main unit's
  ExecStartPre list. Debian ships `sshd -t` as ExecStartPre #1; that
  fails without host keys and terminates the service before our
  `ssh-keygen -A` fires. Reset + re-add in the correct order: mkdir,
  keygen, then the test.

StandardOutput/Error=journal+console on ssh.service so future sshd
failures surface in the firecracker console log too, not only in the
(unreachable) guest journal.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:59:01 -03:00
66838bb135
make-bundle: strip /.dockerenv so systemd doesn't misdetect virt
`docker create` drops /.dockerenv into the container's writable layer,
and `docker export` includes it in the tar. When systemd later boots
that rootfs it finds /.dockerenv and flags virtualization=docker,
which disables a bunch of udev device-unit behaviour (device units
never become active, mount units waiting on them hang forever).
Strip /.dockerenv (and /run/.containerenv for podman symmetry) from
the staging tree after FlattenTar and before BuildExt4 so systemd
correctly detects virtualization=kvm.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:58:42 -03:00
ed4117d926
imagepull/BuildExt4: omit positional fs-size; rely on file truncation
mkfs.ext4's positional fs-size is documented in 1 KiB units (not the
filesystem's 4 KiB block size), so passing sizeBytes/4096 made
filesystems 1/4 the intended size. A 4 GiB request became a 1 GiB
ext4 in a 4 GiB file, packed to 0 free blocks — VM create then failed
with 'Could not allocate block' when patchRootOverlay tried to write
guest config.

The file is truncated to the target size before mkfs runs; without
the positional arg, mkfs uses the whole device.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:58:42 -03:00
b2dcdf9757
vm_lifecycle: drop systemd.mask=dev-{ttyS0,vdb}.device
Both masks were added when the direct-boot path first landed for
container rootfses that didn't have anything mounted on /dev/vdb. The
golden image (and any pulled OCI image running under banger's
patchRootOverlay) has an /etc/fstab entry mounting /dev/vdb at /root —
masking dev-vdb.device makes systemd wait forever for a unit that can
never become active, and the work-disk mount never completes. dev-ttyS0
is a real serial console the image needs too. Drop both.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:58:42 -03:00
ab5627aec2
imagecat: publish debian-bookworm golden image
First entry in the image catalog. Verified end-to-end:
  - https://images.thaloco.com/debian-bookworm-x86_64.tar.zst reachable
  - sha256 071495e6... matches
  - bundle unpacks to rootfs.ext4 (4 GiB) + manifest.json with the
    expected name/distro/arch/kernel_ref.

publish-golden-image.sh tweaks:
  - default RCLONE_REMOTE from 'r2' to 'banger-images' (matches the
    rclone config actually in use here).
  - rclone copyto now passes --s3-no-check-bucket and --no-check-dest
    so scoped R2 tokens without HeadBucket/HeadObject permission
    still upload cleanly.

To use: restart bangerd so it picks up the new embedded catalog,
then `banger image pull debian-bookworm`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 13:25:42 -03:00
5bdc9985c2
image pull: dispatch to imagecat bundle path before OCI
PullImage now checks the embedded imagecat catalog first. If the
ref matches a catalog entry, it takes the bundle path:

  1. Fetch the .tar.zst bundle into a staging dir (rootfs.ext4 +
     manifest.json).
  2. Strip manifest.json (staging-only metadata).
  3. Stage kernel/initrd/modules alongside rootfs.ext4.
  4. Publish the staging dir and upsert the image row.

Bundle rootfs is already flattened + ownership-fixed + agent-
injected at build time, so the daemon-side work is strictly I/O —
no flatten, no mkfs, no debugfs.

Kernel resolution in the bundle path: --kernel-ref > entry.kernel_ref
> --kernel/--initrd/--modules.

If the ref doesn't match a catalog entry, PullImage falls through
to the existing OCI path unchanged (extracted into pullFromOCI).

New test seam: d.bundleFetch. Six unit tests cover happy path,
--kernel-ref override, existing-name rejection, kernel-required
error, fetch-failure cleanup, and the catalog → OCI fallthrough.

CLI help updated: image pull now documents both forms and takes
<name-or-oci-ref> instead of requiring an OCI ref.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:43:33 -03:00
d22d05555c
scripts: bundle-based golden image pipeline
Replaces the OCI-push flow with a bundle-based one that mirrors the
kernel catalog (publish-kernel.sh / kernelcat).

- scripts/make-golden-bundle.sh: docker build → docker create → docker
  export | banger internal make-bundle → .tar.zst. Defaults target
  debian-bookworm / generic-6.12 / x86_64; pinned --size 4G to leave
  headroom for first-boot installs and in-VM apt use.
- scripts/publish-golden-image.sh: rewritten to call make-golden-bundle,
  rclone upload to R2 (banger-images bucket, images.thaloco.com), and
  jq-patch internal/imagecat/catalog.json with URL / sha256 / size.
  --skip-upload stops after bundle build and copies to dist/.

make-bundle default ext4 sizing also bumped from +25% to +50% headroom
(mkfs.ext4 needs room for inode tables, block-group metadata, journal,
and the default 5% reserved-blocks margin). The old 25% was too tight
for the ~950 MB golden rootfs and aborted with "Could not allocate
block".

End-to-end smoke (local): golden Dockerfile → 286 MB tar.zst bundle
with correct manifest, valid ext4, and all banger units + vsock agent
present.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:38:04 -03:00
a7d1a49aca
cli: restrict ExitCodeError unwrap to the CLI's own type
main.go previously unwrapped *any* error implementing `ExitCode() int`
into the process exit status, which matched *exec.ExitError too. So
whenever a CLI command ran a subprocess (mkfs.ext4, debugfs, ssh to a
daemon preflight, etc.) and that subprocess failed, the CLI would
silently exit with the subprocess's code — no error message printed.
Surfaced while bringing up `banger internal make-bundle`: mkfs.ext4
was failing on an undersized ext4 and the user saw only `EXIT=1`.

Fix: export the type as `cli.ExitCodeError` and unwrap against the
concrete type in main.go. The `ExitCode()` method is gone — only the
explicit wrap at the `vm run` command-mode call site produces this
error now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:37:47 -03:00
bb95a0a273
banger internal make-bundle: build image bundles from flat rootfs tars
New hidden subcommand that turns a `docker export`-style rootfs tar
into a banger bundle (`rootfs.ext4` + `manifest.json`, tar+zstd):

  1. FlattenTar (new in imagepull) extracts the stream into a staging
     dir while capturing per-file uid/gid/mode into a Metadata record.
  2. imagepull.BuildExt4 produces the ext4 via `mkfs.ext4 -d`.
  3. imagepull.ApplyOwnership re-applies the captured metadata with
     `debugfs sif` so setuid/root-owned files keep their identity.
  4. imagepull.InjectGuestAgents drops the vsock agent + network
     bootstrap + first-boot service into the ext4.
  5. manifest.json is written with name/distro/arch/kernel_ref.
  6. Both files are packaged as .tar.zst with max compression.

Flags: --rootfs-tar (file or '-' for stdin), --name, --distro, --arch,
--kernel-ref, --description, --size, --out. Stdout prints bundle path,
sha256, and size so callers can patch the catalog.

Unit tests cover flag registration, required-arg validation, the
bundle tar round-trip, sha256HexFile, and dirSize. An end-to-end test
runs the full pipeline against a synthesized tiny rootfs tar; skips
gracefully when mkfs.ext4 / debugfs / companion binaries are missing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:17:50 -03:00
3d9ae624b1
imagecat: catalog + fetch for banger image bundles
New package mirroring `kernelcat`: catalog + SHA256-verified HTTP
fetch of `.tar.zst` bundles that contain rootfs.ext4 + manifest.json.
Mounted empty (version:1, entries:[]) so nothing is pullable via the
bundle path yet; wiring into `banger image pull` lands in a later
phase.

- catalog.go: Catalog/CatEntry, LoadEmbedded, ParseCatalog, Lookup,
  ValidateName.
- fetch.go: Fetch(ctx, client, destDir, entry) downloads the bundle,
  verifies sha256, extracts exactly rootfs.ext4 and manifest.json
  into destDir, returns the parsed manifest. Rejects unexpected tar
  entries, unsafe paths, non-regular files, and cleans up partial
  writes on failure.
- Thirteen unit tests (happy path + every failure mode).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:11:52 -03:00
da471b0640
Golden image Dockerfile + local build script
Debian bookworm with two clearly-labeled sections:
- ESSENTIAL: systemd, openssh-server, ca-certificates, curl, iproute2.
- OPINION: git, jq, ripgrep, fd, build-essential, shellcheck, mise,
  Docker CE (+ Compose v2 + buildx), tmux, htop, and friends.

Per-VM identity stripped at build time: /etc/machine-id cleared,
SSH host keys removed with a ssh.service drop-in that runs
`ssh-keygen -A` on first start so each VM gets a unique set.

The script is a parameterized wrapper around `docker build`; it also
supports `--push` to an OCI registry, which will be removed once the
bundle pipeline is in place.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:11:40 -03:00
feb679a301
vm run redesign: one command, three modes
`vm run` now covers bare sandbox (no args), workspace sandbox (path),
and workspace+command (path -- cmd) in a single entry point. Replaces
the old print-next-steps-and-exit behaviour: bare and workspace modes
drop into interactive ssh, command mode execs via ssh and propagates
the remote exit code through banger's own exit status.

- path argument is optional; --branch / --from still require a path.
- workspace prep and mise tooling bootstrap only run when a path is
  given; command mode skips the bootstrap.
- remote command exit status is wrapped as exitCodeError so main() can
  propagate it instead of collapsing every failure to 1.
- README: promote vm run with three-mode examples; demote vm create
  to a scripting primitive.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 14:00:45 -03:00
8f4be112c2
Generic kernel + init= boot path for OCI-pulled images
Closes the full arc: banger kernel pull + image pull + vm create + vm ssh
now works end-to-end against docker.io/library/debian:bookworm with zero
manual image building.

Generic kernel:
 - New scripts/make-generic-kernel.sh builds vmlinux from upstream
   kernel.org sources using Firecracker's official minimal config
   (configs/firecracker-x86_64-6.1.config). All critical drivers
   (virtio_blk, virtio_net, ext4, vsock) compiled in — no modules,
   no initramfs needed.
 - Published as generic-6.12 in the catalog (kernels.thaloco.com).
 - catalog.json updated with the new entry.

Direct-boot init= override (vm_lifecycle.go):
 - For images without an initrd (direct-boot / OCI-pulled), banger now
   passes init=/usr/local/libexec/banger-first-boot on the kernel
   cmdline. The script runs as PID 1, mounts /proc /sys /dev /run,
   checks for systemd — if present execs it immediately; if not
   (container images), installs systemd-sysv + openssh-server via the
   guest's package manager, then execs systemd.
 - Also passes kernel-level ip= parameter via BuildBootArgsWithKernelIP
   so the kernel configures the network interface before init runs
   (container images don't ship iproute2, so the userspace bootstrap
   script can't call ip(8)).
 - Masks dev-ttyS0.device and dev-vdb.device systemd units that
   otherwise wait 90s for udev events that never fire in Firecracker
   guests started from container rootfses.

first-boot.sh rewritten as universal init wrapper:
 - Works as PID 1 (mounts essential filesystems) OR as a systemd
   oneshot (existing behavior).
 - Installs both systemd-sysv AND openssh-server (container images
   have neither).
 - Dispatch updated: debian, alpine, fedora, arch, opensuse families
   + ID_LIKE fallback. All tests updated.

Opencode capability skip for direct-boot images:
 - The opencode readiness check (WaitReady on vsock port 4096) now
   returns nil for images without an initrd, since pulled container
   images don't ship the opencode service. Without this, the VM
   would be marked as error for lacking an opinionated add-on.

Docs: README and kernel-catalog.md updated to recommend generic-6.12
as the default kernel for OCI-pulled images. AGENTS.md notes the new
build script.

Verified live:
 - banger kernel pull generic-6.12
 - banger image pull docker.io/library/debian:bookworm --kernel-ref generic-6.12
 - banger vm create --image debian-bookworm --name testbox --nat
 - banger vm ssh testbox -- "id; uname -r; systemctl is-active banger-vsock-agent"
 → uid=0(root), kernel 6.12.8, Debian bookworm, vsock-agent active,
   sshd running, SSH working.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 20:12:56 -03:00
2478fe3cc3
Phase B-4: docs for Phase B completion
docs/oci-import.md: removed the "Phase A acquisition-only" framing
and the bootability-gap warnings. Expanded architecture section
with ApplyOwnership + InjectGuestAgents. Added a "guest-side boot
sequence" diagram-in-prose showing network → first-boot → vsock-
agent unit ordering. Added a "how to add distro support" section
pointing at the ID-case dispatch in first-boot.sh.

README.md: replaced the experimental-caveat block with an honest
"boots as a banger VM directly, no image build step required"
description. Pointer to the docs for distro support details.

Tech-debt list trimmed — ownership fixup and first-boot install
are no longer planned work, they shipped. What remains: private-
registry auth (authn.DefaultKeychain), cache eviction, first-boot
timeout UX (retry still works but could be smoother with a
FirstBootPending flag), non-systemd distros.

All 20 packages green. make lint clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 19:06:37 -03:00
bddfa75feb
imagepull.Pull: don't eager-open layer readers
The eager "fetch once to surface network errors" loop in Pull was
opening each layer's Compressed() stream and immediately closing it
without draining. The go-containerregistry filesystem cache populates
lazily via tee-on-read — opening and closing without reading wrote
ZERO-BYTE blobs into the cache. Every subsequent pull of the same
digest then served those corrupted blobs, producing a 1 GiB ext4
containing nothing but banger's injected files.

Symptom caught during B-4 live verification: real debian:bookworm
pulls had 43 used inodes (out of 65536) and /usr contained only
/usr/local — the debian content was silently missing.

Fix: remove the eager-fetch loop entirely. Flatten naturally drains
layers when it reads them, and the cache populates correctly on that
path. Network errors now surface from Flatten instead of Pull, which
is fine — they surface at the same place they always had to.

Test renamed: TestPullCachesLayersAndReturnsImage →
TestPullResolvesImageAndFlattenPopulatesCache, reworded to assert the
new contract: Pull resolves the image; Flatten is what populates the
cache with non-empty blobs.

Users with a corrupted cache from a pre-fix pull must clear it:
  rm -rf ~/.cache/banger/oci

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 19:03:52 -03:00
c3fb4ccc3e
Phase B-3: first-boot sshd install
New internal/imagepull/assets/first-boot.sh: POSIX-sh oneshot that
detects the guest distro from /etc/os-release (ID + ID_LIKE
fallback), installs openssh-server via the native package manager,
and enables/starts sshd. Covers debian/ubuntu/kali/raspbian/pop,
alpine, fedora/rhel/centos/rocky/almalinux, arch/manjaro, and
opensuse/suse. Unknown distros fail clearly with a pointer at
editing the script to add a branch.

Marker-driven: the service has ConditionPathExists=
/var/lib/banger/first-boot-pending, and the script removes the
marker on success. Subsequent boots no-op.

Testability seams in the script: RUN_PLAN=1 skips the
sshd-already-present short-circuit and makes the dispatch echo the
planned command instead of executing it. OS_RELEASE_FILE and
BANGER_FIRST_BOOT_MARKER env vars override paths so the Go tests
exercise the real dispatch logic in a tempdir without touching
/etc or /var/lib on the host.

Embedding: internal/imagepull/firstboot.go go:embeds both the
script and the systemd unit; exposes FirstBootScript() and
FirstBootUnit() plus the FirstBootScriptPath /
FirstBootMarkerPath / FirstBootUnitName constants.

Injection: InjectGuestAgents now drops /usr/local/libexec/
banger-first-boot (0755), /etc/systemd/system/banger-first-boot.
service (0644), the empty /var/lib/banger/first-boot-pending
marker (0644), and the multi-user.target.wants enable symlink.
All uid=0, gid=0.

Tests: eight-case dispatch-by-distro (debian, ubuntu, alpine,
fedora, arch, opensuse, plus ID_LIKE fallbacks for weird
derivatives). Script syntax check via `sh -n`. Unit-contains-
expected-fields check. Existing inject round-trip test extended
to assert the first-boot bits land in the ext4.

Deferred: per-image FirstBootPending flag + extended SSH wait
timeout at VM start. Will add if live verification (B-4) shows
the naive retry UX is unacceptable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 18:20:33 -03:00
491c8e1ebb
Phase B-2: pre-inject banger guest agents into pulled rootfs
New imagepull.InjectGuestAgents writes banger's guest-side assets
straight into the pulled ext4 so systemd will start them at first boot:

  /usr/local/bin/banger-vsock-agent             (binary, 0755)
  /usr/local/libexec/banger-network-bootstrap   (script, 0755)
  /etc/systemd/system/banger-network.service    (unit, 0644)
  /etc/systemd/system/banger-vsock-agent.service (unit, 0644)
  /etc/modules-load.d/banger-vsock.conf         (modules, 0644)

  plus enable-at-boot symlinks under
  /etc/systemd/system/multi-user.target.wants/

All writes + ownership + symlinks go through one `debugfs -w -f -`
invocation. No sudo required because the caller owns the ext4 file.
Script is deterministic: shallow-first mkdir, then write, then sif,
then symlink. "File exists" errors from mkdir on already-present
dirs are tolerated (debugfs keeps going past them with -f, and we
filter them out of the output scan).
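
The single-invocation drive, roughly (debugfs commands are real; paths and the script content here are trimmed, illustrative examples):

```go
package imagepull

import (
	"fmt"
	"os/exec"
	"strings"
)

// injectViaDebugfs shows the one-shot debugfs script: mkdir ancestors first,
// then write files, then sif ownership/mode, then symlinks. `-f -` reads the
// script from stdin and keeps going past "File exists" from redundant mkdirs.
// Sketch, not the real helper.
func injectViaDebugfs(ext4Path string) error {
	script := strings.Join([]string{
		"mkdir /usr/local/bin",
		"write /tmp/banger-vsock-agent /usr/local/bin/banger-vsock-agent",
		"sif /usr/local/bin/banger-vsock-agent uid 0",
		"sif /usr/local/bin/banger-vsock-agent gid 0",
		"sif /usr/local/bin/banger-vsock-agent mode 0100755",
		"symlink /etc/systemd/system/multi-user.target.wants/banger-vsock-agent.service /etc/systemd/system/banger-vsock-agent.service",
	}, "\n") + "\n"

	cmd := exec.Command("debugfs", "-w", "-f", "-", ext4Path)
	cmd.Stdin = strings.NewReader(script)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("debugfs: %v: %s", err, out)
	}
	return nil
}
```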

Asset content reuses the existing guestnet.BootstrapScript /
SystemdServiceUnit / ConfigPath and vsockagent.ServiceUnit /
ModulesLoadConfig / GuestInstallPath — one source of truth, no
duplicated systemd unit strings.

Daemon wiring: new d.finalizePulledRootfs seam runs both
ApplyOwnership (B-1) and InjectGuestAgents as one phase between
BuildExt4 and StageBootArtifacts. The companion vsock-agent binary
is resolved via paths.CompanionBinaryPath. Existing daemon tests
stub the seam with a no-op to avoid needing a real companion
binary + debugfs in the test harness.

Tests: real-ext4 round-trip that builds a minimal ext4, runs
InjectGuestAgents, then verifies every expected path is present
via `debugfs stat`, plus uid=0 and mode 0755 on the vsock-agent
binary. Also: missing-binary rejection, ancestor-collection order
test. debugfs/mkfs.ext4 tests skip on hosts without the binaries.

After B-1+B-2, any OCI image that already ships sshd boots with
banger-network and banger-vsock-agent running; image pull is
one step from "useful rootfs primitive". B-3 (first-boot sshd
install) unlocks images that don't ship sshd.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 18:08:56 -03:00
43982a4ae3
Phase B-1: ownership fixup via debugfs pass
imagepull.Flatten now captures per-file uid/gid/mode/type from the
tar headers as it walks layers, returning a Metadata map alongside
the extracted tree. Whiteouts correctly drop the victim's metadata.
The returned Metadata feeds the new imagepull.ApplyOwnership, which
pipes a batched `set_inode_field` script to `debugfs -w -f -`.

Why: mkfs.ext4 -d copies the runner's on-disk uids verbatim, so
without this pass setuid binaries become setuid-nonroot and sshd
refuses to start on the resulting image. With the pass, a pulled
debian:bookworm has /usr/bin/sudo with uid=0 + setuid bit surviving
intact.

imagepull.BuildExt4 signature unchanged; ownership is applied as a
separate step by the daemon orchestrator between BuildExt4 and
StageBootArtifacts, keeping each helper focused. The seam
(d.pullAndFlatten) now returns (Metadata, error) for test stubs to
feed synthetic metadata.

StdinRunner is a new duck-typed extension next to CommandRunner;
the real system.Runner implements RunStdin, test mocks don't need
to unless they exercise stdin. Prevents every existing mock from
growing a new method.

Tests:
 - TestFlattenCapturesHeaderMetadata: setuid bit + mode survive the
   tar-header walk
 - TestApplyOwnershipRewritesUidGidMode: real debugfs round-trip —
   create ext4 with runner's uid, apply synthetic metadata setting
   uid=0 + setuid mode, verify via `debugfs -R stat` that the
   inode now has uid=0 and mode 04755
 - TestBuildOwnershipScriptDeterministic: sorted, well-formed
   sif script output

Debugfs and mkfs.ext4 tests skip if the binaries aren't on PATH.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 18:04:22 -03:00
2e4d4b14da
Phase 4: OCI import docs
New docs/oci-import.md covers the full Phase A story:
 - end-user flow (kernel pull + image pull + image list)
 - what works now (layer replay + whiteouts, path-traversal
   hardening, content-aware sizing, layer caching, composition
   with image build)
 - what does not work yet (direct boot due to ownership
   caveat, private registries, non-amd64 platforms)
 - architecture of internal/imagepull + the daemon orchestrator
 - path layout (OCI cache, staging, published)
 - tech debt: the three plausible ownership-fixup approaches
   (debugfs, hcsshim/tar2ext4, user namespaces) with honest
   trade-offs for Phase B to choose from later
 - trust model (digest chain covers transport; signature
   verification out of scope)

README.md gains an image pull example alongside image register
+ --kernel-ref, with a pointer to the docs and an honest "pulled
images are a base for image build, not yet directly bootable"
warning.

AGENTS.md gets the one-line note pointing at the new doc.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:37:07 -03:00
fdaf7cce0f
imagepull + kernelcat: allow absolute symlink targets
Container (and kernel) layers routinely ship symlinks with absolute
targets — /usr/bin/mawk, /lib/modules/<ver>/build, etc. Those are
interpreted relative to the rootfs at runtime (`/` inside the VM),
not against the host filesystem, so they are rooted inside dest by
construction and need no escape check at write time.

The previous logic resolved absolute Linknames literally (against
the host root), compared to the staging dir, and rejected everything
that didn't happen to live under it. That made `banger image pull
docker.io/library/debian:bookworm` fail on the very first symlink
("etc/alternatives/awk -> /usr/bin/mawk").

Relative targets still get the traversal check — a relative
Linkname with ../s can genuinely escape dest at write time even if
in-VM resolution would be safe — so the defense against malicious
relative chains is intact.
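
The distinction, as a sketch (illustrative helper; the real check lives inside Flatten's tar walk):

```go
package imagepull

import (
	"fmt"
	"path/filepath"
	"strings"
)

// checkSymlinkTarget allows absolute Linknames (resolved against the rootfs
// at runtime, never against the host), but still rejects relative targets
// that would climb out of the staging dir at write time. destDir is assumed
// to be cleaned.
func checkSymlinkTarget(destDir, entryName, linkname string) error {
	if filepath.IsAbs(linkname) {
		return nil // e.g. etc/alternatives/awk -> /usr/bin/mawk
	}
	linkDir := filepath.Dir(filepath.Join(destDir, entryName))
	resolved := filepath.Clean(filepath.Join(linkDir, linkname))
	if resolved != destDir && !strings.HasPrefix(resolved, destDir+string(filepath.Separator)) {
		return fmt.Errorf("unsafe symlink %q -> %q escapes %q", entryName, linkname, destDir)
	}
	return nil
}
```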

Tests:
 - TestFlattenAcceptsAbsoluteSymlink replaces the old overly-strict
   test, using the exact etc/alternatives/awk -> /usr/bin/mawk case
   that broke debian:bookworm.
 - TestFlattenRejectsRelativeSymlinkEscape confirms relative-with-
   traversal is still rejected with the same "unsafe symlink"
   error.

Same fix applied in internal/kernelcat/fetch.go for consistency;
future kernel bundles with absolute symlinks in the modules tree
would otherwise hit the same wall.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:33:16 -03:00
d5f72dfad9
Phase 3: CLI banger image pull
newImagePullCommand mirrors newImageRegisterCommand with a positional
<oci-ref> arg, the same kernel-ref / direct-paths flag set + mutual
exclusion, plus --size that parses human-friendly values via
model.ParseSize before crossing the RPC boundary.

Calls "image.pull" RPC, prints the resulting image summary on success.
Long help warns about the Phase A bootability gap (ownership not
preserved; suitable as `image build` base, not yet directly bootable).

CLI test confirms image pull is registered with the expected flags.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:29:06 -03:00
a8c9983542
Phase 2: daemon PullImage orchestration
(d *Daemon).PullImage downloads an OCI image, flattens it into an
ext4 rootfs, and registers the result as a managed banger image.

Flow (internal/daemon/images_pull.go):
 1. Parse + validate the OCI ref via go-containerregistry/name.
 2. Derive a friendly default name from the ref ("debian-bookworm")
    when --name is omitted.
 3. Reject if an image with that name already exists.
 4. Resolve kernel info via the new shared resolveKernelInputs
    helper (refactored out of RegisterImage); ValidateKernelPaths
    checks the kernel triple alone.
 5. Acquire imageOpsMu, generate a fresh image id, and stage at
    <ImagesDir>/<id>.staging.
 6. imagepull.Pull → cache layers under OCICacheDir;
    imagepull.Flatten → temp rootfs tree under os.TempDir (so the
    state filesystem doesn't temporarily double in size).
 7. Default size: max(treeSize × 1.25, 1 GiB); --size override
    accepted.
 8. imagepull.BuildExt4 produces the rootfs.ext4 in the staging dir.
 9. imagemgr.StageBootArtifacts stages the kernel/initrd/modules
    into the same dir (reused unchanged).
 10. Atomic os.Rename(staging, finalDir) publishes the artifact dir.
 11. Persist model.Image with Managed=true. Failure at any step
     removes the staging dir; failure post-rename removes finalDir.
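
Steps 5–11 reduce to a stage-then-rename pattern (sketch; cleanup responsibilities as described above):

```go
package daemon

import (
	"fmt"
	"os"
)

// publishImageDir sketches the atomic publish: build everything into
// <finalDir>.staging, then a single os.Rename makes the artifact dir visible.
// Failure before the rename removes the staging dir; failure after it (e.g.
// persisting the image row) removes finalDir instead.
func publishImageDir(finalDir string, build func(stagingDir string) error, persist func() error) error {
	stagingDir := finalDir + ".staging"
	if err := os.MkdirAll(stagingDir, 0o755); err != nil {
		return err
	}
	if err := build(stagingDir); err != nil {
		os.RemoveAll(stagingDir)
		return fmt.Errorf("stage image: %w", err)
	}
	if err := os.Rename(stagingDir, finalDir); err != nil {
		os.RemoveAll(stagingDir)
		return err
	}
	if err := persist(); err != nil {
		os.RemoveAll(finalDir)
		return err
	}
	return nil
}
```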

The pullAndFlatten field on Daemon is the test seam: tests stub it
to write a fixture tree into destDir and skip the real registry.

Refactor: extracted the "kernel-ref vs direct paths" resolution
out of RegisterImage into d.resolveKernelInputs so PullImage and
RegisterImage share one source of truth for that policy. Split
ValidateRegisterPaths into a kernel-only ValidateKernelPaths so
PullImage (which produces the rootfs itself) can validate just
the kernel triple without the rootfs check.

API: ImagePullParams { Ref, Name, KernelPath, InitrdPath,
ModulesDir, KernelRef, SizeBytes }. RPC dispatch case image.pull
mirrors image.register.

Tests cover: happy-path producing a managed image with all four
artifacts present + staging cleaned up, name-collision rejection,
missing-kernel rejection, and staging cleanup on a failed pull.
defaultImageNameFromRef handles tag/digest/no-suffix cases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:27:32 -03:00
78376ba6ec
Phase 1: imagepull package — pull, flatten, ext4
New internal/imagepull/ subpackage. Three concerns, each
independently testable:

Pull (imagepull.go):
 - github.com/google/go-containerregistry's remote.Image with the
   linux/amd64 platform pinned. Anonymous pulls only for v1.
 - Layer blobs cached on disk via cache.NewFilesystemCache under
   <cacheDir>/blobs/sha256/<hex> — OCI-standard layout so
   skopeo/crane could co-exist later.
 - Eagerly touches every layer once so network errors surface at
   Pull time, not deep in Flatten.

Flatten (flatten.go):
 - Replays layers oldest-first into destDir.
 - Whiteout-aware: .wh.<name> deletes the named entry,
   .wh..wh..opq wipes the parent directory's contents from prior
   layers.
 - Path-traversal hardening mirrored from kernelcat extractTar:
   reject .., absolute paths, and symlinks/hardlinks whose
   resolved target escapes destDir.
 - Handles tar.TypeReg, TypeDir, TypeSymlink, TypeLink. Skips
   device/fifo nodes silently (need privilege; udev/devtmpfs
   handles them in the guest).

BuildExt4 (ext4.go):
 - Truncates outFile to sizeBytes, then runs `mkfs.ext4 -F -d
   <srcDir> -E root_owner=0:0`. No mount, no sudo, no loopback.
 - 64 MiB floor; callers handle real sizing with content-aware
   headroom.
 - File ownership in the resulting ext4 reflects srcDir's on-disk
   ownership — runner's uid/gid since extraction was unprivileged.
   Documented in package doc as a Phase A v1 limitation; Phase B
   will add a debugfs- or tar2ext4-based ownership fixup.

paths.Layout gains OCICacheDir at $XDG_CACHE_HOME/banger/oci/,
ensured at startup alongside the other dirs.
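
The pull-with-cache wiring, roughly (the go-containerregistry calls are real; everything else is an illustrative sketch):

```go
package imagepull

import (
	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/cache"
	"github.com/google/go-containerregistry/pkg/v1/remote"
)

// pull resolves an OCI ref for linux/amd64 and wraps it in a filesystem-backed
// layer cache, so repeated pulls of the same digest reuse blobs on disk.
// Anonymous pulls only; private-registry auth is out of scope here.
func pull(ref, cacheDir string) (v1.Image, error) {
	parsed, err := name.ParseReference(ref)
	if err != nil {
		return nil, err
	}
	img, err := remote.Image(parsed, remote.WithPlatform(v1.Platform{OS: "linux", Architecture: "amd64"}))
	if err != nil {
		return nil, err
	}
	// Layers are cached lazily, tee-on-read, as Flatten consumes them.
	return cache.Image(img, cache.NewFilesystemCache(cacheDir)), nil
}
```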

Tests use go-containerregistry's in-process registry to push and
pull synthetic multi-layer images. Cover: layer caching round-trip,
whiteout + opaque-marker handling, path-traversal rejection, unsafe
symlink rejection, real mkfs.ext4 round-trip (skipped if mkfs.ext4
absent), and tiny-size rejection.

go-containerregistry v0.21.5 added as a direct dep, plus its
transitive closure (containerd/stargz, opencontainers/go-digest,
docker/cli config helpers, etc).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:22:13 -03:00
da4a6bf45b
Add lint targets, fix gofmt drift, broaden Makefile build inputs
Three small operational improvements.

1. Makefile build dependencies now cover everything under cmd/ and
   internal/, not just *.go. The previous GO_SOURCES find pattern
   missed embedded assets (catalog.json today, anything else added
   later), so editing a JSON manifest didn't trigger a rebuild and
   left the binary stale. New BUILD_INPUTS covers all files; go's own
   build cache absorbs any redundant invocations. GO_SOURCES is kept
   for fmt/lint targets which still want only Go files.

2. New `make lint` (default + lint-go + lint-shell):
   - lint-go: gofmt -l (fail if any output) and go vet ./...
   - lint-shell: shellcheck --severity=error on scripts/*.sh
   The shell floor is set at error-level for now; the legacy
   make-rootfs-*.sh / make-*-kernel.sh / customize.sh scripts have
   warning-level findings (sudo-cat redirects, heredoc quoting) that
   would block landing this if we tightened immediately. Documented
   as tech debt in docs/kernel-catalog.md alongside a note about
   eventually replacing the per-distro bash with a uniform Go tool.

3. gofmt drift fixed in internal/daemon/imagemgr/build.go,
   session/session.go, and vm_create_ops.go (trailing newline +
   gofmt's preferred function-definition wrapping). Now
   `make lint` passes cleanly; future drift will fail CI/local lint
   instead of accumulating.

AGENTS.md gains a one-line note on make lint.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 16:49:17 -03:00
f0c1dc924c
kernel catalog: add void-6.12 2026-04-16 16:28:45 -03:00
fa95849f5a
Phase 5: kernel catalog publish flow + docs
Manual publish flow for the kernel catalog, designed for the current
no-CI, private-repo state of banger.

scripts/publish-kernel.sh <name>:
 - Reads $BANGER_KERNELS_DIR/<name>/ (the canonical layout produced by
   `banger kernel import`).
 - Pulls distro / arch / kernel_version from the local manifest.
 - Packages vmlinux + optional initrd.img + optional modules/ as
   <name>-<arch>.tar.zst with zstd -19.
 - Computes sha256 + size.
 - rclone copyto -> r2:banger-kernels/<file>.
 - HEAD-checks https://kernels.thaloco.com/<file> to catch
   public-access misconfig before declaring success.
 - jq-patches internal/kernelcat/catalog.json: replaces any prior
   entry with the same name, then sorts entries by name.
 - Prints next-step git+make commands; does not commit or rebuild
   automatically.

Environment overrides RCLONE_REMOTE / RCLONE_BUCKET / BASE_URL /
BANGER_KERNELS_DIR for non-default setups.

docs/kernel-catalog.md covers the architecture (embedded JSON +
external tarballs), end-user flow, the add/update/remove playbook,
naming and tarball-layout conventions, the trust model (sha256 in
embedded catalog catches transport/swap; no signing yet), and where
the bucket lives.

README.md gains a kernel-catalog example next to the existing image
register example. AGENTS.md points at publish-kernel.sh and the docs.

.gitignore now excludes .env so accidental drops of R2 credentials
don't follow into commits.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 15:56:56 -03:00
f0668ee598
Phase 4: remote catalog + banger kernel pull
Introduces the headline feature of the kernel catalog: pulling a kernel
bundle over HTTP without any local build step.

Catalog format (internal/kernelcat/catalog.go):
 - Catalog { Version, Entries } + CatEntry { Name, Distro, Arch,
   KernelVersion, TarballURL, TarballSHA256, SizeBytes, Description }.
 - catalog.json is embedded via go:embed and ships with each banger
   binary. It starts empty (Phase 5's CI pipeline will populate it).
 - Lookup(name) returns the matching entry or os.ErrNotExist.

Fetch (internal/kernelcat/fetch.go):
 - HTTP GET with streaming SHA256 over the response body.
 - zstd-decode (github.com/klauspost/compress/zstd) -> tar extract into
   <kernelsDir>/<name>/.
 - Hardens against path-traversal tarball entries (members whose
   normalised path escapes the target dir, and unsafe symlink
   targets) and sha256-mismatch downloads; any failure removes the
   partially-populated target dir.
 - Regular files, directories, and safe symlinks are supported; other
   tar types (hardlinks, devices, fifos) are silently skipped.
 - After extraction, recomputes sha256 over the on-disk vmlinux and
   writes the manifest with Source="pull:<url>".

Daemon methods (internal/daemon/kernels.go):
 - KernelPull(ctx, {Name, Force}) - lookup in embedded catalog, refuse
   overwrite unless Force, delegate to kernelcat.Fetch.
 - KernelCatalog(ctx) - return the embedded catalog annotated per-entry
   with whether it has been pulled locally.

RPC: kernel.pull, kernel.catalog dispatch cases.

CLI:
 - `banger kernel pull <name> [--force]`.
 - `banger kernel list --available` prints the catalog with a
   pulled/available STATE column and a human-readable size.

Tests: fetch round-trip (extract + manifest + sha256), sha256 mismatch
rejection with cleanup, missing-vmlinux rejection, path-traversal
rejection, HTTP error propagation, catalog parsing, lookup,
pulled-status reconciliation. All 20 packages green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 15:05:42 -03:00
7192ba24ae
Phase 3: banger kernel import bridges make-*-kernel.sh output
`banger kernel import <name> --from <dir>` copies a staged kernel
bundle into the local catalog. <dir> is the output of
`make void-kernel` or `make alpine-kernel` (build/manual/void-kernel/
or build/manual/alpine-kernel/).

kernelcat.DiscoverPaths locates artifacts under <dir>:
 1. Prefers metadata.json (written by make-void-kernel.sh).
 2. Falls back to globbing: boot/vmlinux-* or vmlinuz-* (Alpine
    fallback), boot/initramfs-*, lib/modules/<latest>.

The daemon's KernelImport copies kernel + optional initrd via
system.CopyFilePreferClone and modules via system.CopyDirContents
(no-sudo mode — catalog lives under ~/.local/state), computes SHA256
over the kernel, and writes the manifest via kernelcat.WriteLocal.

While wiring this up, fixed a latent bug in system.CopyDirContents:
filepath.Join(sourceDir, ".") silently drops the trailing dot, so the
intended `cp -a source/. target/` became `cp -a source target/` and
copied the whole source directory (including its basename) instead of
just its contents. Replaced the join with a manual "/." suffix.
imagemgr.StageBootArtifacts
(the only existing caller) silently benefits.
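
The Join pitfall in two lines (standard-library behaviour, easy to verify):

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// filepath.Join cleans its result, so the trailing "." vanishes…
	fmt.Println(filepath.Join("source", ".")) // "source" → cp copies the dir itself
	// …whereas appending the suffix manually keeps the "copy contents" semantics.
	fmt.Println(filepath.Clean("source") + "/.") // "source/."
}
```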

scripts/register-void-image.sh and scripts/register-alpine-image.sh
are rewritten to use `banger kernel import … && banger image register
--kernel-ref …` instead of the find-and-pass-paths dance. Preserves
the same user-facing commands and env vars.

Tests cover: metadata.json preference, glob fallback, Alpine vmlinuz
fallback, kernel-missing error, round-trip copy into the catalog, and
the --from required flag.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 14:53:49 -03:00
48e3a938cf
Phase 2: image register --kernel-ref resolves through the catalog
`banger image register --kernel-ref <name>` now substitutes for the
--kernel/--initrd/--modules triple. The daemon looks the name up via
kernelcat.ReadLocal under d.layout.KernelsDir, populates the three
paths from the resolved entry, then continues through the existing
validate/persist flow unchanged.

Passing both --kernel-ref and any of --kernel/--initrd/--modules is
rejected — at the CLI layer (before starting the daemon) and
defensively at the RPC layer. A missing catalog entry produces a clear
"run 'banger kernel list'" message.

Once registered, the image stores the resolved absolute paths, so
deleting the catalog entry later does not invalidate already-registered
images — managed image build still copies the kernel into its artifact
dir per imagemgr.StageBootArtifacts.

Tests cover: resolution success (absolute KernelPath populated from
catalog), mutual-exclusion rejection, and missing-entry error.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 14:25:50 -03:00
83cc3aee15
Phase 1: local kernel catalog scaffolding
Introduces a read/write kernel catalog on disk without any network
dependency, so later phases (image register --kernel-ref, import, pull)
can build on a working foundation.

Layout: adds KernelsDir to paths.Layout, ensured under
~/.local/state/banger/kernels/. Each cataloged kernel lives at
<KernelsDir>/<name>/ with a manifest.json alongside vmlinux and optional
initrd.img / modules/.

New internal/kernelcat package owns the disk format:
- Entry (Name, Distro, Arch, KernelVersion, SHA256, Source, ImportedAt)
- ValidateName (alphanumeric + dots/hyphens/underscores, no traversal)
- ReadLocal / ListLocal / WriteLocal / DeleteLocal
- SumFile helper
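
Roughly the on-disk types (field list from above; JSON tags, Go types, and the regexp are assumptions):

```go
package kernelcat

import (
	"fmt"
	"regexp"
	"time"
)

// Entry is the manifest.json persisted next to vmlinux.
type Entry struct {
	Name          string    `json:"name"`
	Distro        string    `json:"distro"`
	Arch          string    `json:"arch"`
	KernelVersion string    `json:"kernel_version"`
	SHA256        string    `json:"sha256"`
	Source        string    `json:"source"`
	ImportedAt    time.Time `json:"imported_at"`
}

// validNamePattern: alphanumerics plus dots, hyphens, underscores — nothing
// that could traverse out of <KernelsDir>/<name>/.
var validNamePattern = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`)

func ValidateName(name string) error {
	if !validNamePattern.MatchString(name) {
		return fmt.Errorf("invalid kernel name %q", name)
	}
	return nil
}
```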

The daemon exposes three RPC methods dispatched in daemon.go:
kernel.list, kernel.show, kernel.delete. Implementations live in a new
internal/daemon/kernels.go and are thin wrappers over kernelcat using
d.layout.KernelsDir.

CLI: new top-level `banger kernel` with list / show / rm subcommands
mirroring the image-command pattern (ensureDaemon, RPC call, table or
JSON output). No sudo required — kernel ops are user-space only.

Users can now manually populate ~/.local/state/banger/kernels/<name>/
and see it via `banger kernel list`. Phase 2 wires --kernel-ref into
image register; Phase 3 adds `banger kernel import`; Phase 4 adds
remote pulls.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 14:21:10 -03:00
ca4865447c
Refresh daemon docs and mark web UI experimental
internal/daemon/doc.go and ARCHITECTURE.md were written before the
subpackage extractions and still referenced old structure (in-progress
phrasing, missing opstate/dmsnap/fcproc/imagemgr/session/workspace,
mentions of opRegistry by its old name). Both now describe the current
shape: composition root + six leaf subpackages, lock ordering rooted
at vmLocks[id], and the one intra-package dependency (workspace →
session for ShellQuote + FormatStepError).

README.md and AGENTS.md mark the local web UI as experimental. It is
still enabled by default at 127.0.0.1:7777, but the docs now state
plainly that its surface is not stable or hardened and not intended for
anything beyond single-user localhost use. AGENTS.md also points at
ARCHITECTURE.md for the subpackage layout.

No code changes; tests still green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 16:44:11 -03:00
1d51370d26
Extract workspace subpackage with pure repo helpers
Moves the stateless parts of the workspace subsystem into
internal/daemon/workspace:

- RepoSpec struct + InspectRepo for host-side git inspection
- ImportRepoToGuest (taking a minimal GuestClient interface) with the
  full-copy and metadata-only / shallow-overlay paths
- FinalizeScript, PrepareRepoCopy, ResolveSourcePath
- ListSubmodules, ListOverlayPaths, ParsePrepareMode
- Git helpers (GitOutput, GitTrimmedOutput, GitResolvedConfigValue,
  ParseNullSeparatedOutput, RunHostCommand, GitFileURL) and the
  HostCommandOutputFunc test seam
- ShallowFetchDepth const

The subpackage imports internal/daemon/session for ShellQuote and
FormatStepError, so both workspace and session pure helpers live in
their own subpackages with a single clean workspace → session
dependency direction.

daemon/workspace.go shrinks from 481 → 156 LOC, keeping just the three
orchestrator methods (Export, Prepare, prepareLocked) that still touch
d.store, d.FindVM, d.dialGuest, d.waitForGuestSSH, and the VM lock set.
guestSessionHostCommandOutputFunc is removed from guest_sessions.go (its
only caller was workspace.go; the new package has its own copy).

All tests green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 16:37:19 -03:00
37e02b1576
Extract session subpackage with pure guest-session helpers
Moves the stateless parts of the guest-session subsystem into
internal/daemon/session:

- consts (BackendSSH, attach/transport kinds, StateRoot, LogTailLineDefault)
- StateSnapshot plus ParseState / InspectStateFromDir / ApplyStateSnapshot / StateChanged
- 10 on-guest path helpers (StateDir, StdoutLogPath, StdinPipePath, …)
- 3 bash script generators (Script, InspectScript, SignalScript)
- small utilities (ShellQuote, ExitCode, CloneStringMap, TailFileContent,
  ProcessAlive + syscallKill test seam, FormatStepError)
- launch helpers (DefaultName, DefaultCWD, FailLaunch,
  NormalizeRequiredCommands, CWDPreflightScript, CommandPreflightScript,
  AttachInputCommand, AttachTailCommand, EnvLines)

Callers inside the daemon package import the new package under the
alias "sess" to avoid colliding with the local `session model.GuestSession`
variables threaded through the orchestrator code. guest_sessions.go
shrinks from 616 → 156 LOC; session_stream.go, session_attach.go,
session_lifecycle.go, workspace.go, and guest_sessions_test.go rewire to
the exported names.

The orchestrator methods (StartGuestSession, BeginGuestSessionAttach,
SendToGuestSession, GuestSessionLogs, refresh/inspect, sessionRegistry,
guestSessionController) stay on *Daemon. Full Manager-style extraction
would need prerequisite phases (operation protocol, workdisk helpers),
mirroring Phase 4a's trade-off.

All tests green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 16:33:12 -03:00
c13c8b11af
Extract imagemgr subpackage with pure image helpers
Moves the stateless helpers of the image subsystem into
internal/daemon/imagemgr:

paths.go — path validators (ValidateRegisterPaths,
ValidatePromotePaths), artifact staging (StageBootArtifacts,
StageOptionalArtifactPath), metadata (BuildMetadataPackages,
WritePackagesMetadata).

build.go — ResizeRootfs, WriteBuildLog, and the full guest
provisioning script generator (BuildProvisionScript, BuildModulesCommand
and all private script-append helpers) along with the mise/tmux/opencode
version constants.

The orchestrator methods (BuildImage, RegisterImage, PromoteImage,
DeleteImage, runImageBuildNative) stay on *Daemon: they still touch
d.store, d.imageOpsMu, d.beginOperation, capability hooks, and
fcproc-wrapped Daemon helpers — extracting them needs prerequisite
phases (operation protocol, workdisk helpers, tap pool). This commit is
strictly the pure-helper extraction that can land cleanly today.

imagebuild.go shrinks from 453 -> 225 LOC (half gone). images.go shrinks
from 450 -> 374 LOC. imagebuild_test.go updated to call the exported
imagemgr.BuildProvisionScript. Zero behavior change; all tests green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 16:24:22 -03:00
6e989914dd
Extract fcproc subpackage for firecracker process helpers
Moves the host-side firecracker primitives — bridge setup, socket dir,
binary resolution, tap creation, socket chown, PID lookup, resolve,
ctrl-alt-del, wait-for-exit, SIGKILL — plus the shared
ErrWaitForExitTimeout sentinel and a small waitForPath helper into
internal/daemon/fcproc.

Manager is stateless beyond its runner + config + logger. The daemon
package keeps thin forwarders (d.ensureBridge, d.createTap, etc.) so no
call site or test changes. A d.fc() helper builds a Manager on demand
from Daemon state, which lets tests keep constructing &Daemon{...}
literals without wiring fcproc explicitly.

This unblocks Phase 4 (imagemgr extraction): imagebuild.go's dependence
on d.createTap/d.firecrackerBinary/etc. can now be satisfied by
importing fcproc instead of reaching back to *Daemon.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 16:11:39 -03:00
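
Rough shape of the split described above. Manager, its runner/config/logger fields, and the bridge helper come from the commit message; the Runner signature and the method body are assumptions.

```go
// Illustrative sketch of the fcproc pattern, not the real package.
package fcproc

import "log/slog"

// Runner abstracts host command execution so tests can stub it.
type Runner interface {
	Run(name string, args ...string) ([]byte, error)
}

type Config struct {
	BridgeName string
}

// Manager is stateless beyond its collaborators.
type Manager struct {
	Runner Runner
	Config Config
	Logger *slog.Logger
}

func (m *Manager) EnsureBridge() error {
	// Placeholder check; the real helper also creates the bridge when missing.
	_, err := m.Runner.Run("ip", "link", "show", m.Config.BridgeName)
	return err
}
```

In the daemon package a one-line forwarder such as `func (d *Daemon) ensureBridge() error { return d.fc().EnsureBridge() }` keeps call sites and tests unchanged, with `d.fc()` building the Manager from Daemon state on demand.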
fdab4a7e68
Extract opstate and dmsnap into subpackages
Two leaves of the daemon package that carry no back-references to Daemon
move out:

- internal/daemon/opstate: generic Registry[T AsyncOp]. The AsyncOp
  interface methods are capitalised (ID, IsDone, UpdatedAt, Cancel);
  vmCreateOperationState and imageBuildOperationState implement it.
- internal/daemon/dmsnap: Create, Cleanup, Remove plus the Handles type
  for device-mapper snapshot lifecycle. Takes an explicit Runner
  interface. The daemon-package snapshot.go keeps thin forwarders and a
  type alias so existing call sites and tests are untouched.

Skipped on purpose: tap_pool has too many Daemon-scoped dependencies
(config, store, closing, createTap) for a clean extraction at this
stage; nat.go is already a thin facade over internal/hostnat;
dns_routing.go tests tightly couple to package internals, so extraction
would be more churn than payoff. Each can be revisited when a
subsystem-level refactor forces the boundary.

All tests green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 16:02:43 -03:00
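
A compilable sketch of the generic registry described above. The AsyncOp method set matches the commit message; the Registry internals and method bodies are assumptions.

```go
// Illustrative only: the real opstate API may differ.
package opstate

import (
	"sync"
	"time"
)

type AsyncOp interface {
	ID() string
	IsDone() bool
	UpdatedAt() time.Time
	Cancel()
}

// Registry replaces the two ad-hoc operation maps with one shared type.
type Registry[T AsyncOp] struct {
	mu  sync.Mutex
	ops map[string]T
}

func NewRegistry[T AsyncOp]() *Registry[T] {
	return &Registry[T]{ops: make(map[string]T)}
}

func (r *Registry[T]) Begin(op T) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.ops[op.ID()] = op
}

func (r *Registry[T]) Status(id string) (T, bool) {
	r.mu.Lock()
	defer r.mu.Unlock()
	op, ok := r.ops[id]
	return op, ok
}

// Prune drops finished operations older than maxAge.
func (r *Registry[T]) Prune(maxAge time.Duration) {
	r.mu.Lock()
	defer r.mu.Unlock()
	for id, op := range r.ops {
		if op.IsDone() && time.Since(op.UpdatedAt()) > maxAge {
			delete(r.ops, id)
		}
	}
}
```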
59f2766139
Move subsystem state/locks off Daemon into owning types
Daemon no longer owns a coarse mu shared across unrelated concerns.
Each subsystem now carries its own state and lock:

- tapPool: entries, next, and mu move onto a new tapPool struct.
- sessionRegistry: sessionControllers + its mutex move off Daemon.
- opRegistry[T asyncOp]: generic registry collapses the two ad-hoc
  vm-create and image-build operation maps (and their mutexes) into one
  shared type; the Begin/Status/Cancel/Prune methods simplify.
- vmLockSet: the sync.Map of per-VM mutexes moves into its own type;
  lockVMID forwards.
- Daemon.mu splits into imageOpsMu (image-registry mutations) and
  createVMMu (CreateVM serialisation) so image ops and VM creates no
  longer block each other.

Lock ordering collapses to vmLocks[id] -> {createVMMu, imageOpsMu} ->
subsystem-local leaves. doc.go and ARCHITECTURE.md updated.

No behavior change; tests green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 15:58:33 -03:00
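
A trimmed illustration of the ownership move described above, assuming simplified field shapes; the real Daemon struct has many more members.

```go
// Sketch only: each subsystem guards its own data with its own lock.
package daemon

import "sync"

type tapPool struct {
	mu      sync.Mutex
	entries []string
	next    int
}

type sessionRegistry struct {
	mu          sync.Mutex
	controllers map[string]any
}

type vmLockSet struct{ locks sync.Map }

// Daemon keeps only two narrow top-level mutexes; the old coarse mu is gone.
// Lock ordering: vmLocks[id] -> {createVMMu, imageOpsMu} -> subsystem leaves.
type Daemon struct {
	taps       tapPool
	sessions   sessionRegistry
	vmLocks    vmLockSet
	imageOpsMu sync.Mutex // image-registry mutations
	createVMMu sync.Mutex // CreateVM serialisation
}
```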
ea0db1e17e
Split internal/daemon vm.go and guest_sessions.go by concern
vm.go (1529 LOC) splits into vm_create, vm_lifecycle, vm_set, vm_stats,
vm_disk, vm_authsync; firecracker/DNS/helpers stay in vm.go.

guest_sessions.go (1266 LOC) splits into session_controller,
session_lifecycle, session_attach, session_stream; scripts and helpers
stay in guest_sessions.go.

Mechanical move only. No behavior change. Adds doc.go and
ARCHITECTURE.md capturing subsystem map and current lock ordering as
the baseline for the upcoming subsystem extraction.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 15:47:08 -03:00
09590cbaa0
Fix Firecracker PID resolution and deprecated net.Error.Temporary
Use context.Background() for resolveFirecrackerPID so a cancelled
request context (client disconnect) doesn't prevent tracking the
spawned Firecracker process, leaving it orphaned on cleanup.

Drop ne.Temporary() check in accept loop; deprecated since Go 1.18
and unreliable. Retry on any net.Error instead.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 12:44:38 -03:00
0e764b0571
Fix two daemon bugs: Firecracker context and sessionControllers init
vm.go: Firecracker was launched with context.Background() instead of
the incoming request ctx. A cancelled or timed-out VM creation request
could not stop mid-flight Firecracker process spawning, leaving an
orphaned process and leaked resources. Replace the four firecrackerCtx
uses with ctx directly; the local variable is removed.

guest_sessions.go / daemon.go: sessionControllers map was lazily
initialized with a nil-check inside every mutating method. With d.mu
held this isn't a data race, but the pattern is fragile — any new
method that writes to the map without copying the guard can panic.
Initialize the map once in Open() alongside the other daemon maps and
channels, and remove the redundant nil-checks from setGuestSessionController
and claimGuestSessionController.
2026-04-14 19:53:26 -03:00
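
The init-once pattern the fix above lands on, sketched with assumed names; the point is that Open() allocates the map so mutating methods never carry their own lazy nil-check guard.

```go
// Hypothetical, trimmed-down illustration of the fix.
package daemon

import "sync"

type controller struct{ /* ... */ }

type sessionDaemon struct {
	mu                 sync.Mutex
	sessionControllers map[string]*controller
}

// Open initialises every map up front, alongside the other daemon maps
// and channels.
func (d *sessionDaemon) Open() {
	d.sessionControllers = make(map[string]*controller)
}

func (d *sessionDaemon) setController(id string, c *controller) {
	d.mu.Lock()
	defer d.mu.Unlock()
	d.sessionControllers[id] = c // safe: Open already allocated the map
}
```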
43dfda14f8
Fix TOCTOU race in lockVMID
The old pattern held vmLocksMu to get/create a *sync.Mutex, then
released vmLocksMu before calling lock.Lock(). In the gap between
the two operations a concurrent goroutine could observe the entry,
and any future cleanup path that deleted map entries could let a
third goroutine create a fresh *sync.Mutex for the same ID — leaving
two callers holding independent locks with no mutual exclusion.

Fix: replace the manual map + vmLocksMu pair with sync.Map and
LoadOrStore. LoadOrStore is atomic at the map level: exactly one
*sync.Mutex wins for each VM ID, with no release-then-reacquire
gap between the lookup and the insert. vmLocksMu is removed.
2026-04-14 19:50:04 -03:00
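
A sketch of the LoadOrStore fix described above; only lockVMID and the sync.Map choice come from the commit, the surrounding names are assumed.

```go
// Illustrative sketch, not the daemon's actual code.
package daemon

import "sync"

type vmLockSet struct {
	locks sync.Map // VM ID -> *sync.Mutex
}

// lockVMID returns the locked per-VM mutex. LoadOrStore is atomic, so
// exactly one *sync.Mutex can ever win for a given ID: there is no gap
// between lookup and insert for another goroutine to slip in a second
// mutex, and therefore no way for two callers to hold independent locks
// for the same VM.
func (s *vmLockSet) lockVMID(id string) *sync.Mutex {
	mu, _ := s.locks.LoadOrStore(id, &sync.Mutex{})
	m := mu.(*sync.Mutex)
	m.Lock()
	return m
}
```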
9afa0e97ce
Add LICENSE, update .gitignore, add security note to README
- MIT LICENSE (2026 Thales Maciel)
- .gitignore: replace broad /build/ with explicit /build/bin/ and
  build/manual/ so large manual rootfs/kernel artifacts are clearly
  excluded; add *.pem, *.key, id_rsa
- README: add Security section documenting intentional
  PermitRootLogin yes / StrictModes no in guest sshd and the
  network boundary that makes it acceptable
2026-04-14 16:54:33 -03:00
ff51b7ce21
workspace.export: add base_commit to capture worker git commits
Without base_commit, export diffs against the current guest HEAD.
If the worker ran git commit inside the VM, HEAD advanced and the
diff came back empty — committed work was silently lost.

With base_commit set to the head_commit from workspace.prepare,
the diff uses that fixed point instead. After git add -A the index
holds the full working state, so git diff --cached <base_commit>
captures everything: committed deltas (HEAD moved past base) and
any uncommitted changes on top, in one patch, applied with the
same git apply flow.

- WorkspaceExportParams gains base_commit
- WorkspaceExportResult echoes back the ref actually used
- CLI gains --base-commit flag
- Tests assert scripts use the caller-supplied ref and that
  omitting it falls back to HEAD
2026-04-14 16:13:05 -03:00
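
A sketch of the ref selection described above; the function name and the guest path are illustrative, while the git pipeline mirrors the commit text.

```go
// Hypothetical helper building the guest-side export script.
package daemon

import "fmt"

func exportScript(baseCommit string) string {
	ref := baseCommit
	if ref == "" {
		ref = "HEAD" // no base_commit supplied: old behaviour, diff against current HEAD
	}
	// git add -A stages the full working state, so diffing the index
	// against the fixed base captures committed and uncommitted work
	// in one binary-safe patch.
	return fmt.Sprintf("cd /root/repo && git add -A && git diff --cached --binary %s", ref)
}
```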
94c353f317
Add guest.session.send and vm.workspace.export RPCs
guest.session.send — write to a pipe-mode session's stdin without
holding the exclusive attach. The daemon dials a fresh SSH connection,
uploads the payload to a temp file, and cats it into the session's
named FIFO. Linux atomicity for writes ≤ PIPE_BUF covers all pi RPC
JSONL lines. Attach exclusivity is unchanged.

vm.workspace.export — pull changes from guest back to host. Runs
`git add -A && git diff --cached HEAD --binary` inside the guest via a
new RunScriptOutput helper on guest.Client (stdout-only capture,
distinct from RunScript which merges stderr). Returns a binary-safe
patch and a list of changed files. CLI writes the patch to stdout for
`| git apply` or to a file via --output.

RunScriptOutput is implemented as a direct SSH session (same pattern as
runSession) rather than going through StartCommand/StreamSession to
avoid closing the underlying Client, which is required since
ExportVMWorkspace calls it twice on the same connection.

New files: internal/daemon/workspace_test.go
2026-04-14 15:21:50 -03:00
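
The stdin-injection path above reduces to a small guest command. A sketch with assumed paths and names; only the temp-file-then-cat shape and the PIPE_BUF point come from the commit.

```go
// Hypothetical helper: build the guest-side command that feeds an
// uploaded payload into a pipe-mode session's stdin FIFO.
package daemon

import "fmt"

// sendCommand assumes the payload was already uploaded to tmpPath over a
// fresh SSH connection. Writes of <= PIPE_BUF bytes to a FIFO are atomic
// on Linux, which covers single JSONL RPC lines.
func sendCommand(tmpPath, fifoPath string) string {
	return fmt.Sprintf("cat %q > %q && rm -f %q", tmpPath, fifoPath, tmpPath)
}
```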
797a9de1ce
Install claude and pi through mise
Provisioning was still installing `claude` and `pi` through a separate
npm-global prefix even after the guest images had switched to `mise` for
Node and opencode. That left two competing install paths and made the
runtime layout harder to reason about.

Switch the Debian and Void image setup flows to install `claude` and `pi`
as `mise` npm tools, assert their shims exist after `mise reshim`, and
symlink `node`, `npm`, `opencode`, `claude`, and `pi` directly from the
mise shim directory into `/usr/local/bin`.

Update the imagebuild test expectations and bump the Void rootfs default
size to 4G so the larger default toolset still fits reliably.
2026-04-13 18:29:02 -03:00
5e26fd7544
Fix guest session cwd preflight scripts
Guest session cwd and command preflight helpers were emitting literal
`\\n` separators, so the guest shell saw malformed one-line scripts and
could fail `preflight_cwd` even when `/root/repo` already existed.

Replace those builders with real newlines, and fix the nearby attach
helper commands that were making the same mistake.

Add a small daemon guest-SSH seam so workspace preparation and session
start can share a fake backend in tests, then cover the regression with
an end-to-end daemon test for `PrepareVMWorkspace` followed by
`StartGuestSession` on `/root/repo`.

Validation: `GOCACHE=/tmp/banger-gocache go test ./internal/daemon` and
`GOCACHE=/tmp/banger-gocache go test ./...`.
2026-04-13 18:26:19 -03:00
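
The class of bug fixed above, in miniature: joining script lines with the two-character sequence backslash-n instead of a real newline. Helper name and script contents are hypothetical.

```go
// Illustrative sketch of the separator bug and its fix.
package daemon

import (
	"fmt"
	"strings"
)

// cwdPreflightScript checks the working directory exists before the
// session command runs.
func cwdPreflightScript(dir string) string {
	lines := []string{
		"set -eu",
		fmt.Sprintf("cd %q || { echo preflight_cwd; exit 1; }", dir),
	}
	// Buggy variant: strings.Join(lines, `\n`) sends the literal characters
	// '\' and 'n', so the guest shell sees one malformed line.
	return strings.Join(lines, "\n")
}
```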
37c4c091ec
Add guest sessions and agent VM defaults
Add daemon-backed workspace and guest-session primitives so host
orchestrators can prepare /root/repo, launch long-lived guest commands,
and attach to pipe-mode sessions over the local stdio mux bridge.

Persist richer session metadata and launch diagnostics, preflight guest
cwd/command requirements, make pipe-mode attach rehydratable from guest
state after daemon restart, and allow submodules when workspace prepare
runs in full_copy mode.

At the same time, stop vm run from auto-attaching opencode, make it
print next-step commands instead, and make glibc guest images more
agent-ready by installing node, opencode, claude, and pi while syncing
opencode/claude/pi auth files into work disks on VM start.

Validation:
- GOCACHE=/tmp/banger-gocache go test ./...
- make build
- banger vm workspace prepare --help
- banger vm session --help
- banger vm session start --help
- banger vm session attach --help
2026-04-12 23:48:42 -03:00
497e6dca3d
Rename experimental Void image to void
Replace the old `void-exp` repository defaults with `void` so the Make targets,
registration helper, example config, verification messaging, and sample test
fixtures all line up with the new managed image name.

Keep the scope to repo-facing naming only: config overrides, helper output, and
test fixtures now expect `void`, while runtime compatibility for existing local
`void-exp` VMs remains an operational concern outside this commit.

Validation: go test ./..., make build, and a local `banger vm create --image void`
smoke boot with ssh and opencode ports up.
2026-04-01 20:15:28 -03:00
70bc6d07d0
Fix void-kernel output directory setup
Replace the stale `RUNTIME_DIR` mkdir in the experimental Void kernel helper with
creation of the parent directory for `OUT_DIR`, which is the current
BANGER_MANUAL_DIR/custom --out-dir flow used by the Make target.

This restores `make void-kernel` without requiring an extra environment override.
Validation: make void-kernel ARGS='--out-dir /tmp/banger-void-kernel-verify-$$'.
2026-04-01 19:42:30 -03:00
5f89c07fc0
Fix vm run guest repo path and add vm acp bridge
Normalize repo-backed guest checkouts to /root/repo so vm run, attach, and
follow-on guest tooling stop depending on the source repository name.

Add `banger vm acp [--cwd] <vm>` as an SSH stdio bridge to guest `opencode acp`,
defaulting to /root/repo when that checkout exists and falling back to /root.
Update the README and CLI coverage around the fixed guest path and ACP command.

Validation: go test ./internal/cli, go test ./..., make build.
2026-04-01 19:42:00 -03:00
dbc70643c3
Teach VM listing Docker-style aliases and filters
Make `banger ps` a true alias of `banger vm list` and add `banger vm ls`
and `banger vm ps` so the common listing entrypoints all share one path.

Default the shared list command to running VMs only, add `--all` to include
stopped entries, `--latest` to keep only the newest matching VM, and `--quiet`
to print full VM IDs without the table renderer.

Cover the alias wiring plus the running/latest/quiet helpers in CLI tests.
Validation: go test ./internal/cli; GOCACHE=/tmp/banger-gocache go test ./...;
make build; ./build/bin/banger ps --help; ./build/bin/banger vm ls --help.
2026-03-31 13:03:12 -03:00
671723a0ef
Add file first guidance for CLI and LLM inputs
Capture the repository preference that shell facing tools should consume
files when they support them instead of large inline strings.

Add explicit guidance for prompt files, temporary files, and git commit
message files so future automation avoids quoting bugs and stays aligned
with the vm run harness and commit workflows.
2026-03-29 17:52:58 -03:00
88e633c6c4
Document vm run tooling bootstrap and attach fallback
Bring the vm run documentation back in line with the current behavior.

Explain that vm run now starts a best effort guest tooling harness,
prefers a host side opencode attach session when the local client
supports it, and falls back to guest opencode over SSH otherwise.
Also note that the harness runs asynchronously and logs inside the guest.
2026-03-29 17:52:42 -03:00
4813e844e2
Bootstrap vm run tooling before attach
Speed up first use of repo backed VMs by bootstrapping obvious tools before
the best effort LLM harness runs.

Add a host side tooling plan for pinned Go, Node, Python, and Rust versions,
summarize that plan in the uploaded prompt, and run repo mise install plus
guest global mise use -g --pin steps before the bounded opencode inspection.

Keep the harness non fatal, prefer host opencode attach when the client
supports it, fall back to guest opencode over SSH for older clients, and
cover the new flow with CLI plus planner tests.

Validation:
- go test ./internal/cli ./internal/toolingplan
- GOCACHE=/tmp/banger-gocache go test ./...
- make build
2026-03-29 11:38:05 -03:00
1e967140c3
Speed up vm run repo import
Replace the post-boot full-history git bundle path with a shallow repo copy so vm run no longer spends its quiet time shipping and cloning every object in the source repository.

Stage a depth-10 no-checkout clone from the host repo, fetch the requested checkout commit only when it is outside the shallow window, rewrite origin back to the host repo's origin URL, and keep the existing guest checkout plus working-tree overlay behavior.

Add explicit [vm run] progress lines after [vm create] ready so the user can see the SSH wait, shallow repo prep, guest copy, overlay, and opencode attach phases instead of a silent pause.

Validated with GOCACHE=/tmp/banger-gocache go test ./..., make build, and a local payload comparison showing the banger repo dropping from a ~400 MB full bundle to a ~294 KB shallow metadata copy.
2026-03-22 19:45:26 -03:00
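
How the shallow staging step above might look in Go. The file:// form matters because git ignores --depth on plain local-path clones; the helper name, depth constant, and error handling are assumptions.

```go
// Illustrative sketch of the shallow staging flow, not the real code.
package daemon

import (
	"fmt"
	"os/exec"
)

const shallowFetchDepth = 10

// stageShallowCopy prepares a depth-limited, no-checkout staging clone of
// the host repo. srcFileURL must be a file:// URL so --depth is honoured.
func stageShallowCopy(srcFileURL, stagingDir, headCommit, originURL string) error {
	steps := [][]string{
		{"git", "clone", "--no-checkout", "--depth", fmt.Sprint(shallowFetchDepth), srcFileURL, stagingDir},
		// The real flow only fetches headCommit when it falls outside the
		// shallow window; the unconditional fetch keeps this sketch short.
		{"git", "-C", stagingDir, "fetch", "--depth", "1", "origin", headCommit},
		// Point origin back at the true upstream instead of the host path.
		{"git", "-C", stagingDir, "remote", "set-url", "origin", originURL},
	}
	for _, args := range steps {
		if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
			return fmt.Errorf("%v failed: %s: %w", args, out, err)
		}
	}
	return nil
}
```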
42b4a18c63
Sync Git identity into guest VMs
Populate guest /root/.gitconfig from host git config --global during work-disk preparation so plain VM shells can commit.

Resolve user.name and user.email from the source repo for vm run and write them only into the imported checkout, preserving repo-specific identity overrides.

Update mounted guest .gitconfig through a host temp file plus sudo install instead of direct git config --file writes, since the mounted root-owned work disk blocks Git lockfile creation.

Validated with GOCACHE=/tmp/banger-gocache go test ./..., make build, and a live alpine vm create smoke check for guest git config.
2026-03-22 19:30:36 -03:00
f798e1db33
Stamp shared build metadata into banger binaries
Treat `banger`, `bangerd`, and `banger-vsock-agent` as one release by
stamping the same version, commit SHA, and build timestamp into every
binary through a shared ldflag-backed `internal/buildinfo` package.

Add `banger version`, extend daemon ping/status to report the running
daemon's build tuple, and keep the guest helper linked to the same build
metadata without adding a new public version surface for it.

Validate with `GOCACHE=/tmp/banger-gocache go test ./...`, `make build`,
`./build/bin/banger version`, and `./build/bin/banger daemon status`
after the daemon restarts onto the new binary.
2026-03-22 17:14:06 -03:00
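
The customary shape of an ldflag-stamped build-info package, as described above. Variable names and the example -X flag are assumptions; the module path is a placeholder.

```go
// Illustrative sketch of an ldflag-backed buildinfo package.
package buildinfo

// Overridden at release-build time with something like:
//
//	go build -ldflags "-X <module>/internal/buildinfo.Version=v0.1.0 \
//	  -X <module>/internal/buildinfo.Commit=$(git rev-parse --short HEAD)"
var (
	Version = "dev"
	Commit  = "unknown"
	BuiltAt = "unknown"
)
```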
ea2db1e868
Configure direct SSH access for .vm hosts
Make daemon startup sync a managed `Host *.vm` block into `~/.ssh/config` so plain `ssh root@<vm>.vm` uses banger's managed key and the same publickey-only options as `banger vm ssh`.

Write the block directly instead of relying on a separate include file so it still applies when a user's SSH config ends inside another `Host` stanza, and remove the legacy managed include path. Add daemon tests that cover fresh config creation and managed-block replacement while preserving user entries.

Validate with `go test ./...`, `make build`, `ssh -G alp.vm`, and `ssh alp.vm true`.
2026-03-22 16:48:42 -03:00
b7f6d1fe1b
Route .vm DNS through systemd-resolved
Banger was already serving VM records on 127.0.0.1:42069, but hosts using systemd-resolved were not routing .vm queries there. That made direct lookups against the local server work while normal host resolution and commands like opencode attach <vm>.vm:4096 failed.

Sync resolvectl dns/domain/default-route settings onto the banger bridge when the daemon opens and whenever VM DNS records are published, and revert that bridge-scoped configuration on daemon shutdown. This uses sudo resolvectl because unprivileged resolved reconfiguration on this host requires interactive authentication.

Validation: GOCACHE=/tmp/banger-gocache go test ./..., make build, daemon restart, resolvectl dns/domain br-fc, resolvectl query vrum.vm, and curl http://vrum.vm:4096.
2026-03-22 15:07:22 -03:00
0ad3dae502
Fix vm run guest checkout ownership
Extract the host worktree overlay with tar -o so the guest repo stays owned by root instead of inheriting host UID/GID metadata. That avoids Git's dubious ownership check on /root/<repo> after vm run.

Also register the guest checkout as a safe.directory during repo setup so opencode and manual git commands can read branch state reliably after attach.

Validation: GOCACHE=/tmp/banger-gocache go test ./... and make build.
2026-03-22 00:58:51 -03:00
2ebc6f99c6
Add repo-backed vm run command
Create a CLI-only banger vm run [path] flow that resolves the enclosing git repository, creates a VM, imports a guest checkout, and launches opencode attach automatically from the host.

Build the guest checkout by bundling git history plus the resolved base and head commits, cloning that bundle in the guest, and overlaying tracked plus untracked non-ignored files over SSH so local working-tree changes carry over. Support guest-only branch creation with --branch and --from, reject bare repos and submodules, and add selective tar helpers plus CLI seams to keep the workflow testable.

Validate with go test ./..., make build, banger vm run --help, and the expected --from requires --branch error path.
2026-03-21 23:34:20 -03:00
8bcc767824
Sync host opencode auth into guest work disks
Refresh guest opencode auth from the host at VM start so guest opencode can reuse the local login without baking secrets into managed images.

Reuse the existing work-disk preparation path to copy ~/.local/share/opencode/auth.json into /root/.local/share/opencode/auth.json with mode 0600, and warn and skip when the host file is missing or unreadable so any existing guest auth stays in place.

Add daemon coverage for copy, replacement, and warn-and-skip cases, document the restart behavior in the README, and validate with go test ./... plus make build. Existing VMs pick the new auth up on their next restart.
2026-03-21 22:36:13 -03:00
786d235f7f
Show image names in vm list and raise default memory
Make `banger vm list` easier to scan by resolving each VM image ID back to the registered image name when that mapping is available, while still falling back to a short ID for unknown images.

Raise the shared default VM memory from 1024 MiB to 2048 MiB so new VMs, CLI help, and daemon-side defaults all align on a 2 GiB baseline.

Add CLI coverage for the image-name rendering path and validate the change with go test ./..., make build, `banger vm list`, and `banger vm create --help`.
2026-03-21 22:21:47 -03:00
3b7e77a2de
Show rootfs sizes in image list
Replace the noisy rootfs path column in `banger image list` with the current rootfs file size so the table is easier to scan.

Render a ROOTFS SIZE column from the on-disk image size, fall back to `-` when the artifact cannot be statted, and keep the existing image summary output unchanged.

Add CLI coverage for both the formatted size case and the missing-file fallback, then rebuild and check the live command output.
2026-03-21 21:56:07 -03:00
14d8563f3c
Stop using kernel IP autoconfig for runtime VMs
Avoid the Alpine boot stall caused by kernel ip= autoconfig running before
virtio_net is available.

Split runtime and image-build boot args so managed VMs boot without kernel
network autoconfig, inject a static guest network config plus bootstrap
script into the runtime overlay, and keep image builds on the old path for
compatibility with existing base images.

Preserve executable bits when patching guest files into ext4 images and add
coverage for the new boot-arg split and guest network config generation.

Validated with go test ./..., a rebuilt Alpine image, and a fresh alp-fast
create/ssh check that brought vm.start down to about 2.7s.
2026-03-21 21:54:18 -03:00
092d848620
Wait for real guest vsock health before opencode
Make vm create wait for the guest-side vsock /healthz endpoint instead of only waiting for the host socket path, so the wait_vsock_agent stage reflects actual guest readiness.

Start banger-vsock-agent earlier in the Alpine OpenRC graph and report later /ports failures as guest-service waits rather than vsock-agent waits, which makes the progress output match what the guest is really doing.

Validate with go test ./..., a rebuilt managed alpine image, and a fresh vm create --image alpine --name alp --nat that now progresses through wait_vsock_agent -> wait_guest_ready -> wait_opencode -> ready.
2026-03-21 21:14:22 -03:00
a166068fab
Add an experimental Alpine image flow
Stage a complete Alpine x86_64 image stack so `--image alpine` works like the existing manual Void path instead of relying on Debian-oriented image builds.

Add make targets plus kernel/rootfs/register helpers that download pinned Alpine artifacts, extract a Firecracker-compatible vmlinux, build a matching mkinitfs initramfs, seed OpenRC services, and register/promote a managed image named alpine.

Fold in the bring-up fixes discovered during boot validation: use rootfstype=ext4 in shared boot args, install libgcc/libstdc++ for the opencode binary, and give opencode more time to become ready on cold boots.

Validate with go test ./..., the Alpine helper builds, image promotion, and banger vm create --image alpine --name alp --nat plus guest service and port checks.
2026-03-21 20:25:55 -03:00
572bf32424
Remove runtime-bundle image dependencies
Hard-cut banger away from source-checkout runtime bundles as an implicit source of
image and host defaults. Managed images now own their full boot set,
image build starts from an existing registered image, and daemon startup
no longer synthesizes a default image from host paths.

Resolve Firecracker from PATH or firecracker_bin, make SSH keys config-owned
with an auto-managed XDG default, replace the external name generator and
package manifests with Go code, and keep the vsock helper as a companion
binary instead of a user-managed runtime asset.

Update the manual scripts, web/CLI forms, config surface, and docs around
the new build/manual flow and explicit image registration semantics.

Validation: GOCACHE=/tmp/banger-gocache go test ./..., bash -n scripts/*.sh,
and make build.
2026-03-21 18:34:53 -03:00
01c7cb5e65
Reorganize the source checkout layout
Separate tracked source from generated artifacts so the repo root stops accumulating helper scripts, manifests, and local runtime outputs.

Move manual shell entrypoints under scripts/, manifests under config/, and the Firecracker API reference under docs/reference/. Make build and runtimebundle now target build/bin, build/runtime, and build/dist as the canonical source-checkout paths.

Update runtime discovery, helper scripts, tests, and docs to follow the new layout while keeping legacy source-checkout runtime fallbacks for existing local bundles during migration.

Validated with bash -n on the moved scripts, make build, and GOCACHE=/tmp/banger-gocache go test ./....
2026-03-21 17:22:57 -03:00
2362d0ae39
Serve a local web UI from bangerd
Add a localhost-only web console so VM and image management no longer depends on the CLI for every inspection and lifecycle action.

Wire bangerd up to a configurable web listener, expose dashboard and async image-build state through the daemon, and serve CSRF-protected HTML pages with host-path picking, VM/image detail views, logs, ports, and progress polling for long-running operations.

Keep the browser path aligned with the existing sudo and host-owned artifact model: surface sudo readiness, print the web URL in daemon status, and document the new workflow. Polish the UI with resource usage cards, clearer clickable affordances, cancel paths, confirmation prompts, image-name links, and HTTP port links.

Validation: GOCACHE=/tmp/banger-gocache go test ./...
2026-03-21 16:47:47 -03:00
30f0c0b54a
Manage image artifacts and show VM create progress
Stop relying on ad hoc rootfs handling by adding image promotion, managed work-seed fingerprint metadata, and lazy self-healing for older managed images after the first create.

Rebuild guest images with baked SSH access, a guest NIC bootstrap, and default opencode services, and add the staged Void kernel/initramfs/modules workflow so void-exp uses a matching Void boot stack.

Replace the opaque blocking vm.create RPC with a begin/status flow that prints live stages in the CLI while still waiting for vsock health and opencode on guest port 4096.

Validate with GOCACHE=/tmp/banger-gocache go test ./... and live void-exp create/delete smoke runs.
2026-03-21 14:48:01 -03:00
9f09b0d25c
Show vm create defaults in CLI help
Expose the static vm create CPU, memory, system overlay, and work disk defaults at the Cobra flag declaration layer so banger vm create --help shows the values users get by default.

Keep the daemon as the fallback source of truth by only sending those fields in VMCreateParams when the user actually changes the flags. This preserves existing RPC behavior for omitted values while improving the CLI UX.

Add CLI coverage for displayed defaults and for unchanged versus changed flag propagation. Verified with GOCACHE=/tmp/banger-gocache go test ./... and go run ./cmd/banger vm create --help.
2026-03-20 17:40:52 -03:00
0c80d03081
Remove the banger TUI
Hard cut the terminal UI so the supported management surface is the daemon-backed CLI only.

Drop the tui subcommand, delete the Bubble Tea implementation and its tests, and keep a regression check that the legacy command is rejected.
Prune the Charmbracelet dependencies with go mod tidy and remove the stale README and AGENTS references.

Validated with go test ./... and GOCACHE=/tmp/banger-gocache go test ./internal/cli.
2026-03-19 22:56:16 -03:00
279 changed files with 50615 additions and 11472 deletions

23
.githooks/pre-commit Executable file

@ -0,0 +1,23 @@
#!/usr/bin/env bash
# pre-commit gate. Runs lint (gofmt -l + go vet + shellcheck), unit
# tests, and a build before any commit lands. Activate once via
# `make install-hooks`, which points core.hooksPath at this directory.
#
# Bypass for in-flight WIP commits with `git commit --no-verify`.
set -euo pipefail
# Resolve repo root so the hook works from any subdirectory.
repo_root="$(git rev-parse --show-toplevel)"
cd "$repo_root"
# `make lint` already wraps `gofmt -l`, `go vet`, and shellcheck.
echo '[pre-commit] lint'
make --no-print-directory lint
echo '[pre-commit] test'
make --no-print-directory test
echo '[pre-commit] build'
make --no-print-directory build
echo '[pre-commit] ok'

9
.gitignore vendored

@ -1,4 +1,5 @@
state/
/build/
/runtime/
/dist/
/banger
@ -11,3 +12,11 @@ state/
squashfs-root/
rootfs*
wtf/*.deb
*.pem
*.key
id_rsa
.env
/todos
/coverage.out
/coverage.html
/.codex

100
AGENTS.md

@ -1,52 +1,64 @@
# Repository Guidelines
## Project Structure & Module Organization
- `cmd/banger` and `cmd/bangerd` are the primary user-facing entrypoints.
- `internal/` contains the daemon, CLI, RPC, storage, Firecracker, and system integration code.
- The VM lifecycle is now organized around daemon capabilities plus a structured guest-config builder. New host-integrated VM features should plug into that Go path instead of adding more one-off branches through `internal/daemon/vm.go`.
- `customize.sh`, `make-rootfs.sh`, and `interactive.sh` remain as manual rootfs/customization helpers; normal VM lifecycle, NAT, `.vm` DNS, and daemon-driven image builds are handled by the Go control plane.
- Source checkouts use a generated `./runtime/` bundle for Firecracker, kernels, modules, rootfs images, and helper copies. Bundle defaults come from `./runtime/bundle.json` when present. Those runtime artifacts are not meant to be tracked directly in Git.
- The daemon keeps state under XDG directories rather than the old repo-local `state/` layout.
Always run `make build` before commit.
## Build, Test, and Development Commands
- `make build` builds `./banger`, `./bangerd`, and the bundled `./runtime/banger-vsock-agent` guest helper.
- `make bench-create` benchmarks `vm create` and first-SSH readiness on the current host.
- `make runtime-bundle` bootstraps `./runtime/` from the archive referenced by `RUNTIME_MANIFEST`; the checked-in `runtime-bundle.toml` is only a template.
- `make rootfs-void` builds an experimental local-only `x86_64-glibc` Void rootfs plus work-seed under `./runtime/`; it does not replace the default Debian path or teach `banger image build` about Void.
- `make verify-void` registers `void-exp` and runs the normal smoke test against that image.
- `banger` validates required host tools per command and reports actionable missing-tool errors; do not assume one workstation's package set.
- `./banger vm create --name testbox` creates and starts a VM.
- `./banger vm ssh testbox` connects to a running guest using the runtime bundle SSH key and reminds the user if the VM is still running when the session exits.
- `./banger vm stop testbox` stops a VM while preserving its disks.
- `./banger vm stop vm-a vm-b vm-c` and `./banger vm set --nat web-1 web-2` are supported; multi-VM lifecycle and `set` actions fan out concurrently through the CLI.
- `./banger doctor` reports runtime bundle, host tool, feature, and image-build readiness from the same Go checks used by the daemon.
- `./banger image register --name local --rootfs /abs/path/rootfs.ext4` creates or updates an unmanaged image record without changing the default image config; use it for experimental guest iteration paths such as Void.
- `./banger tui` launches the terminal UI.
## Project Structure
- `cmd/banger`, `cmd/bangerd`, and `cmd/banger-vsock-agent` are the three binaries. The first two are user-facing; the third is a companion that ships inside each guest VM.
- `internal/` contains the daemon, CLI, RPC, storage, Firecracker integration, and guest helpers.
- `internal/daemon/` is the composition root; pure helpers live in its subpackages (`opstate`, `dmsnap`, `fcproc`, `imagemgr`, `workspace`). See `internal/daemon/ARCHITECTURE.md`.
- `internal/imagecat/` and `internal/kernelcat/` embed the image + kernel catalogs.
- `images/golden/` is the Dockerfile for the `debian-bookworm` catalog entry.
- `scripts/` contains manual helper workflows for rootfs, kernel, and bundle preparation.
- `build/bin/` is the canonical source-checkout build output.
- `build/manual/` is the canonical source-checkout location for manual rootfs/kernel artifacts.
## Build and Test
- `make build` builds `./build/bin/banger`, `./build/bin/bangerd`, and `./build/bin/banger-vsock-agent`.
- `make test` runs `go test ./...`.
- `./verify.sh` runs the smoke test for the Go VM workflow.
- `make lint` runs `gofmt -l`, `go vet ./...`, and `shellcheck --severity=error` on `scripts/*.sh`. Run before commits.
- `./build/bin/banger doctor` checks host readiness.
- `./build/bin/banger vm run` is the primary user-facing entry point — auto-pulls the default image + kernel from the catalogs if missing.
- `./build/bin/banger image pull <name>` uses the bundle catalog (fast) when `<name>` is a catalog entry, or falls through to the OCI path for arbitrary registry refs. See `docs/image-catalog.md` and `docs/oci-import.md`.
- `./build/bin/banger image register ...` registers an unmanaged host-side image stack.
- `./build/bin/banger image promote <image>` copies an unmanaged image into daemon-owned managed artifacts.
- `scripts/make-generic-kernel.sh` builds a Firecracker-optimized vmlinux from upstream sources. `scripts/publish-kernel.sh <name>` publishes it to the kernel catalog.
- `scripts/publish-golden-image.sh` rebuilds + publishes the golden image bundle and patches the image catalog.
- `scripts/publish-banger-release.sh <vX.Y.Z>` cuts a banger release. Full runbook in `docs/release-process.md`.
## Coding Style & Naming Conventions
- Go code should stay small, direct, and standard-library-first unless there is a clear reason otherwise.
- Shell helpers use Bash with `set -euo pipefail`; keep remaining shell scripts strict and explicit.
- Prefer lowercase filenames with short descriptive names.
- Use `gofmt` for Go formatting; no extra formatter is configured for shell files.
## Image Model
## Testing Guidelines
- Primary automated coverage is `go test ./...`.
- Manual verification for VM lifecycle changes: `./banger vm create`, confirm SSH access, then stop/delete the VM.
- For host-integration changes, run `./banger doctor` as a quick readiness check before the live VM smoke.
- Rebuilt images now include `mise`, `opencode`, `tmux-resurrect`/`tmux-continuum` defaults for `root`, and the `banger-vsock-agent` service used by the SSH reminder and guest health-check path; if you change guest provisioning, document whether users need to rebuild `./runtime/rootfs-docker.ext4` or another base image to pick it up.
- The experimental Void rootfs path now includes the repo's basic dev baseline plus Docker and Compose, alongside boot, SSH, the vsock HTTP health agent, pinned `mise` plus `opencode` for `root`, a `bash` root shell while leaving `/bin/sh` alone, and the `/root` work-seed. Keep further baked-in tooling deliberate and user-driven.
- Rebuilt images also emit a `work-seed.ext4` sidecar used to speed up future VM creates. If you touch `/root` provisioning, verify both the rootfs and the work-seed output.
- The daemon may keep idle TAP devices in a pool for faster creates. Smoke tests should treat `tap-pool-*` devices as reusable capacity, not cleanup leaks.
- If you add a new operational workflow, document how to exercise it in `README.md`.
- For NAT changes, verify both guest outbound access and host rule cleanup, for example with `./verify.sh --nat`.
- Managed images own the full boot set: rootfs, optional work-seed, kernel, optional initrd, and optional modules.
- The image catalog ships pre-built bundles. `vm run` auto-pulls the default catalog entry; `image pull <name>` can be invoked explicitly.
- `default_image_name` defaults to `debian-bookworm`. On miss, the daemon auto-pulls from `imagecat` before surfacing "not found".
- Kernel references follow the same auto-pull pattern against `kernelcat`.
## Commit & Pull Request Guidelines
- Git history uses short, imperative subjects.
- Prefer a real commit body when the change affects lifecycle behavior, storage semantics, or host integration.
- PRs should call out runtime requirements, migration impact, and any host-side verification performed.
## Config
## Security & Configuration Tips
- The VM workflow requires `sudo` and `/dev/kvm` access; do not commit secrets.
- `id_ed25519` lives inside the runtime bundle; rotate or replace it before publishing a shared bundle.
- Config lives at `~/.config/banger/config.toml`.
- Firecracker comes from `PATH` by default, or `firecracker_bin`.
- SSH uses `ssh_key_path` or an auto-managed default key at `~/.local/state/banger/ssh/id_ed25519`.
## Coding Style
- Prefer small, direct Go code and standard library solutions.
- Keep shell scripts strict with `set -euo pipefail`.
- Use `gofmt` for Go formatting.
- When a CLI accepts either an inline string or a file input, always prefer the file-based form.
- For shell commands and AI/LLM tooling, prefer passing files as input whenever the CLI allows it.
- Create temporary files as needed to follow the file-first rule.
- Examples: use `git commit -F <file>` instead of `git commit -m <message>`, and use prompt files instead of inline prompt strings when invoking LLM CLIs.
## Testing Guidance
- Primary automated coverage is `go test ./...` (wired through `make test`).
- `make coverage` runs the suite with `-coverpkg=./...` and prints per-package averages plus a total; `make coverage-html` writes a browsable report to `coverage.html`; `make coverage-total` prints just the total (for scripts/CI).
- For lifecycle changes, smoke-test with `vm run` end-to-end (covers create + start + boot + ssh).
- If guest provisioning changes, document whether existing images must be rebuilt or recreated.
## Security
- Do not commit secrets.
- VM workflows require `sudo` and `/dev/kvm`.
- The default SSH key is local configuration, not a checked-in runtime artifact.

326
CHANGELOG.md Normal file

@ -0,0 +1,326 @@
# Changelog
All notable changes to banger are documented here. The format is based
on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this
project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The version line printed by `banger version` is the canonical reference
for what's installed; this file is the canonical reference for what
changed between versions.
## [Unreleased]
## [v0.1.10] - 2026-05-03
### Added
- README now includes an animated demo GIF showing the typical
sandbox lifecycle (`vm run`, host-side `ssh demo.vm`, stop/start
with file persistence, `vm exec`, `curl http://demo.vm`). The
recording script lives at `assets/demo.tape` and is rendered with
[VHS](https://github.com/charmbracelet/vhs).
## [v0.1.9] - 2026-05-01
### Fixed
- `vm exec` no longer falls back to `cd /root/repo` on VMs that have
no recorded workspace. Previously, running `vm exec` against a plain
VM (one that never had `vm workspace prepare` / `vm run ./repo`)
blew up with `cd: /root/repo: No such file or directory` — surfaced
via the login shell's mise activate hook because `bash -lc` sources
profile.d before the explicit cd. Now the auto-cd only fires when
the user passes `--guest-path` or the VM actually has a workspace
recorded; otherwise the command runs from root's home. Mise wrapping
is unchanged — without a `.mise.toml` it's a no-op.
### Changed
- `vm exec --guest-path` default in `--help` now reads "from last
workspace prepare; otherwise root's home" (was "or /root/repo").
Anyone who relied on the implicit `/root/repo` default for a VM that
has a repo there but no workspace record must now pass
`--guest-path /root/repo` explicitly.
### Notes
- Internal: smoke-test harness ported from `scripts/smoke.sh` to a
Go test suite under `internal/smoketest`. `make smoke` is unchanged
for maintainers; no user-visible effect.
## [v0.1.8] - 2026-05-01
### Fixed
- `<vm>.vm` resolution from the host (NSS path: curl, ssh hostname,
etc.) now works on systemd-resolved hosts. The root helper's
`validateResolverAddr` was rejecting the `host:port` form
(`127.0.0.1:42069`) that banger constructs to point resolved at the
in-process DNS server, so the auto-wire silently failed at every
daemon startup. `dig @127.0.0.1` worked because that bypasses NSS;
any tool going through glibc's resolver chain didn't.
- Validator now accepts both bare IPs and `IP:port` (matching what
`resolvectl dns` itself accepts) with new test coverage for the
port'd form.
### Notes
- Existing v0.1.x installs that already booted with the broken
validator have stale per-link resolved state. After updating to
v0.1.8, run `sudo banger system restart` once to re-trigger the
auto-wire, or restart the host. systemd-resolved restarts also
wipe per-link state — banger restores it on its own daemon
startup but won't re-run for an already-running daemon.
## [v0.1.7] - 2026-05-01
### Added
- `vm run -d` / `--detach` creates the VM, runs workspace prep + tooling
bootstrap, then exits without attaching to ssh. Reconnect later with
`banger vm ssh <name>`. The combos `-d --rm` and `-d -- <cmd>` are
rejected before VM creation.
- `vm run --no-bootstrap` skips the mise tooling install entirely; useful
when a workspace has a `.mise.toml` you don't want banger to act on.
- `banger doctor --verbose` / `-v` prints every check with details.
Without it, doctor's default output now collapses (see Changed).
### Changed
- **`vm run` refuses early when bootstrap can't succeed.** Previously, a
workspace containing `.mise.toml` or `.tool-versions` without `--nat`
set silently failed the bootstrap into a log file and dropped you into
ssh with tools missing. It now refuses before VM creation with
`tooling bootstrap requires --nat (or pass --no-bootstrap to skip)`.
Existing scripts that relied on the silent-failure path will need to
add `--nat` or `--no-bootstrap`.
- **`banger doctor` default output is now compact.** A healthy host
collapses to a single line (`all N checks passed`); failing or warning
checks print only the affected entries plus a summary footer
(`N passed, M warnings, K failures`). Pass `--verbose` for the full
per-check output. Anything parsing the previous always-verbose output
needs to switch to `doctor --verbose`.
### Fixed
- The detached bootstrap path runs synchronously (foreground, tee'd to
the existing log file) so the CLI only returns once installs finish.
Interactive mode keeps today's nohup'd background behaviour so the ssh
session starts promptly.
## [v0.1.6] - 2026-04-29
### Fixed
- v0.1.4's "running VMs survive daemon restart" fix was incomplete:
the binary-level reconcile path was correct, but `/run/banger` (the
daemon's runtime dir) was being wiped on every daemon stop because
systemd defaults to `RuntimeDirectoryPreserve=no`. The api-sock
symlinks the helper had created for live VMs vanished with it,
and `findByJailerPidfile` couldn't resolve them to find the chroot
+ pidfile. v0.1.6 sets `RuntimeDirectoryPreserve=yes` on both
unit templates so the symlinks (and helper RPC sock) survive
the restart window. Live-verified: FC PID and guest boot_id both
unchanged across a full helper+daemon restart cycle with a VM
running.
- v0.1.4's CHANGELOG correction stands: existing v0.1.x installs
(where x < 6) need a one-time `sudo banger system install` after
updating to v0.1.6 to pick up both the new `KillMode=process` and
the new `RuntimeDirectoryPreserve=yes` directives. `banger update`
swaps binaries, not unit files.
## [v0.1.5] - 2026-04-29
No functional changes. Verification release for v0.1.4: the previous
release shipped the running-VMs-survive-update fix, but updating
*to* v0.1.4 from v0.1.3 used v0.1.3's buggy driver, so the fix
couldn't be verified live in that direction. v0.1.5 exists so a
host on v0.1.4 can update to it and observe a running VM survive
end-to-end with v0.1.4 in the driver seat.
## [v0.1.4] - 2026-04-29
### Fixed
- Daemon restarts no longer kill running VMs. Two changes together:
- The `bangerd-root.service` and `bangerd.service` unit templates
now set `KillMode=process`. The default (`control-group`) sent
SIGKILL to every process in the unit's cgroup on stop/restart,
including the jailer-spawned firecracker children — fork/exec
doesn't escape a systemd cgroup. With `KillMode=process` only
the unit's main PID is signalled; firecracker children survive.
- `fcproc.FindPID` now also looks up jailer'd firecracker
processes via the pidfile jailer writes at
`<chroot>/firecracker.pid` (sibling of the api-sock target).
Previously the only lookup path was `pgrep -n -f <api-sock>`,
which can't see jailer'd processes because their cmdline only
carries the chroot-relative `--api-sock /firecracker.socket`.
Reconcile after a daemon restart now correctly re-attaches to
surviving guests instead of mistaking them for stale and tearing
down their dm-snapshot.
### Notes
- v0.1.0's CHANGELOG line "daemon restarts do not interrupt running
guests" was wrong: it was true at the systemd cgroup layer in
theory but the default `KillMode` defeated it, and even with
`KillMode=process` the daemon's reconcile would mistake
surviving FCs for stale and tear them down. v0.1.4 is the version
where this actually works end-to-end.
- Updating from v0.1.0–v0.1.3 to v0.1.4 still kills running VMs
because the *driver* of the update is the buggy older binary.
Updates from v0.1.4 onward preserve running VMs across the
helper+daemon restart that `banger update` performs.
- Existing v0.1.0–v0.1.3 installs that update to v0.1.4 do NOT
automatically pick up the new unit files — `banger update` swaps
binaries, not systemd units. Run `sudo banger system install` once
on those hosts after updating to refresh the units. New v0.1.4+
installs get the correct units from the start.
## [v0.1.3] - 2026-04-29
No functional changes. Verification release: v0.1.2 fixed
`banger update`'s install.toml handling, but the fix only takes
effect when v0.1.2 (or later) is the driver of an update. v0.1.3
exists so a host running v0.1.2 can update to it and confirm the
fix works end-to-end with the new code in the driver seat.
## [v0.1.2] - 2026-04-29
### Fixed
- `banger update` now writes the freshly-installed binary's commit
and built_at fields to `/etc/banger/install.toml`, not the running
CLI's. Previously install.toml's `version` was correct after an
update but `commit` + `built_at` still pointed at the pre-update
binary's identity, which made `banger doctor` raise a false-positive
"CLI/install drift" warning on every update. Caught by the v0.1.0
→ v0.1.1 live update smoke-test.
## [v0.1.1] - 2026-04-29
### Added
- `install.sh` — one-command installer published at
`https://releases.thaloco.com/banger/install.sh`. Runs as the
invoking user, downloads + verifies the latest signed release with
the embedded cosign public key, and re-execs `sudo` only for the
actual system-install step. Pre-sudo summary explains in plain
language why elevation is needed.
- `BANGER_INSTALL_NONINTERACTIVE=1` env var on `install.sh` for
non-interactive use through `curl | bash` (CI, automated provisioning).
## [v0.1.0] - 2026-04-29
First public release. banger runs disposable development sandboxes as
Firecracker microVMs: each sandbox boots in a few seconds, gets its own
root filesystem and network, and exits on demand.
### Added
**Sandbox VMs**
- `banger vm run` boots a microVM, drops you into ssh, and tears it down
on exit. Optional positional path ships a host repo into the guest;
`-- cmd args` runs a command non-interactively and exits with its
status.
- Long-lived VMs via `vm create` / `vm start` / `vm stop` /
`vm restart` / `vm ssh` / `vm exec` / `vm logs` / `vm stats` /
`vm ports` / `vm kill`. `vm list` and `ps` enumerate state;
`vm prune` deletes every non-running VM.
- `vm workspace` ships a host repo into a guest and pulls diffs back.
- Per-VM cgroup-isolated firecracker process under jailer chroot;
daemon restarts do not interrupt running guests.
**Images**
- `banger image pull <name>` pulls a curated rootfs+kernel bundle from
the banger image catalog. `image pull <oci-ref>` pulls any OCI image.
- `image list` / `image show` / `image delete` / `image promote` /
`image register` round out the lifecycle.
- `image cache` manages the OCI layer-blob cache.
- Concurrent pulls of the same image are coalesced; the first pull
wins, the rest wait.
**Kernels**
- `banger kernel pull <name>` pulls a Firecracker-compatible kernel
from the banger kernel catalog. `kernel list` / `kernel show` /
`kernel rm` manage the local store.
**Host networking**
- Per-host bridge with NAT; per-VM tap device; deterministic IPv4
assignment; iptables rules installed/removed with VM lifecycle.
- DNS routing: local resolver on `127.0.0.1:42069` answers queries
for `<vm>.vm` so plain `ssh <vm>.vm` reaches the guest.
- `banger ssh-config` writes a one-time `~/.ssh/config` include so
ssh, scp, and rsync resolve `<vm>.vm` from any terminal.
**System install**
- `sudo banger system install` installs an owner-mode daemon
(`bangerd.service`) and a root-helper (`bangerd-root.service`) as
systemd units. The owner daemon runs as the invoking user; only the
root helper holds privilege, and only for a vetted set of operations.
- `system status` / `system restart` / `system uninstall` round out
the lifecycle. `daemon` is a thin alias.
- `banger doctor` audits host readiness: architecture, CLI/install
version drift, state store, host runtime, vm lifecycle prerequisites,
vsock guest agent, vm defaults, ssh shortcut, /root work disk, DNS,
NAT, firecracker binary version, systemd units, socket permissions,
helper unit hardening directives.
**Self-update**
- `banger update` downloads, verifies, and installs newer releases
from the public manifest. Flow: fetch manifest, refuse if any VM
operation is in flight, download tarball + `SHA256SUMS` +
`SHA256SUMS.sig`, verify the cosign signature against the embedded
public key, verify the tarball hash, stage to a scratch dir, run
`bangerd --check-migrations` against the staged binary, atomically
swap the three banger binaries, restart the systemd units, run
`banger doctor`, finalise the install record.
- Pre-restart abort and post-restart auto-rollback both restore the
previous install on failure.
- `banger update --check` reports whether a newer release is
available without applying it; `--to vX.Y.Z` pins a specific
version; `--dry-run` prints the plan; `--force` skips the
in-flight-op refusal.
**Trust model**
- Every release is cosign-signed. The public key is embedded in the
banger binary at build time; the signed payload is `SHA256SUMS`,
which in turn covers the release tarball. Verification uses the
Go standard library (`crypto/ecdsa.VerifyASN1`); cosign is needed
only for *signing*, not for verification.
- The release manifest URL is hardcoded into the binary so a
compromised daemon config cannot redirect the updater to a different
bucket.
**CLI surface**
- Top-level: `vm`, `ps`, `image`, `kernel`, `ssh-config`, `system`,
`daemon`, `doctor`, `update`, `version`, `completion`.
- `banger version` reports the version, commit SHA, and build
timestamp baked in via ldflags at release-build time.
### Compatibility
- The host-side and guest-side vsock agent protocol is informally
stable across **patch** versions (v0.1.x). Minor-version bumps
(v0.2.x) may change it; existing VMs created against an older
minor will need to be re-pulled. `banger doctor` warns when a
running VM's agent is older than the daemon expects but does not
block lifecycle operations.
- The on-disk store schema is forward-only. Downgrading the binary
against a database written by a newer binary is unsupported; the
updater detects this via `bangerd --check-migrations` and refuses
the swap rather than starting up against an incompatible store.
- Linux only. amd64 only. KVM required.
[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.10...HEAD
[v0.1.10]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.10
[v0.1.9]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.9
[v0.1.8]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.8
[v0.1.7]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.7
[v0.1.6]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.6
[v0.1.5]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.5
[v0.1.4]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.4
[v0.1.3]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.3
[v0.1.2]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.2
[v0.1.1]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.1
[v0.1.0]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.0

62
CONTRIBUTING.md Normal file

@ -0,0 +1,62 @@
# Contributing
## Build from source
```bash
make build
sudo ./build/bin/banger system install --owner "$USER"
```
`make build` produces three binaries under `./build/bin/`:
- `banger` — the user-facing CLI
- `bangerd` — the owner-user daemon (exposes `/run/banger/bangerd.sock`)
- `banger-vsock-agent` — the in-guest companion
`system install` copies them into `/usr/local`, writes install
metadata under `/etc/banger`, lays down `bangerd.service` and
`bangerd-root.service`, and starts both. After that, daily commands
like `banger vm run` are unprivileged.
To inspect or refresh the services:
```bash
banger system status
sudo banger system restart
```
The two-service split (owner daemon + privileged root helper) is
explained in [`docs/privileges.md`](docs/privileges.md), including
the exact capability set the root helper holds.
## Tests
```bash
make test # go test ./...
make coverage # per-package + total statement coverage
make lint # gofmt + go vet + shellcheck
```
The smoke suite (`make smoke`) builds coverage-instrumented binaries,
installs them as a temporary systemd service, and runs end-to-end
scenarios against real Firecracker. Requires a KVM-capable host and
`sudo`. The suite lives under `internal/smoketest/` (build-tagged
`smoke`); `make smoke-list` prints scenario names; `make smoke-one
SCENARIO=<name>` runs just one (comma-separated for several). See
the smoke comments in the `Makefile` for details.
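For example (the scenario name below is illustrative; take real names from `make smoke-list`):
```bash
make smoke-list
make smoke-one SCENARIO=vm_lifecycle   # hypothetical scenario name
make smoke JOBS=1                      # full suite, forced serial
```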
## Pre-commit hook
```bash
make install-hooks
```
Points `core.hooksPath` at `.githooks/`, which runs lint + test +
build on every commit. Bypass with `git commit --no-verify`; revert
with `git config --unset core.hooksPath`.
## Internals
- [`docs/privileges.md`](docs/privileges.md) — daemon split, capability set, trust model.
- [`docs/release-process.md`](docs/release-process.md) — cutting and signing a release.
- [`AGENTS.md`](AGENTS.md) — repo-wide notes for code agents.

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 Thales Maciel <thales@thalesmaciel.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

313
Makefile
View file

@ -6,115 +6,266 @@ INSTALL ?= install
PREFIX ?= $(HOME)/.local
BINDIR ?= $(PREFIX)/bin
LIBDIR ?= $(PREFIX)/lib
RUNTIMEDIR ?= $(LIBDIR)/banger
DESTDIR ?=
RUNTIME_MANIFEST ?= runtime-bundle.toml
RUNTIME_SOURCE_DIR ?= runtime
RUNTIME_ARCHIVE ?= dist/banger-runtime.tar.gz
BINARIES := banger bangerd
RUNTIME_HELPERS := $(RUNTIME_SOURCE_DIR)/banger-vsock-agent
BUILD_DIR ?= build
BUILD_BIN_DIR ?= $(BUILD_DIR)/bin
BUILD_MANUAL_DIR ?= $(BUILD_DIR)/manual
BANGER_BIN ?= $(BUILD_BIN_DIR)/banger
BANGERD_BIN ?= $(BUILD_BIN_DIR)/bangerd
VSOCK_AGENT_BIN ?= $(BUILD_BIN_DIR)/banger-vsock-agent
BINARIES := $(BANGER_BIN) $(BANGERD_BIN) $(VSOCK_AGENT_BIN)
GO_SOURCES := $(shell find cmd internal -type f -name '*.go' | sort)
RUNTIME_EXECUTABLES := firecracker customize.sh packages.sh namegen banger-vsock-agent
RUNTIME_DATA_FILES := packages.apt id_ed25519 rootfs-docker.ext4
RUNTIME_OPTIONAL_DATA_FILES := rootfs.ext4 rootfs-docker.work-seed.ext4 bundle.json
RUNTIME_BOOT_FILES := wtf/root/boot/vmlinux-6.8.0-94-generic wtf/root/boot/initrd.img-6.8.0-94-generic
RUNTIME_MODULES_DIR := wtf/root/lib/modules/6.8.0-94-generic
VOID_IMAGE_NAME ?= void-exp
VOID_VM_NAME ?= void-dev
# BUILD_INPUTS is everything that can change a binary's bytes: Go sources
# plus embedded assets (catalog.json, future static files). Listing
# everything is cheaper than missing a rebuild — go's own cache absorbs
# any redundant invocations.
BUILD_INPUTS := $(shell find cmd internal -type f | sort)
SHELL_SOURCES := $(shell find scripts -type f -name '*.sh' | sort)
SMOKE_DIR := $(BUILD_DIR)/smoke
SMOKE_BIN_DIR := $(SMOKE_DIR)/bin
SMOKE_COVER_DIR := $(SMOKE_DIR)/covdata
SMOKE_XDG_DIR := $(SMOKE_DIR)/xdg
VERSION ?= $(shell git describe --tags --exact-match 2>/dev/null || echo dev)
COMMIT ?= $(shell git rev-parse --verify HEAD 2>/dev/null || echo unknown)
BUILT_AT ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
GO_LDFLAGS := -X banger/internal/buildinfo.Version=$(VERSION) -X banger/internal/buildinfo.Commit=$(COMMIT) -X banger/internal/buildinfo.BuiltAt=$(BUILT_AT)
.DEFAULT_GOAL := help
.PHONY: help build banger bangerd test fmt tidy clean rootfs rootfs-void void-register void-vm verify-void install runtime-bundle runtime-package check-runtime bench-create
# `make smoke-one` requires SCENARIO=. Validate before any prerequisite
# (notably smoke-build) so a typo'd invocation doesn't pay for a Go
# rebuild before learning it's wrong.
ifneq (,$(filter smoke-one,$(MAKECMDGOALS)))
ifndef SCENARIO
$(error smoke-one needs SCENARIO=name (see `make smoke-list` for names))
endif
endif
.PHONY: help build banger bangerd test fmt tidy clean install uninstall lint lint-go lint-shell coverage coverage-html coverage-total coverage-combined coverage-combined-html smoke smoke-build smoke-list smoke-one smoke-coverage-html smoke-clean smoke-fresh install-hooks
help:
@printf '%s\n' \
'Targets:' \
' make build Build ./banger and ./bangerd' \
' make runtime-bundle Fetch and unpack ./runtime from the archive referenced by $(RUNTIME_MANIFEST)' \
' make runtime-package Package $(RUNTIME_SOURCE_DIR) into $(RUNTIME_ARCHIVE) and print its SHA256' \
' make bench-create Benchmark vm create and SSH readiness with scripts/bench-create.sh' \
' make install Build and install binaries plus the runtime bundle into $(DESTDIR)$(BINDIR) and $(DESTDIR)$(RUNTIMEDIR)' \
' make test Run go test ./...' \
' make fmt Format Go sources under cmd/ and internal/' \
' make tidy Run go mod tidy' \
' make clean Remove built Go binaries' \
' make rootfs Rebuild the source-checkout default Debian rootfs image in ./runtime' \
' make rootfs-void Build an experimental Void Linux rootfs and work-seed in ./runtime' \
' make void-register Register or update the experimental Void image as $(VOID_IMAGE_NAME)' \
' make void-vm Register the experimental Void image and create a VM named $(VOID_VM_NAME)' \
' make verify-void Register the experimental Void image and run verify.sh against it'
' make build Build ./build/bin/banger, ./build/bin/bangerd, and ./build/bin/banger-vsock-agent' \
' make install Build and install banger, bangerd, and the companion vsock helper' \
' make uninstall Stop the daemon and remove installed binaries (leaves user state by default)' \
' make test Run go test ./...' \
' make coverage Run tests with coverage; print per-package + total' \
' make coverage-html Open a browsable per-line HTML report (writes coverage.html)' \
' make coverage-total Print just the total statement coverage (for scripts/CI)' \
' make coverage-combined Merge unit-test + smoke covdata; print per-package + total' \
' make coverage-combined-html HTML report of the merged unit+smoke coverage' \
' make lint Run gofmt + go vet + shellcheck (errors)' \
' make fmt Format Go sources under cmd/ and internal/' \
' make tidy Run go mod tidy' \
' make clean Remove built Go binaries and coverage artefacts' \
' make smoke Build instrumented binaries, run the supported systemd smoke suite, report coverage (needs KVM + sudo)' \
' make smoke JOBS=N Override parallelism (default: nproc, capped at 8). JOBS=1 forces serial.' \
' make smoke-list Print the list of smoke scenarios (no build, no install)' \
' make smoke-one SCENARIO=NAME Run a single smoke scenario (still does the install preamble; comma-separated for several)' \
' make smoke-fresh smoke-clean + smoke — purges stale smoke-owned installs before a clean supported-path run' \
' make smoke-coverage-html HTML coverage report from the last smoke run' \
' make smoke-clean Remove the smoke build tree and purge any stale smoke-owned system install' \
' make install-hooks Point core.hooksPath at .githooks (lint + test + build run on every commit)'
build: $(BINARIES) $(RUNTIME_HELPERS)
build: $(BINARIES)
banger: $(GO_SOURCES) go.mod go.sum
$(GO) build -o ./banger ./cmd/banger
$(BANGER_BIN): $(BUILD_INPUTS) go.mod go.sum
mkdir -p "$(BUILD_BIN_DIR)"
$(GO) build -ldflags '$(GO_LDFLAGS)' -o "$(BANGER_BIN)" ./cmd/banger
bangerd: $(GO_SOURCES) go.mod go.sum
$(GO) build -o ./bangerd ./cmd/bangerd
$(BANGERD_BIN): $(BUILD_INPUTS) go.mod go.sum
mkdir -p "$(BUILD_BIN_DIR)"
$(GO) build -ldflags '$(GO_LDFLAGS)' -o "$(BANGERD_BIN)" ./cmd/bangerd
$(RUNTIME_SOURCE_DIR)/banger-vsock-agent: $(GO_SOURCES) go.mod go.sum
mkdir -p "$(RUNTIME_SOURCE_DIR)"
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GO) build -o "$(RUNTIME_SOURCE_DIR)/banger-vsock-agent" ./cmd/banger-vsock-agent
$(VSOCK_AGENT_BIN): $(BUILD_INPUTS) go.mod go.sum
mkdir -p "$(BUILD_BIN_DIR)"
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GO) build -ldflags '$(GO_LDFLAGS)' -o "$(VSOCK_AGENT_BIN)" ./cmd/banger-vsock-agent
test:
$(GO) test ./...
# Coverage targets use -coverpkg=./... so packages without their own
# tests still get counted when another package exercises them (common
# for daemon/* subpackages). coverage.out is gitignored.
coverage:
$(GO) test -coverpkg=./... -coverprofile=coverage.out ./...
@echo ''
@echo 'Per-package:'
@$(GO) tool cover -func=coverage.out | awk -F'\t+' '/^total:/ {total=$$NF; next} {pkg=$$1; sub("banger/", "", pkg); sub("/[^/]+:[0-9]+:$$", "", pkg); pkgs[pkg]+=1; covered[pkg]+=$$NF+0} END {for (p in pkgs) printf " %-40s %.1f%% (avg of %d funcs)\n", p, covered[p]/pkgs[p], pkgs[p] | "sort"; print ""; print "Total statement coverage:", total}'
coverage-html: coverage
$(GO) tool cover -html=coverage.out -o coverage.html
@echo 'wrote coverage.html'
coverage-total:
@$(GO) test -coverpkg=./... -coverprofile=coverage.out ./... >/dev/null 2>&1 && $(GO) tool cover -func=coverage.out | awk '/^total:/ {print $$NF}'
# coverage-combined unions unit-test coverage and smoke coverage into
# one report. Unit tests cover pure-Go logic (error branches, parsing,
# handler wiring); smoke covers the real sudo / firecracker / dm-snap
# paths that unit tests physically can't reach. Separately each tells
# half the story. Merged, this is the single "what's not being
# exercised at all" view.
#
# Requires an up-to-date smoke run (the target depends on smoke-build
# to rebuild instrumented binaries; re-run `make smoke` yourself if
# scenarios changed). Modes must match; smoke uses the default 'set',
# so the unit run below drops the default 'atomic' for alignment.
COMBINED_COVER_DIR := $(BUILD_DIR)/combined
UNIT_COVER_DIR := $(BUILD_DIR)/unit/covdata
coverage-combined:
@test -d "$(SMOKE_COVER_DIR)" && test "$$(ls -A $(SMOKE_COVER_DIR) 2>/dev/null)" || { \
echo 'no smoke covdata at $(SMOKE_COVER_DIR); run `make smoke` first' >&2; exit 1; \
}
rm -rf "$(UNIT_COVER_DIR)" "$(COMBINED_COVER_DIR)"
mkdir -p "$(UNIT_COVER_DIR)" "$(COMBINED_COVER_DIR)"
$(GO) test -cover -coverpkg=./... ./... -args -test.gocoverdir="$(abspath $(UNIT_COVER_DIR))" >/dev/null
$(GO) tool covdata merge -i="$(UNIT_COVER_DIR),$(SMOKE_COVER_DIR)" -o="$(COMBINED_COVER_DIR)"
$(GO) tool covdata textfmt -i="$(COMBINED_COVER_DIR)" -o="$(BUILD_DIR)/combined.cover.out"
@echo ''
@echo 'Per-package (merged unit + smoke):'
@$(GO) tool cover -func="$(BUILD_DIR)/combined.cover.out" | awk -F'\t+' '/^total:/ {total=$$NF; next} {pkg=$$1; sub("banger/", "", pkg); sub("/[^/]+:[0-9]+:$$", "", pkg); pkgs[pkg]+=1; covered[pkg]+=$$NF+0} END {for (p in pkgs) printf " %-40s %.1f%% (avg of %d funcs)\n", p, covered[p]/pkgs[p], pkgs[p] | "sort"; print ""; print "Total statement coverage:", total}'
coverage-combined-html: coverage-combined
$(GO) tool cover -html="$(BUILD_DIR)/combined.cover.out" -o "$(BUILD_DIR)/combined.cover.html"
@echo 'wrote $(BUILD_DIR)/combined.cover.html'
lint: lint-go lint-shell
lint-go:
@unformatted="$$($(GOFMT) -l $(GO_SOURCES))"; \
if [ -n "$$unformatted" ]; then \
printf 'gofmt: the following files are not formatted:\n%s\n' "$$unformatted" >&2; \
exit 1; \
fi
$(GO) vet ./...
lint-shell:
@command -v shellcheck >/dev/null 2>&1 || { echo 'shellcheck is required for make lint-shell' >&2; exit 1; }
shellcheck --severity=error $(SHELL_SOURCES)
fmt:
$(GOFMT) -w $(GO_SOURCES)
tidy:
$(GO) mod tidy
# Local-only: redirect git's hook lookup at .githooks/ so .githooks/pre-commit
# fires on every `git commit`. Idempotent. Bypass an individual commit with
# `git commit --no-verify`.
install-hooks:
git config core.hooksPath .githooks
@echo 'core.hooksPath -> .githooks (run `git config --unset core.hooksPath` to revert)'
clean:
rm -f ./banger ./bangerd
rm -rf "$(BUILD_BIN_DIR)" coverage.out coverage.html
runtime-bundle:
$(GO) run ./cmd/runtimebundle fetch --manifest "$(RUNTIME_MANIFEST)" --out "$(RUNTIME_SOURCE_DIR)"
# Smoke test suite. Builds the three banger binaries with -cover
# instrumentation under $(SMOKE_BIN_DIR), installs them as temporary
# bangerd.service + bangerd-root.service, runs the Go scenarios under
# internal/smoketest (built with -tags=smoke), copies service covdata
# out of /var/lib/banger, then purges the smoke-owned install on exit.
#
# This touches global systemd state. The harness refuses to overwrite a
# pre-existing non-smoke install and drops a marker file under
# /etc/banger so `make smoke-clean` can recover a stale smoke-owned
# install after an interrupted run.
#
# Requires a KVM-capable Linux host with sudo. This is a pre-release
# gate, not CI — the Go unit suite (`make test`) is what runs everywhere.
smoke-build: $(SMOKE_BIN_DIR)/.built
runtime-package:
$(GO) run ./cmd/runtimebundle package --manifest "$(RUNTIME_MANIFEST)" --runtime-dir "$(RUNTIME_SOURCE_DIR)" --out "$(RUNTIME_ARCHIVE)"
$(SMOKE_BIN_DIR)/.built: $(BUILD_INPUTS) go.mod go.sum
mkdir -p "$(SMOKE_BIN_DIR)"
$(GO) build -cover -ldflags '$(GO_LDFLAGS)' -o "$(SMOKE_BIN_DIR)/banger" ./cmd/banger
$(GO) build -cover -ldflags '$(GO_LDFLAGS)' -o "$(SMOKE_BIN_DIR)/bangerd" ./cmd/bangerd
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GO) build -ldflags '$(GO_LDFLAGS)' -o "$(SMOKE_BIN_DIR)/banger-vsock-agent" ./cmd/banger-vsock-agent
touch "$@"
bench-create: build
bash ./scripts/bench-create.sh $(ARGS)
# JOBS defaults to nproc; SMOKE_JOBS clamps it at 8. Each parallel slot
# runs a smoke-tuned VM, and over-subscribing the host pushes
# waitForSSH past its 60s deadline. Floored at 1 so JOBS=1 still works.
JOBS ?= $(shell nproc 2>/dev/null || echo 1)
SMOKE_JOBS := $(shell n=$(JOBS); [ $$n -lt 1 ] && n=1; [ $$n -gt 8 ] && n=8; echo $$n)
check-runtime:
@test -d "$(RUNTIME_SOURCE_DIR)" || { echo "missing runtime bundle directory: $(RUNTIME_SOURCE_DIR); run 'make runtime-bundle'" >&2; exit 1; }
@for path in $(RUNTIME_EXECUTABLES) $(RUNTIME_DATA_FILES) $(RUNTIME_BOOT_FILES) $(RUNTIME_MODULES_DIR); do \
test -e "$(RUNTIME_SOURCE_DIR)/$$path" || { echo "missing runtime artifact: $(RUNTIME_SOURCE_DIR)/$$path; run 'make runtime-bundle'" >&2; exit 1; }; \
done
smoke: smoke-build
rm -rf "$(SMOKE_COVER_DIR)"
mkdir -p "$(SMOKE_COVER_DIR)" "$(SMOKE_XDG_DIR)"
BANGER_SMOKE_BIN_DIR="$(abspath $(SMOKE_BIN_DIR))" \
BANGER_SMOKE_COVER_DIR="$(abspath $(SMOKE_COVER_DIR))" \
BANGER_SMOKE_XDG_DIR="$(abspath $(SMOKE_XDG_DIR))" \
$(GO) test -tags=smoke -count=1 -v -parallel $(SMOKE_JOBS) -timeout 30m ./internal/smoketest
@echo ''
@echo 'Smoke coverage:'
@$(GO) tool covdata percent -i="$(SMOKE_COVER_DIR)"
install: build check-runtime
# smoke-list parses the test scaffold for scenario names. Cheap: no
# smoke-build dep, no env vars, no test binary spawned.
smoke-list:
@grep -oE 't\.Run\("[a-z_]+", *test[A-Za-z]+\)' internal/smoketest/smoke_test.go \
| sed -E 's/t\.Run\("([a-z_]+)".*/ \1/'
# smoke-one runs one scenario (or a comma-separated list) with the
# install preamble. Comma list becomes a regex alternation so multiple
# scenarios can be selected without invoking go test by hand.
SCENARIO_PATTERN := $(shell echo '$(SCENARIO)' | tr ',' '|')
smoke-one: smoke-build
rm -rf "$(SMOKE_COVER_DIR)"
mkdir -p "$(SMOKE_COVER_DIR)" "$(SMOKE_XDG_DIR)"
BANGER_SMOKE_BIN_DIR="$(abspath $(SMOKE_BIN_DIR))" \
BANGER_SMOKE_COVER_DIR="$(abspath $(SMOKE_COVER_DIR))" \
BANGER_SMOKE_XDG_DIR="$(abspath $(SMOKE_XDG_DIR))" \
$(GO) test -tags=smoke -count=1 -v -timeout 30m \
-run "TestSmoke/.*/($(SCENARIO_PATTERN))$$" \
./internal/smoketest
smoke-coverage-html: smoke
$(GO) tool covdata textfmt -i="$(SMOKE_COVER_DIR)" -o="$(SMOKE_DIR)/cover.out"
$(GO) tool cover -html="$(SMOKE_DIR)/cover.out" -o "$(SMOKE_DIR)/cover.html"
@echo 'wrote $(SMOKE_DIR)/cover.html'
smoke-clean:
@if sudo test -f /etc/banger/.smoke-owned; then \
bin=''; \
if [ -x "$(SMOKE_BIN_DIR)/banger" ]; then \
bin="$(abspath $(SMOKE_BIN_DIR))/banger"; \
elif [ -x "$(BANGER_BIN)" ]; then \
bin="$(abspath $(BANGER_BIN))"; \
elif [ -x /usr/local/bin/banger ]; then \
bin=/usr/local/bin/banger; \
fi; \
if [ -n "$$bin" ]; then \
sudo "$$bin" system uninstall --purge >/dev/null 2>&1 || true; \
fi; \
fi
rm -rf "$(SMOKE_DIR)"
# smoke-fresh wipes the instrumented build tree, purges any stale
# smoke-owned install, and then runs the supported-path smoke suite
# from scratch.
smoke-fresh: smoke-clean smoke
install: build
mkdir -p "$(DESTDIR)$(BINDIR)"
mkdir -p "$(DESTDIR)$(RUNTIMEDIR)"
mkdir -p "$(DESTDIR)$(RUNTIMEDIR)/wtf/root/boot"
mkdir -p "$(DESTDIR)$(RUNTIMEDIR)/wtf/root/lib/modules"
$(INSTALL) -m 0755 ./banger "$(DESTDIR)$(BINDIR)/banger"
$(INSTALL) -m 0755 ./bangerd "$(DESTDIR)$(BINDIR)/bangerd"
@for path in $(RUNTIME_EXECUTABLES); do \
$(INSTALL) -m 0755 "$(RUNTIME_SOURCE_DIR)/$$path" "$(DESTDIR)$(RUNTIMEDIR)/$$path"; \
done
@for path in $(RUNTIME_DATA_FILES) $(RUNTIME_BOOT_FILES); do \
$(INSTALL) -m 0644 "$(RUNTIME_SOURCE_DIR)/$$path" "$(DESTDIR)$(RUNTIMEDIR)/$$path"; \
done
@for path in $(RUNTIME_OPTIONAL_DATA_FILES); do \
if test -e "$(RUNTIME_SOURCE_DIR)/$$path"; then \
$(INSTALL) -m 0644 "$(RUNTIME_SOURCE_DIR)/$$path" "$(DESTDIR)$(RUNTIMEDIR)/$$path"; \
fi; \
done
chmod 0600 "$(DESTDIR)$(RUNTIMEDIR)/id_ed25519"
cp -a "$(RUNTIME_SOURCE_DIR)/$(RUNTIME_MODULES_DIR)" "$(DESTDIR)$(RUNTIMEDIR)/wtf/root/lib/modules/"
mkdir -p "$(DESTDIR)$(LIBDIR)/banger"
$(INSTALL) -m 0755 "$(BANGER_BIN)" "$(DESTDIR)$(BINDIR)/banger"
$(INSTALL) -m 0755 "$(BANGERD_BIN)" "$(DESTDIR)$(BINDIR)/bangerd"
$(INSTALL) -m 0755 "$(VSOCK_AGENT_BIN)" "$(DESTDIR)$(LIBDIR)/banger/banger-vsock-agent"
rootfs:
BANGER_RUNTIME_DIR="$(abspath $(RUNTIME_SOURCE_DIR))" ./make-rootfs.sh
rootfs-void:
BANGER_RUNTIME_DIR="$(abspath $(RUNTIME_SOURCE_DIR))" ./make-rootfs-void.sh
void-register: build
./banger image register --name "$(VOID_IMAGE_NAME)" --rootfs "$(abspath $(RUNTIME_SOURCE_DIR))/rootfs-void.ext4" --work-seed "$(abspath $(RUNTIME_SOURCE_DIR))/rootfs-void.work-seed.ext4" --packages "$(abspath packages.void)"
void-vm: void-register
./banger vm create --image "$(VOID_IMAGE_NAME)" --name "$(VOID_VM_NAME)"
verify-void: void-register
./verify.sh --image "$(VOID_IMAGE_NAME)"
# uninstall stops a running daemon (if any) and removes the installed
# binaries. It does NOT touch user data (config, SSH keys, VM state,
# image/kernel caches) — rm -rf those paths manually if wanted; they
# are printed for convenience.
uninstall:
@if [ -x "$(DESTDIR)$(BINDIR)/banger" ]; then \
"$(DESTDIR)$(BINDIR)/banger" daemon stop >/dev/null 2>&1 || true; \
fi
rm -f "$(DESTDIR)$(BINDIR)/banger" "$(DESTDIR)$(BINDIR)/bangerd"
rm -rf "$(DESTDIR)$(LIBDIR)/banger"
@printf '\nRemoved binaries. User data is preserved at:\n'
@printf ' ~/.config/banger/ (config, ssh keys)\n'
@printf ' ~/.local/state/banger/ (VMs, images, kernels, db, logs)\n'
@printf ' ~/.cache/banger/ (OCI layer cache)\n'
@printf '\nDelete those paths manually if you want a full purge.\n'

532
README.md
View file

@ -1,444 +1,172 @@
# banger
Persistent Firecracker development VMs managed through a Go daemon, CLI, and TUI.
One-command development sandboxes on Firecracker microVMs.
## Requirements
- Linux host with KVM (`/dev/kvm` access)
- Vsock support for post-SSH liveness reminders (`/dev/vhost-vsock`)
- Core VM lifecycle: `sudo`, `ip`, `dmsetup`, `losetup`, `blockdev`, `truncate`, `pgrep`, `chown`, `chmod`, `kill`
- Guest rootfs patching: `e2cp`, `e2rm`, `debugfs`
- Guest work disk creation/resizing: `mkfs.ext4`, `e2fsck`, `resize2fs`, `mount`, `umount`, `cp`
- SSH and logs: `ssh`
- Optional NAT: `iptables`, `sysctl`
- Image build: the bundled SSH key plus the tools above; `banger image build` no longer shells out through `customize.sh`
![banger demo](assets/banger.gif)
`banger` validates these per command and returns actionable errors instead of
assuming one workstation layout.
Spin up a clean Linux VM with your repo and tooling preloaded, drop
into ssh, and tear it down — all from one command. banger is built
for the dev loop, not the server use case: guests are short-lived,
single-user, reachable at `<name>.vm` from your host, and disposable.
## Runtime Bundle
Runtime artifacts are no longer tracked directly in Git. Source checkouts use a
generated `./runtime/` bundle, while installed binaries use
`$(prefix)/lib/banger`.
## Quick start
The bundle contains:
- `firecracker`
- `banger-vsock-agent` for the guest-side vsock HTTP health agent and SSH reminder checks
- `bundle.json` with the bundle's default kernel/initrd/modules/rootfs paths
- a kernel, initrd, and modules tree referenced by `bundle.json`
- `rootfs-docker.ext4`
- `rootfs-docker.work-seed.ext4` when present, used to seed `/root` quickly on
new VM creates
- `rootfs.ext4` when present
- `packages.apt`
- `id_ed25519`
- the helper scripts used by manual customization and installs
**Requirements**:
- Linux x86_64 with KVM
- Systemd
- [Firecracker >= v1.5](https://github.com/firecracker-microvm/firecracker)
Bootstrap a source checkout from a local or published runtime archive. The
checked-in [`runtime-bundle.toml`](runtime-bundle.toml)
is a template and intentionally ships with empty `url` and `sha256`.
Install:
If you need to create a local archive first, do that from a checkout or machine
that already has a populated `./runtime/` tree:
```bash
make runtime-package
cp dist/banger-runtime.tar.gz /path/to/fresh-checkout/dist/
curl -fsSL https://releases.thaloco.com/banger/install.sh | bash
```
In the fresh checkout:
```bash
cp runtime-bundle.toml runtime-bundle.local.toml
```
The installer downloads the signed release, then prompts for sudo to finish the install.
[Read more about how banger uses sudo](#security)
Edit `runtime-bundle.local.toml` to point at the staged archive and checksum:
```toml
url = "./dist/banger-runtime.tar.gz"
sha256 = "<sha256 printed by make runtime-package>"
```
Then bootstrap `./runtime/` with the local manifest copy:
```bash
make runtime-bundle RUNTIME_MANIFEST=runtime-bundle.local.toml
```
`url` may be a relative path, absolute path, `file:///...` URL, or HTTP(S)
URL. `make install` will not fetch artifacts for you.
## Build
```bash
make build
```
Run `make build` after `./runtime/` has been bootstrapped. It also rebuilds the
bundled `banger-vsock-agent` guest helper in `./runtime/`.
Install into `~/.local/bin` by default, with the runtime bundle under
`~/.local/lib/banger`:
```bash
make install
```
After `make install`, the installed `banger` and `bangerd` do not need the repo
checkout to keep working.
## Basic VM Workflow
Create and boot a VM:
```bash
banger vm create --name calm-otter --disk-size 16G
```
Check host/runtime readiness before creating VMs:
Verify host configuration:
```bash
banger doctor
```
List VMs:
First VM:
> The first run may take a couple of minutes while the bundle downloads.
> Subsequent `vm run`s typically take 1 to 3 seconds.
```bash
banger vm list
banger vm run --name my-vm
```
Inspect a VM:
This auto-pulls the default image and drops you into an interactive ssh session.
Disconnecting an interactive session leaves the VM running;
`--rm` auto-deletes the VM when the session or command exits.
## `vm run`
```bash
banger vm show calm-otter
banger vm stats calm-otter
banger vm run ./my-repo # copy ./my-repo into /root/repo — drops into ssh
banger vm run ./repo -- make test # workspace + run command, exits with its status
banger vm run --rm -- script.sh # ephemeral: VM is deleted on exit
banger vm run -d ./repo --nat # detached: prep + bootstrap, exit (no ssh attach)
```
SSH into a running VM:
If a repository is passed, banger copies your repo's git-tracked files
into `/root/repo` and runs a `mise` bootstrap from `.mise.toml` /
`.tool-versions` if either is present. The bootstrap reaches the
public internet, so workspaces with mise manifests require `--nat`;
pass `--no-bootstrap` to skip the install entirely. Untracked files
are skipped by default — pass `--include-untracked` to ship them
too, or `--dry-run` to preview the file list.
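For example, to check what a workspace run would ship before committing to it (`./repo` stands in for any git checkout):
```bash
banger vm run ./repo --dry-run                               # preview the file list
banger vm run --nat --include-untracked ./repo -- make test  # ship untracked files too
```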
In **command mode** (`-- <cmd>`), the exit code propagates through
`banger`. In **detached mode** (`-d`), banger creates the VM, runs
workspace prep + bootstrap synchronously, then exits — no ssh
attach. Reconnect later with `banger vm ssh <name>`.
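A detached round-trip might look like this (the VM name is illustrative):
```bash
banger vm run -d --nat --name api ./repo   # create, prep workspace, bootstrap, then return
banger vm ssh api                          # attach whenever you're ready
```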
### Other VM verbs
The CLI tries to feel familiar — every command and subcommand has
`--help`. Beyond `vm run`: `vm list` shows running VMs (`--all` for
every state), `vm ssh <name>` reconnects to one, `vm exec <name> --
<cmd>` runs a command without a shell, `vm stop` / `vm kill` shut a
VM down (graceful / hard), `vm delete` removes a stopped one, and
`vm prune` sweeps every non-running VM.
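For instance (VM name illustrative):
```bash
banger vm list --all
banger vm exec sandbox -- uname -a
banger vm stop sandbox && banger vm delete sandbox
banger vm prune
```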
### `--nat`: outbound internet
By default, a guest can't reach the internet.
Pass `--nat` to enable it (host-side MASQUERADE):
```bash
banger vm ssh calm-otter
banger vm run --nat ./repo -- npm install
```
When the SSH session exits normally, `banger` checks the guest over vsock and
reminds you if the VM is still running.
`--nat` works on `vm run` and `vm create`. To toggle on an existing
VM: `banger vm set --nat <name>` (or `--no-nat` to remove it).
## Hostnames: `<vm>.vm`
banger's daemon runs a DNS server for the `.vm` zone. With host-side
DNS routing, `curl http://sandbox.vm:3000` works from anywhere on
the host — no IP juggling. On systemd-resolved hosts, banger wires
this up automatically; everywhere else there's a manual recipe in
[`docs/dns-routing.md`](docs/dns-routing.md).
For `ssh sandbox.vm` (instead of `banger vm ssh sandbox`):
Inspect host-reachable listening ports for a running VM:
```bash
banger vm ports calm-otter
banger ssh-config --install
```
Stop, restart, kill, or delete it:
```bash
banger vm stop calm-otter
banger vm start calm-otter
banger vm restart calm-otter
banger vm kill --signal TERM calm-otter
banger vm delete calm-otter
That adds a marker-fenced `Include` line to `~/.ssh/config`.
`banger ssh-config --uninstall` reverses it.
## Config
`~/.config/banger/config.toml`. All keys are optional:
```toml
[vm_defaults]
vcpu = 4
memory_mib = 4096
disk_size = "16G"
[[file_sync]]
host = "~/.config/git/config"
guest = "~/.config/git/config"
[[file_sync]]
host = "~/.aws"
guest = "~/.aws"
```
Update stopped VM settings:
`vm_defaults` overrides banger's host-derived sizing. `file_sync`
copies host files into the VM's work disk at create time — handy
for credentials and dotfiles you want in every sandbox. Full
reference: [`docs/config.md`](docs/config.md).
## Updating
```bash
banger vm set calm-otter --memory 2048 --vcpu 4 --disk-size 32G
banger update --check # is a newer release available?
sudo banger update # download, verify, swap, restart, run doctor
```
Lifecycle and `set` actions also accept multiple VM refs and run them
concurrently:
The release tarball is cosign-verified against a public key embedded
in the running binary. On any post-swap failure, banger auto-restores
the previous install. See [`docs/privileges.md`](docs/privileges.md)
for the trust model.
## Uninstalling
```bash
banger vm stop calm-otter buildbox api-1
banger vm kill --signal KILL aa12bb34 cc56dd78
banger vm set --nat web-1 web-2 web-3
sudo banger system uninstall # remove services + binaries; keep state
sudo banger system uninstall --purge # also wipe VMs, images, caches under /var/lib/banger
```
Launch the TUI:
```bash
banger tui
```
User config (`~/.config/banger/`) and SSH key
(`~/.local/state/banger/ssh/`) stay put either way — delete them by
hand if you want a full clean slate.
## Daemon
The CLI auto-starts `bangerd` when needed.
## Security
Useful daemon commands:
```bash
banger daemon status
banger daemon socket
banger daemon stop
```
Guest VMs are single-user dev sandboxes, not multi-tenant servers.
sshd accepts only the host SSH key (no passwords, no
kbd-interactive), and guests are reachable only through the host
bridge (`172.16.0.0/24`). Don't expose the bridge or guest IPs to
an untrusted network.
`banger daemon status` prints the daemon PID, socket path, daemon log path, and
the built-in DNS listener address.
The privileged surface lives entirely in `bangerd-root.service` and
is documented in [`docs/privileges.md`](docs/privileges.md).
State lives under XDG directories:
- config: `~/.config/banger`
- state: `~/.local/state/banger`
- cache: `~/.cache/banger`
- runtime socket: `$XDG_RUNTIME_DIR/banger/bangerd.sock`
## Further reading
Installed binaries resolve their runtime bundle from `../lib/banger` relative to
the executable. Source-checkout binaries resolve it from `./runtime` next to the
repo-built `./banger`. You can override either with `runtime_dir` in
`~/.config/banger/config.toml` or `BANGER_RUNTIME_DIR`.
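For example, to point a binary at a staged bundle for a one-off run (the path is illustrative):
```bash
BANGER_RUNTIME_DIR=/opt/banger-runtime banger doctor
```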
Useful config keys:
- `log_level`
- `runtime_dir`
- `tap_pool_size`
- `firecracker_bin`
- `namegen_path`
- `customize_script` (manual helper compatibility; `banger image build` is Go-native)
- `vsock_agent_path`
- `default_rootfs`
- `default_work_seed`
- `default_base_rootfs`
- `default_kernel`
- `default_initrd`
- `default_modules_dir`
- `default_packages_file`
Guest SSH access always uses the private key shipped in the resolved runtime
bundle. `ssh_key_path` is no longer a supported override for `banger vm ssh`,
VM start key injection, or daemon guest provisioning.
## Doctor
`banger doctor` runs the same readiness checks the Go control plane uses for VM
start, host-integrated features, and image builds. It reports runtime bundle
state, core VM host tools, current feature readiness, and image-build
prerequisites in a concise pass/warn/fail list.
Use it when bringing up a new machine, after changing the runtime bundle, or
before adding new host-integrated VM features.
## Logs
- daemon lifecycle logs: `~/.local/state/banger/bangerd.log`
- raw Firecracker output per VM: `~/.local/state/banger/vms/<vm-id>/firecracker.log`
- raw image-build helper output: `~/.local/state/banger/image-build/*.log`
`bangerd.log` is structured JSON. Set `log_level` in
`~/.config/banger/config.toml` or `BANGER_LOG_LEVEL` to one of `debug`,
`info`, `warn`, or `error`.
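To follow the structured log while reproducing an issue (`jq` is optional, purely for pretty-printing):
```bash
tail -f ~/.local/state/banger/bangerd.log | jq .
```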
## Images
List images:
```bash
banger image list
```
Build a managed image:
```bash
banger image build --name docker-dev --docker
```
Rebuilt images install a pinned `mise` at `/usr/local/bin/mise`, activate it
for bash login and interactive shells, install `opencode` through `mise`,
configure `tmux-resurrect` plus `tmux-continuum` for `root` with periodic
autosaves and manual-only restore by default, and bake in the
`banger-vsock-agent` systemd service used by the post-SSH reminder path and
guest health checks. They
also emit a `work-seed.ext4` sidecar that lets new VMs clone a prepared `/root`
work disk instead of rebuilding it from scratch on every create.
Show or delete images:
```bash
banger image show docker-dev
banger image delete docker-dev
```
`banger` auto-registers the bundled `default_rootfs` image when it exists. If
the bundle does not include a separate base `rootfs.ext4`, `image build` falls
back to using `rootfs-docker.ext4` as its default base image.
## Networking And DNS
Enable NAT when creating or updating a VM:
```bash
banger vm create --name web --nat
banger vm set web --nat
banger vm set web --no-nat
```
NAT is applied by the Go control plane using host `iptables` rules derived from
the VM's current guest IP and TAP device. The remaining shell helpers also
route NAT changes through `banger` instead of a standalone shell NAT script.
`bangerd` also serves a tiny authoritative DNS service on `127.0.0.1:42069`
for daemon-managed VMs. Known `A` records resolve `<vm-name>.vm` to the VM's
guest IPv4 address. Integrate your local resolver separately if you want
transparent `.vm` lookups on the host.
`banger vm ports` asks the guest-side `banger-vsock-agent` to run `ss`, then
prints host-usable endpoints plus the owning process/command. TCP listeners get
short best-effort HTTP and HTTPS probes; detected web listeners are shown as
`http` or `https`, and the endpoint column becomes a clickable URL such as
`https://<hostname>.vm:port/`. Older images without `ss` may need rebuilding
before `vm ports` works.
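To confirm a record exists, you can query the built-in resolver directly (VM name illustrative):
```bash
dig @127.0.0.1 -p 42069 web.vm A +short
banger vm ports web
```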
## Storage Model
- VMs share a read-only base rootfs image.
- Each VM gets its own sparse writable system overlay for `/`.
- Each VM gets its own persistent ext4 work disk mounted at `/root`.
- When an image has a `work-seed.ext4` sidecar, new VM creates clone that seed
and only resize it when needed. Older images still work, but create more
slowly because `/root` must be built from scratch.
- The daemon can keep a small idle TAP pool warm in the background so VM create
does not need to synchronously create a fresh TAP every time. `tap_pool_size`
controls the pool depth.
## Architecture Notes
The Go daemon is the primary control plane. VM host integrations such as the
built-in `.vm` DNS service, NAT, and `/root` work-disk wiring now sit behind a
capability pipeline in the daemon instead of being open-coded through the VM
lifecycle. Guest boot-time files and mounts are rendered through a structured
guest-config builder rather than ad hoc `fstab` string mutation.
That split is intentional: future host-integrated features should plug into the
daemon capability path and `banger doctor` checks first, with the remaining
shell helpers treated as manual workflows rather than architecture drivers.
- Stopping a VM preserves its overlay and work disk.
## Rebuilding The Repo Default Rootfs
`packages.apt` controls the base apt packages baked into rebuilt images,
including guest tools such as `ss` used by `banger vm ports`.
To rebuild the source-checkout default image in `./runtime/rootfs-docker.ext4`:
```bash
make rootfs
```
That rebuild also regenerates `./runtime/rootfs-docker.work-seed.ext4`, which
the daemon uses to speed up future `vm create` calls.
If your runtime bundle does not include `./runtime/rootfs.ext4`, pass an
explicit base image instead:
```bash
./make-rootfs.sh --base-rootfs /path/to/base-rootfs.ext4
```
If the package manifest changed and you want a fresh source-checkout image:
```bash
rm -f ./runtime/rootfs-docker.ext4 ./runtime/rootfs-docker.ext4.packages.sha256
make rootfs
```
`make rootfs` expects a bootstrapped runtime bundle. If `./runtime/rootfs.ext4`
is not available, pass an explicit `--base-rootfs` to `./make-rootfs.sh`.
Existing VMs keep using their current image and disks; rebuilds only affect VMs
created from the rebuilt image afterward.
## Experimental Void Rootfs
There is also a separate, opt-in builder for an experimental Void Linux guest
path:
```bash
make rootfs-void
```
That writes:
- `./runtime/rootfs-void.ext4`
- `./runtime/rootfs-void.work-seed.ext4`
This path is intentionally local-only and does not change the default Debian
image flow. It reuses the current runtime bundle kernel, initrd, and modules,
but builds a lean `x86_64-glibc` Void userspace with:
- `bash` installed for interactive/admin use
- pinned `mise` installed at `/usr/local/bin/mise`, activated for `root` bash shells
- `opencode` installed through `mise`, with `/usr/local/bin/opencode` available by default
- `docker` plus `docker-compose` installed from Void packages
- the `docker` runit service enabled, with Docker netfilter/forwarding kernel prep
- `openssh` enabled under runit
- the bundled `banger-vsock-agent` health agent enabled under runit
- `root` normalized to `/bin/bash` while keeping `/bin/sh` as the distro's system shell
- a generated `/root` work-seed for fast creates
It still keeps some Debian-oriented extras out for now:
- no tmux plugin defaults
The builder fetches official static XBPS tools and packages from the Void
mirror during the build. It currently supports only `x86_64-glibc`.
The package set comes from [`packages.void`](packages.void).
You can override the mirror, size, or output path directly:
```bash
./make-rootfs-void.sh --mirror https://repo-default.voidlinux.org --size 2G
```
The fastest local iteration loop does not require changing your default image
config at all:
```bash
make rootfs-void
make void-register
./banger vm create --image void-exp --name void-dev
./banger vm ssh void-dev
```
Rebuild the Void rootfs and recreate existing `void-exp` VMs after changing the
package set or guest provisioning; restart alone will not update the image
contents or `/root` work-seed.
There is also a smoke path for the experimental image:
```bash
make verify-void
```
`make void-register` uses the unmanaged image registration path to create or
update a `void-exp` image record in place, so repeated rebuilds do not require
editing `~/.config/banger/config.toml`.
There is also a one-step helper target:
```bash
make void-vm VOID_VM_NAME=void-a
```
If you really want the Void image to become your default for `vm create`
without `--image`, use the checked-in override template at
[`examples/void-exp.config.toml`](examples/void-exp.config.toml)
and merge its four settings into `~/.config/banger/config.toml`.
`banger image build` remains Debian-only in this pass. Do not point
`default_base_rootfs` at the Void artifact yet.
## Registering Unmanaged Images
You can also register any local rootfs as an unmanaged image record without
changing global defaults:
```bash
banger image register --name local-test --rootfs /abs/path/rootfs.ext4
```
Optional paths let you point at an existing work seed, kernel, initrd, modules,
and package manifest:
```bash
banger image register \
--name void-exp \
--rootfs ./runtime/rootfs-void.ext4 \
--work-seed ./runtime/rootfs-void.work-seed.ext4 \
--packages ./packages.void
```
If an unmanaged image with the same name already exists, `image register`
updates it in place so future `vm create --image <name>` calls pick up the new
artifacts immediately.
## Maintaining The Runtime Bundle
The checked-in [`runtime-bundle.toml`](runtime-bundle.toml)
is a template. Keep `bundle_metadata` accurate there, but use a separate local
manifest copy when you need concrete `url` and `sha256` values for bootstrap
testing or publication.
Package a local `./runtime/` tree into an archive:
```bash
make runtime-package
```
That writes `dist/banger-runtime.tar.gz` and prints its SHA256 so you can update
a local manifest copy before testing bootstrap changes or publishing the
archive elsewhere.
## Benchmarking Create Time
Benchmark the current host's `vm create` wall time plus first-SSH readiness:
```bash
make bench-create
```
Pass options through `ARGS`, for example:
```bash
make bench-create ARGS="--runs 3 --image docker-dev"
```
The benchmark prints JSON with:
- `create_ms`: wall time for `banger vm create`
- `ssh_ready_ms`: wall time from create start until `banger vm ssh <vm> -- true`
succeeds
## Remaining Shell Helpers
The runtime VM lifecycle is managed through `banger`. The remaining shell scripts are not the primary user interface:
- `customize.sh`: manual reference flow for rootfs customization; `banger image build` is now Go-native, but the script still reads
assets from `BANGER_RUNTIME_DIR` and stores transient state under
`BANGER_STATE_DIR`/XDG state
- `make-rootfs.sh`: convenience wrapper for rebuilding `./runtime/rootfs-docker.ext4`
- `interactive.sh`: manual one-off rootfs customization over SSH
- `packages.sh`: shell helper library
- `verify.sh`: smoke test for the Go workflow (`./verify.sh --nat` adds NAT coverage)
- [`docs/config.md`](docs/config.md) — full config reference.
- [`docs/dns-routing.md`](docs/dns-routing.md) — `<vm>.vm` host-side resolution.
- [`docs/image-catalog.md`](docs/image-catalog.md) — image bundles and how to publish.
- [`docs/kernel-catalog.md`](docs/kernel-catalog.md) — kernel bundles.
- [`docs/oci-import.md`](docs/oci-import.md) — pulling arbitrary OCI images.
- [`docs/advanced.md`](docs/advanced.md) — `vm create`, scripting, custom rootfs.
- [`docs/privileges.md`](docs/privileges.md) — trust model, capability set, daemon split.
- [`CONTRIBUTING.md`](CONTRIBUTING.md) — building from source, running tests.

BIN
assets/banger.gif Normal file

Binary file not shown (2.3 MiB).

112
assets/demo.tape Normal file
View file

@ -0,0 +1,112 @@
# banger hero demo — VHS tape
# Render with: vhs assets/demo.tape
Output assets/banger.gif
Require banger
Require ssh
Require curl
Set Shell "bash"
Set FontSize 14
Set LineHeight 1.4
Set Width 1200
Set Height 720
Set Padding 20
Set Theme "Catppuccin Frappe"
Set TypingSpeed 66ms
# Off-camera reset: enable bash syntax highlighting via ble.sh, prompt
# styling, drop any prior demo VM, and clear the screen.
Hide
Type "source ~/.local/share/blesh/ble.sh --noattach"
Enter
Sleep 200ms
Type "bleopt complete_auto_complete= complete_auto_history="
Enter
Sleep 100ms
Type `export PS1="\n$PS1"`
Enter
Sleep 200ms
Type "[[ ${BLE_VERSION-} ]] && ble-attach"
Enter
Sleep 400ms
Type "ble-face -s syntax_error fg=red"
Enter
Sleep 100ms
Type "banger vm kill demo 2>/dev/null; banger vm delete demo 2>/dev/null; clear"
Enter
Sleep 500ms
Show
Type "banger vm run --nat --name demo"
Enter
Wait+Line /demo:~#/
Sleep 1.4s
Type "uname -a"
Enter
Sleep 1.4s
Type "exit"
Enter
Wait
Sleep 700ms
Type "banger vm list"
Enter
Wait
Sleep 1.8s
Type "ssh demo.vm"
Enter
Wait+Line /demo:~#/
Sleep 500ms
Type "touch foo bar baz"
Enter
Sleep 700ms
Type "ls"
Enter
Sleep 1.4s
Type "exit"
Enter
Sleep 700ms
Type "banger vm stop demo"
Enter
Wait
Sleep 1s
Type "banger vm start demo"
Enter
Wait
Sleep 1s
Type "banger vm exec demo -- ls"
Enter
Wait
Sleep 1.4s
Type "banger vm exec demo -- docker run -d -p 80:80 nginx"
Enter
Wait
Sleep 1.6s
Type "banger vm ports demo"
Enter
Wait
Sleep 2s
Type "curl http://demo.vm"
Sleep 1.2s
Enter
Wait
Sleep 4s
Type "banger vm kill demo && banger vm delete demo"
Enter
Wait
Sleep 3s

View file

@ -11,12 +11,15 @@ import (
"syscall"
"time"
"banger/internal/buildinfo"
sdkvsock "github.com/firecracker-microvm/firecracker-go-sdk/vsock"
"github.com/sirupsen/logrus"
"banger/internal/vsockagent"
)
var _, _, _ = buildinfo.Version, buildinfo.Commit, buildinfo.BuiltAt
func main() {
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer cancel()

View file

@ -2,12 +2,14 @@ package main
import (
"context"
"errors"
"fmt"
"os"
"os/signal"
"syscall"
"banger/internal/cli"
"banger/internal/cli/style"
)
func main() {
@ -16,7 +18,16 @@ func main() {
cmd := cli.NewBangerCommand()
if err := cmd.ExecuteContext(ctx); err != nil {
fmt.Fprintf(os.Stderr, "banger: %v\n", err)
var exitErr cli.ExitCodeError
if errors.As(err, &exitErr) {
os.Exit(exitErr.Code)
}
// Render the failure through the CLI's translator so RPC
// codes become friendly text, op_ids land in parens for
// journalctl grepping, and the "banger:" prefix turns red
// on a TTY.
prefix := style.Fail(os.Stderr, "banger:")
fmt.Fprintf(os.Stderr, "%s %s\n", prefix, cli.TranslateError(os.Stderr, err))
os.Exit(1)
}
}

View file

@ -11,6 +11,12 @@ import (
)
func main() {
// 0o077 ensures the firecracker API/vsock sockets (and any other files
// the daemon or its children create) are user-private by default. The
// previous shell wrapper around firecracker exec did this inline; with
// the wrapper gone, the daemon process owns the umask.
syscall.Umask(0o077)
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()

View file

@ -1,72 +0,0 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"banger/internal/runtimebundle"
)
func main() {
if len(os.Args) < 2 {
usage()
os.Exit(2)
}
switch os.Args[1] {
case "fetch":
if err := fetch(os.Args[2:]); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
case "package":
if err := pkg(os.Args[2:]); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
default:
usage()
os.Exit(2)
}
}
func fetch(args []string) error {
fs := flag.NewFlagSet("fetch", flag.ContinueOnError)
fs.SetOutput(os.Stderr)
manifestPath := fs.String("manifest", "runtime-bundle.toml", "path to the runtime bundle manifest")
outDir := fs.String("out", "runtime", "destination runtime directory")
if err := fs.Parse(args); err != nil {
return err
}
manifest, err := runtimebundle.LoadManifest(*manifestPath)
if err != nil {
return err
}
return runtimebundle.Bootstrap(context.Background(), manifest, *manifestPath, *outDir)
}
func pkg(args []string) error {
fs := flag.NewFlagSet("package", flag.ContinueOnError)
fs.SetOutput(os.Stderr)
manifestPath := fs.String("manifest", "runtime-bundle.toml", "path to the runtime bundle manifest")
runtimeDir := fs.String("runtime-dir", "runtime", "runtime directory to package")
outArchive := fs.String("out", "dist/banger-runtime.tar.gz", "output archive path")
if err := fs.Parse(args); err != nil {
return err
}
manifest, err := runtimebundle.LoadManifest(*manifestPath)
if err != nil {
return err
}
sum, err := runtimebundle.Package(*runtimeDir, *outArchive, manifest)
if err != nil {
return err
}
fmt.Println(sum)
return nil
}
func usage() {
fmt.Fprintln(os.Stderr, "usage: runtimebundle <fetch|package> [flags]")
}

File diff suppressed because it is too large.

4
cosign.pub Normal file
View file

@ -0,0 +1,4 @@
-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAElWFSLKLosBrdjfuF8ZS6U01Ufky4
zNeVPCkA6HEJ/oe634fRqwFxkXKGWg03eGFSnlwRxnUxN2+duXQSsR0pzQ==
-----END PUBLIC KEY-----

View file

@ -1,560 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
log() {
printf '[customize] %s\n' "$*"
}
usage() {
cat <<'EOF'
Usage: ./customize.sh <base-rootfs> [--out <path>] [--size <size>] [--kernel <path>] [--initrd <path>] [--docker] [--modules <dir>]
Creates a copy of rootfs.ext4, optionally resizes it, boots a VM using the
copy as a writable rootfs, then applies base configuration and packages.
EOF
}
parse_size() {
local raw="$1"
if [[ "$raw" =~ ^([0-9]+)([KMG])?$ ]]; then
local num="${BASH_REMATCH[1]}"
local unit="${BASH_REMATCH[2]}"
case "$unit" in
K) echo $((num * 1024)) ;;
M|"") echo $((num * 1024 * 1024)) ;;
G) echo $((num * 1024 * 1024 * 1024)) ;;
esac
return 0
fi
return 1
}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEFAULT_RUNTIME_DIR="$SCRIPT_DIR"
if [[ -d "$SCRIPT_DIR/runtime" ]]; then
DEFAULT_RUNTIME_DIR="$SCRIPT_DIR/runtime"
fi
RUNTIME_DIR="${BANGER_RUNTIME_DIR:-$DEFAULT_RUNTIME_DIR}"
if [[ ! -d "$RUNTIME_DIR" ]]; then
log "runtime bundle not found: $RUNTIME_DIR"
log "run 'make runtime-bundle' or set BANGER_RUNTIME_DIR"
exit 1
fi
source "$RUNTIME_DIR/packages.sh"
STATE="${BANGER_STATE_DIR:-${XDG_STATE_HOME:-$HOME/.local/state}/banger/image-build}"
VM_ROOT="$STATE/vms"
mkdir -p "$VM_ROOT"
BUNDLE_METADATA="$RUNTIME_DIR/bundle.json"
bundle_path() {
local key="$1"
local fallback="$2"
local rel=""
if [[ -f "$BUNDLE_METADATA" ]] && command -v jq >/dev/null 2>&1; then
rel="$(jq -r --arg key "$key" '.[$key] // empty' "$BUNDLE_METADATA" 2>/dev/null || true)"
fi
if [[ -n "$rel" && "$rel" != "null" ]]; then
printf '%s\n' "$RUNTIME_DIR/$rel"
return
fi
printf '%s\n' "$fallback"
}
BASE_ROOTFS="$RUNTIME_DIR/rootfs.ext4"
FC_BIN="$RUNTIME_DIR/firecracker"
KERNEL="$(bundle_path default_kernel "$RUNTIME_DIR/wtf/root/boot/vmlinux-6.8.0-94-generic")"
INITRD="$(bundle_path default_initrd "$RUNTIME_DIR/wtf/root/boot/initrd.img-6.8.0-94-generic")"
SSH_KEY="$RUNTIME_DIR/id_ed25519"
VSOCK_AGENT="$(bundle_path vsock_agent_path "$RUNTIME_DIR/banger-vsock-agent")"
if [[ "$VSOCK_AGENT" == "$RUNTIME_DIR/banger-vsock-agent" && ! -x "$VSOCK_AGENT" ]]; then
VSOCK_AGENT="$(bundle_path vsock_ping_helper_path "$RUNTIME_DIR/banger-vsock-pingd")"
fi
BR_DEV="br-fc"
BR_IP="172.16.0.1"
CIDR="24"
DNS_SERVER="1.1.1.1"
resolve_banger_bin() {
if [[ -n "${BANGER_BIN:-}" ]]; then
printf '%s\n' "$BANGER_BIN"
return
fi
if [[ -x "$SCRIPT_DIR/banger" ]]; then
printf '%s\n' "$SCRIPT_DIR/banger"
return
fi
if command -v banger >/dev/null 2>&1; then
command -v banger
return
fi
log "banger binary not found; install/build banger or set BANGER_BIN"
exit 1
}
BANGER_BIN="$(resolve_banger_bin)"
NAT_ACTIVE=0
banger_nat() {
local action="$1"
"$BANGER_BIN" internal nat "$action" --guest-ip "$GUEST_IP" --tap "$TAP_DEV"
}
BASE_ROOTFS=""
OUT_ROOTFS=""
SIZE_SPEC=""
INSTALL_DOCKER=0
MISE_VERSION="v2025.12.0"
MISE_INSTALL_PATH="/usr/local/bin/mise"
MISE_ACTIVATE_LINE='eval "$(/usr/local/bin/mise activate bash)"'
TMUX_PLUGIN_DIR="/root/.tmux/plugins"
TMUX_RESURRECT_DIR="/root/.tmux/resurrect"
TMUX_TPM_REPO="https://github.com/tmux-plugins/tpm"
TMUX_RESURRECT_REPO="https://github.com/tmux-plugins/tmux-resurrect"
TMUX_CONTINUUM_REPO="https://github.com/tmux-plugins/tmux-continuum"
TMUX_MANAGED_START="# >>> banger tmux plugins >>>"
TMUX_MANAGED_END="# <<< banger tmux plugins <<<"
MODULES_DIR="$(bundle_path default_modules_dir "$RUNTIME_DIR/wtf/root/lib/modules/6.8.0-94-generic")"
PACKAGES_FILE="$(banger_packages_file)"
while [[ $# -gt 0 ]]; do
case "$1" in
--out)
OUT_ROOTFS="${2:-}"
shift 2
;;
--size)
SIZE_SPEC="${2:-}"
shift 2
;;
--kernel)
KERNEL="${2:-}"
shift 2
;;
--initrd)
INITRD="${2:-}"
shift 2
;;
--docker)
INSTALL_DOCKER=1
shift
;;
--modules)
MODULES_DIR="${2:-}"
shift 2
;;
-h|--help)
usage
exit 0
;;
*)
if [[ -z "$BASE_ROOTFS" ]]; then
BASE_ROOTFS="$1"
shift
else
log "unknown option: $1"
usage
exit 1
fi
;;
esac
done
if [[ -z "$BASE_ROOTFS" ]]; then
usage
exit 1
fi
if [[ ! -f "$BASE_ROOTFS" ]]; then
log "base rootfs not found: $BASE_ROOTFS"
exit 1
fi
if [[ -z "$OUT_ROOTFS" ]]; then
base_dir="$(dirname "$BASE_ROOTFS")"
base_name="$(basename "$BASE_ROOTFS")"
OUT_ROOTFS="${base_dir}/docker-${base_name}"
fi
if [[ "$OUT_ROOTFS" == *.ext4 ]]; then
WORK_SEED="${OUT_ROOTFS%.ext4}.work-seed.ext4"
else
WORK_SEED="${OUT_ROOTFS}.work-seed"
fi
if [[ ! -f "$KERNEL" ]]; then
log "kernel not found: $KERNEL"
exit 1
fi
if [[ -n "$INITRD" && ! -f "$INITRD" ]]; then
log "initrd not found: $INITRD"
exit 1
fi
if [[ -n "$MODULES_DIR" && ! -d "$MODULES_DIR" ]]; then
log "modules dir not found: $MODULES_DIR"
exit 1
fi
if [[ -e "$OUT_ROOTFS" ]]; then
log "output rootfs already exists: $OUT_ROOTFS"
exit 1
fi
if ! command -v resize2fs >/dev/null 2>&1; then
log "resize2fs required"
exit 1
fi
if ! command -v jq >/dev/null 2>&1; then
log "jq required"
exit 1
fi
if ! command -v sha256sum >/dev/null 2>&1; then
log "sha256sum required to record package manifest metadata"
exit 1
fi
if [[ ! -f "$PACKAGES_FILE" ]]; then
log "package manifest not found: $PACKAGES_FILE"
exit 1
fi
if [[ ! -x "$VSOCK_AGENT" ]]; then
log "vsock agent not found or not executable: $VSOCK_AGENT"
log "run 'make build' or refresh the runtime bundle"
exit 1
fi
APT_PACKAGES=()
if ! banger_packages_read_array APT_PACKAGES "$PACKAGES_FILE"; then
log "package manifest is empty: $PACKAGES_FILE"
exit 1
fi
if ! PACKAGES_HASH="$(printf '%s\n' "${APT_PACKAGES[@]}" | banger_packages_hash_stream)"; then
log "failed to hash package manifest: $PACKAGES_FILE"
exit 1
fi
printf -v APT_PACKAGES_ESCAPED '%q ' "${APT_PACKAGES[@]}"
log "copying base rootfs to $OUT_ROOTFS"
cp --reflink=auto "$BASE_ROOTFS" "$OUT_ROOTFS"
if [[ -n "$SIZE_SPEC" ]]; then
SIZE_BYTES="$(parse_size "$SIZE_SPEC")"
BASE_BYTES="$(stat -c%s "$BASE_ROOTFS")"
if [[ -z "$SIZE_BYTES" || "$SIZE_BYTES" -lt "$BASE_BYTES" ]]; then
log "size must be >= base image size"
exit 1
fi
log "resizing rootfs to $SIZE_SPEC"
truncate -s "$SIZE_BYTES" "$OUT_ROOTFS"
e2fsck -p -f "$OUT_ROOTFS" >/dev/null
resize2fs "$OUT_ROOTFS" >/dev/null
fi
VM_ID="$(head -c 32 /dev/urandom | xxd -p -c 256)"
VM_TAG="${VM_ID:0:8}"
VM_NAME="customize-${VM_TAG}"
VM_DIR="$VM_ROOT/$VM_ID"
mkdir -p "$VM_DIR"
API_SOCK="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}/banger/fc-$VM_TAG.sock"
LOG_FILE="$VM_DIR/firecracker.log"
TAP_DEV="tap-fc-$VM_TAG"
# Allocate guest IP
NEXT_IP_FILE="$STATE/next_ip"
NEXT_IP="$(cat "$NEXT_IP_FILE" 2>/dev/null || echo 2)"
GUEST_IP="172.16.0.$NEXT_IP"
echo "$((NEXT_IP + 1))" > "$NEXT_IP_FILE"
sudo -v
cleanup() {
sudo kill "${FC_PID:-}" 2>/dev/null || true
if [[ "$NAT_ACTIVE" -eq 1 ]]; then
banger_nat down >/dev/null 2>&1 || true
fi
sudo ip link del "$TAP_DEV" 2>/dev/null || true
rm -f "$API_SOCK"
rm -rf "$VM_DIR"
}
trap cleanup EXIT
sudo mkdir -p "$(dirname "$API_SOCK")"
sudo chown "$(id -u):$(id -g)" "$(dirname "$API_SOCK")"
# Host bridge
if ! ip link show "$BR_DEV" >/dev/null 2>&1; then
log "creating host bridge $BR_DEV ($BR_IP/$CIDR)"
sudo ip link add name "$BR_DEV" type bridge
sudo ip addr add "${BR_IP}/${CIDR}" dev "$BR_DEV"
sudo ip link set "$BR_DEV" up
else
sudo ip link set "$BR_DEV" up
fi
log "creating tap device $TAP_DEV"
TAP_USER="${SUDO_UID:-$(id -u)}"
TAP_GROUP="${SUDO_GID:-$(id -g)}"
sudo ip tuntap add dev "$TAP_DEV" mode tap user "$TAP_USER" group "$TAP_GROUP"
sudo ip link set "$TAP_DEV" master "$BR_DEV"
sudo ip link set "$TAP_DEV" up
sudo ip link set "$BR_DEV" up
log "starting firecracker process"
rm -f "$API_SOCK"
nohup sudo -E "$FC_BIN" --api-sock "$API_SOCK" >"$LOG_FILE" 2>&1 &
FC_PID="$!"
log "waiting for firecracker api socket"
for _ in $(seq 1 200); do
[[ -S "$API_SOCK" ]] && break
sleep 0.02
done
[[ -S "$API_SOCK" ]] || { log "firecracker api socket not ready"; exit 1; }
log "configuring machine"
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/machine-config \
-H "Content-Type: application/json" \
-d '{
"vcpu_count": 2,
"mem_size_mib": 1024,
"smt": false
}' >/dev/null
KCMD="console=ttyS0 reboot=k panic=1 pci=off root=/dev/vda rw ip=${GUEST_IP}::${BR_IP}:255.255.255.0:${VM_NAME}:eth0:off:${DNS_SERVER} hostname=${VM_NAME} systemd.mask=home.mount systemd.mask=var.mount"
INITRD_JSON=""
if [[ -n "$INITRD" ]]; then
INITRD_JSON=", \"initrd_path\": \"$INITRD\""
fi
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/boot-source \
-H "Content-Type: application/json" \
-d "{
\"kernel_image_path\": \"$KERNEL\",
\"boot_args\": \"$KCMD\"${INITRD_JSON}
}" >/dev/null
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/drives/rootfs \
-H "Content-Type: application/json" \
-d "{
\"drive_id\": \"rootfs\",
\"path_on_host\": \"$OUT_ROOTFS\",
\"is_root_device\": true,
\"is_read_only\": false
}" >/dev/null
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/network-interfaces/eth0 \
-H "Content-Type: application/json" \
-d "{
\"iface_id\": \"eth0\",
\"host_dev_name\": \"$TAP_DEV\"
}" >/dev/null
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/actions \
-H "Content-Type: application/json" \
-d '{ "action_type": "InstanceStart" }' >/dev/null
SUDO_CHILD_PID="$(pgrep -n -f "$API_SOCK" || true)"
if [[ -n "$SUDO_CHILD_PID" ]]; then
FC_PID="$SUDO_CHILD_PID"
fi
VM_CONFIG_JSON="$(sudo -E curl --unix-socket "$API_SOCK" -sS http://localhost/vm/config)"
CREATED_AT="$(date -Iseconds)"
jq -n \
--arg id "$VM_ID" \
--arg name "$VM_NAME" \
--arg pid "$FC_PID" \
--arg created_at "$CREATED_AT" \
--arg guest_ip "$GUEST_IP" \
--arg tap "$TAP_DEV" \
--arg api_sock "$API_SOCK" \
--arg log "$LOG_FILE" \
--arg rootfs "$OUT_ROOTFS" \
--arg kernel "$KERNEL" \
--argjson config "$VM_CONFIG_JSON" \
'{meta:{id:$id,name:$name,pid:$pid,created_at:$created_at,guest_ip:$guest_ip,tap:$tap,api_sock:$api_sock,log:$log,rootfs:$rootfs,kernel:$kernel},config:$config}' \
> "$VM_DIR/vm.json"
log "enabling NAT for customization"
banger_nat up >/dev/null
NAT_ACTIVE=1
log "waiting for SSH"
SSH_READY=0
for _ in $(seq 1 60); do
if ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"root@${GUEST_IP}" "true" >/dev/null 2>&1; then
SSH_READY=1
break
fi
sleep 1
done
if [[ "$SSH_READY" -ne 1 ]]; then
log "ssh did not become ready on $GUEST_IP"
exit 1
fi
log "configuring guest"
log "installing vsock agent"
scp -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"$VSOCK_AGENT" "root@${GUEST_IP}:/usr/local/bin/banger-vsock-agent" >/dev/null
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"root@${GUEST_IP}" bash -lc "set -e
printf 'nameserver %s\n' \"$DNS_SERVER\" > /etc/resolv.conf
echo \"$VM_NAME\" > /etc/hostname
printf '127.0.0.1 localhost\n127.0.1.1 %s\n' \"$VM_NAME\" > /etc/hosts
touch /etc/fstab
sed -i '\|^/dev/vdb[[:space:]]\+/home[[:space:]]|d; \|^/dev/vdc[[:space:]]\+/var[[:space:]]|d' /etc/fstab
if ! grep -q '^tmpfs /run ' /etc/fstab; then
echo 'tmpfs /run tmpfs defaults,nodev,nosuid,mode=0755 0 0' >> /etc/fstab
fi
if ! grep -q '^tmpfs /tmp ' /etc/fstab; then
echo 'tmpfs /tmp tmpfs defaults,nodev,nosuid,mode=1777 0 0' >> /etc/fstab
fi
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get -y upgrade
DEBIAN_FRONTEND=noninteractive apt-get -y install ${APT_PACKAGES_ESCAPED}
curl -fsSL https://mise.run | MISE_INSTALL_PATH=\"$MISE_INSTALL_PATH\" MISE_VERSION=\"$MISE_VERSION\" sh
\"$MISE_INSTALL_PATH\" use -g github:anomalyco/opencode
mkdir -p /etc/profile.d
cat > /etc/profile.d/mise.sh <<'MISEPROFILE'
if [ -n \"\${BASH_VERSION:-}\" ] && [ -x \"$MISE_INSTALL_PATH\" ]; then
eval \"\$($MISE_INSTALL_PATH activate bash)\"
fi
MISEPROFILE
chmod 0644 /etc/profile.d/mise.sh
touch /etc/bash.bashrc
if ! grep -Fqx '$MISE_ACTIVATE_LINE' /etc/bash.bashrc; then
printf '\n%s\n' '$MISE_ACTIVATE_LINE' >> /etc/bash.bashrc
fi
if [[ \"$INSTALL_DOCKER\" == \"1\" ]]; then
DEBIAN_FRONTEND=noninteractive apt-get -y remove containerd || true
if ! DEBIAN_FRONTEND=noninteractive apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin; then
DEBIAN_FRONTEND=noninteractive apt-get -y install docker.io
fi
if command -v systemctl >/dev/null 2>&1; then
systemctl enable --now docker || true
fi
fi
rm -f /root/get-docker /root/get-docker.sh /tmp/get-docker /tmp/get-docker.sh
chmod 0755 /usr/local/bin/banger-vsock-agent
mkdir -p /etc/modules-load.d /etc/systemd/system
cat > /etc/modules-load.d/banger-vsock.conf <<'EOF'
vsock
vmw_vsock_virtio_transport
EOF
chmod 0644 /etc/modules-load.d/banger-vsock.conf
cat > /etc/systemd/system/banger-vsock-agent.service <<'EOF'
[Unit]
Description=Banger vsock agent
After=network.target
[Service]
Type=simple
ExecStart=/usr/local/bin/banger-vsock-agent
Restart=on-failure
RestartSec=1
[Install]
WantedBy=multi-user.target
EOF
chmod 0644 /etc/systemd/system/banger-vsock-agent.service
if command -v systemctl >/dev/null 2>&1; then
systemctl daemon-reload || true
systemctl enable --now banger-vsock-agent.service || true
fi
git config --system init.defaultBranch main
"
log "configuring tmux resurrect"
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"root@${GUEST_IP}" bash -se <<EOF
set -euo pipefail
install_tmux_plugin() {
local dir="\$1"
local repo="\$2"
if [[ -d "\$dir/.git" ]]; then
git -C "\$dir" fetch --depth 1 origin
git -C "\$dir" reset --hard FETCH_HEAD
else
rm -rf "\$dir"
git clone --depth 1 "\$repo" "\$dir"
fi
}
mkdir -p "$TMUX_PLUGIN_DIR" "$TMUX_RESURRECT_DIR"
install_tmux_plugin "$TMUX_PLUGIN_DIR/tpm" "$TMUX_TPM_REPO"
install_tmux_plugin "$TMUX_PLUGIN_DIR/tmux-resurrect" "$TMUX_RESURRECT_REPO"
install_tmux_plugin "$TMUX_PLUGIN_DIR/tmux-continuum" "$TMUX_CONTINUUM_REPO"
TMUX_CONF="/root/.tmux.conf"
tmp_tmux_conf="\$(mktemp)"
if [[ -f "\$TMUX_CONF" ]]; then
awk -v begin="$TMUX_MANAGED_START" -v end="$TMUX_MANAGED_END" '
\$0 == begin { skip = 1; next }
\$0 == end { skip = 0; next }
!skip { print }
' "\$TMUX_CONF" > "\$tmp_tmux_conf"
else
: > "\$tmp_tmux_conf"
fi
if [[ -s "\$tmp_tmux_conf" ]]; then
printf '\n' >> "\$tmp_tmux_conf"
fi
cat >> "\$tmp_tmux_conf" <<'TMUXCONF'
$TMUX_MANAGED_START
set -g @plugin 'tmux-plugins/tpm'
set -g @plugin 'tmux-plugins/tmux-resurrect'
set -g @plugin 'tmux-plugins/tmux-continuum'
set -g @continuum-save-interval '15'
set -g @continuum-restore 'off'
set -g @resurrect-dir '/root/.tmux/resurrect'
run '~/.tmux/plugins/tpm/tpm'
$TMUX_MANAGED_END
TMUXCONF
mv "\$tmp_tmux_conf" "\$TMUX_CONF"
chmod 0644 "\$TMUX_CONF"
EOF
if [[ -n "$MODULES_DIR" ]]; then
MODULES_BASE="$(basename "$MODULES_DIR")"
log "copying kernel modules ($MODULES_BASE) into guest"
tar -C "$(dirname "$MODULES_DIR")" -cf - "$MODULES_BASE" | \
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"root@${GUEST_IP}" bash -lc "set -e
mkdir -p /lib/modules
tar -C /lib/modules -xf -
depmod -a \"$MODULES_BASE\"
mkdir -p /etc/modules-load.d
printf 'nf_tables\nnft_chain_nat\nveth\nbr_netfilter\noverlay\n' > /etc/modules-load.d/docker-netfilter.conf
mkdir -p /etc/sysctl.d
cat > /etc/sysctl.d/99-docker.conf <<'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
SYSCTL
sysctl --system >/dev/null 2>&1 || true
sync
"
fi
log "shutting down guest"
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"root@${GUEST_IP}" bash -lc "sync" || true
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/actions \
-H "Content-Type: application/json" \
-d '{ "action_type": "SendCtrlAltDel" }' >/dev/null || true
for _ in $(seq 1 200); do
if ! ps -p "$FC_PID" >/dev/null 2>&1; then
break
fi
sleep 0.05
done
banger_write_rootfs_manifest_metadata "$OUT_ROOTFS" "$PACKAGES_HASH"
log "building work seed $WORK_SEED"
"$BANGER_BIN" internal work-seed --rootfs "$OUT_ROOTFS" --out "$WORK_SEED"
log "done"

docs/advanced.md (new file)
@@ -0,0 +1,103 @@
# Advanced flows
`banger vm run` covers the common sandbox case. This doc is for the
rest: scripting, arbitrary images, custom rootfs stacks, long-lived
guest processes.
Host-side assumption for everything below: the supported runtime model
is still the two-service `systemd` install:
- `bangerd.service` running as the owner user
- `bangerd-root.service` running as the privileged host helper
These advanced flows widen what you do with banger, not which host
init systems or privilege model are supported.
## `vm create` — the low-level primitive
Use when you want to provision without starting, or when you need to
script VM creation piecewise.
```bash
banger vm create --image debian-bookworm --name testbox --no-start
banger vm start testbox
banger vm ssh testbox
banger vm stop testbox
banger vm delete testbox
```
Sweep every non-running VM (stopped, created, error) with:
```bash
banger vm prune # interactive confirmation
banger vm prune -f # skip the prompt
```
`vm create` is synchronous by default, but on a TTY it shows live
progress until the VM is fully ready.
## `image pull <oci-ref>` — arbitrary container images
For images outside banger's catalog, pull from any OCI registry:
```bash
banger image pull docker.io/library/alpine:3.20 --kernel-ref generic-6.12
```
Layers are flattened, ownership is fixed (setuid binaries, root-owned
config preserved), banger's guest agents are injected, and a first-boot
systemd service installs `openssh-server` via the guest's package
manager so the VM is reachable on first boot.
See [`docs/oci-import.md`](oci-import.md) for supported distros,
caveats, and the `internal/imagepull` design.
## `image register` — existing host-side stack
If you already have an ext4 rootfs, a kernel, optional initrd, and
optional modules as files on disk:
```bash
banger image register --name base \
--rootfs /abs/path/rootfs.ext4 \
--kernel-ref generic-6.12
```
You can mix `--kernel-ref` (a cataloged kernel) with `--rootfs` from
disk, or pass `--kernel /abs/path/vmlinux` for a one-off kernel.
For reproducible custom images, write a Dockerfile and publish it to
an image catalog. See [`docs/image-catalog.md`](image-catalog.md).
## Workspace primitive
`vm run ./repo` (see README) handles the common case. For a manual
flow against an already-running VM, `vm workspace prepare`
materialises a local git checkout into the guest:
```bash
banger vm workspace prepare <vm> ./other-repo --guest-path /root/repo
```
Default guest path is `/root/repo`; default mode is a shallow
metadata copy plus a tracked-files overlay. Untracked files are
skipped by default — pass `--include-untracked` to ship untracked
non-ignored files too. Pass `--dry-run` to list the exact file set
without touching the guest. For repositories with submodules, pass
`--mode full_copy`.
## Inspecting boot failures
When a VM's create flow errors ("ssh did not come up within 90s" or
similar), the VM is kept alive for inspection:
- `banger vm logs <name>` — the firecracker serial console output,
the best window into a stuck boot (systemd unit failures, kernel
panics, missing modules).
- `banger vm ports <name>` — what's listening in the guest. Works as
long as banger's vsock agent has come up, even if SSH is wedged.
- `banger vm show <name>` — daemon-side state (IP, PID, overlay
paths).
`--rm` on `vm run` intentionally does NOT fire when the initial ssh
wait times out, so the VM stays around for post-mortem.
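Putting those together, a typical post-mortem session looks like this (the VM name is illustrative):
```bash
banger vm show stuckbox    # daemon-side state: IP, PID, overlay paths
banger vm logs stuckbox    # serial console: systemd failures, kernel panics
banger vm ports stuckbox   # guest listeners, via the vsock agent
banger vm delete stuckbox  # tear it down once you're done
```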

docs/config.md (new file)
@@ -0,0 +1,153 @@
# Config reference
banger reads `~/.config/banger/config.toml` at daemon start; every key is
optional and defaults are applied for anything you omit. For where this file
sits in the install layout, see also
[docs/privileges.md](privileges.md) > Filesystem mutations.
---
## Top-level keys
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `log_level` | string | `"info"` | Daemon log verbosity; overridden at runtime by `BANGER_LOG_LEVEL`. Accepted values are the standard slog levels: `debug`, `info`, `warn`, `error`. |
| `firecracker_bin` | string | auto-detected from `PATH` | Path to the `firecracker` binary. Accepts absolute paths or `~/`-anchored paths. If unset, banger resolves `firecracker` on `PATH` at startup. |
| `jailer_bin` | string | `"/usr/bin/jailer"` | Path to the Firecracker `jailer` binary used to sandbox each VM process. |
| `jailer_enabled` | bool | `true` | When `false`, VMs are launched directly without the jailer. Disabling the jailer removes the seccomp/namespace sandbox; only for debugging or environments where jailer is unavailable. |
| `jailer_chroot_base` | string | `"<state_dir>/jail"` | Base directory under which the jailer creates per-VM chroot trees. Must be on the same filesystem as the image store to allow hard-linking without crossing device boundaries. |
| `ssh_key_path` | string | `"<state_dir>/ssh/id_ed25519"` (auto-generated) | Host SSH key used to reach guest VMs. Accepts absolute paths or `~/`-anchored paths; `~/foo` expands against `$HOME`. Relative paths are rejected. If unset, banger auto-generates an ed25519 keypair on first start. |
| `default_image_name` | string | `"debian-bookworm"` | Image used when `--image` is omitted from `vm run` / `vm create`. The named image is auto-pulled from the catalog if not already local. |
| `auto_stop_stale_after` | duration | `"0"` (disabled) | If non-zero, the daemon automatically stops VMs that have not been touched within this duration. Accepts Go duration strings (`"24h"`, `"2h30m"`). |
| `stats_poll_interval` | duration | `"10s"` | How often the daemon collects CPU and memory stats for running VMs. Accepts Go duration strings (`"30s"`, `"1m"`). |
| `bridge_name` | string | `"br-fc"` | Name of the Linux bridge device banger creates for the VM network. |
| `bridge_ip` | string | `"172.16.0.1"` | IP address assigned to the host side of the bridge (the gateway VMs see). |
| `cidr` | string | `"24"` | Prefix length for the VM subnet (combined with `bridge_ip` to define the network, e.g. `172.16.0.0/24`). |
| `tap_pool_size` | int | `4` | Number of TAP network devices pre-allocated in the pool. Increase if you routinely run more concurrent VMs than this value. |
| `default_dns` | string | `"1.1.1.1"` | DNS resolver address advertised to guest VMs via DHCP. |
---
## `[vm_defaults]`
The optional `[vm_defaults]` block sets the sizing floor for every new VM.
When a key is omitted (or zero), banger falls back to host-derived heuristics
and then to built-in constants. `banger doctor` prints the effective defaults
with their provenance.
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `vcpu` | int | host heuristic (≈ `cpus/4`, max 4) | Number of vCPUs assigned to each new VM. Must be ≥ 0; 0 means "let banger decide." |
| `memory_mib` | int | host heuristic (≈ `ram/8`, max 8192) | RAM in mebibytes assigned to each new VM. Must be ≥ 0; 0 means "let banger decide." |
| `disk_size` | string | `"8G"` | Size of the per-VM work disk. Accepts K/M/G suffixes (`"16G"`, `"512M"`). Maximum is 128 GiB. |
| `system_overlay_size` | string | `"8G"` | Size of the copy-on-write overlay layered over the read-only root filesystem. Accepts K/M/G suffixes. Maximum is 128 GiB. |
---
## `[[file_sync]]`
Each `[[file_sync]]` entry copies a file or directory from the host into
the VM's work disk at `vm create` time. You may declare any number of
entries; the default is none. Missing host paths are skipped with a warning
rather than failing the create.
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `host` | string | **required** | Source path on the host. Must be absolute or `~/`-anchored, and must resolve inside the installed owner's home directory. Top-level symlinks are followed only when their target stays inside that home. |
| `guest` | string | **required** | Destination path inside the VM. Must be absolute or `~/`-anchored, and must resolve under `/root` (the work disk mount point). |
| `mode` | string | `"0600"` for files, `"0755"` for directories | Unix permission bits applied to the destination. Must be a 3- or 4-digit octal string (`"0755"`, `"600"`). |
---
## Example
A fully annotated `config.toml` showing every section. Omit any key to keep
the built-in default.
```toml
# ~/.config/banger/config.toml
# ── Binaries ──────────────────────────────────────────────────────────────────
# Override the auto-resolved firecracker binary.
# firecracker_bin = "/usr/local/bin/firecracker"
# Override the default jailer binary path.
# jailer_bin = "/usr/bin/jailer"
# Disable the jailer (removes seccomp/namespace sandbox — debug only).
# jailer_enabled = false
# Base directory for per-VM jailer chroot trees.
# jailer_chroot_base = "/var/lib/banger/jail"
# ── Identity ──────────────────────────────────────────────────────────────────
# SSH key used to reach VMs. Auto-generated as an ed25519 key if unset.
# ssh_key_path = "~/.local/state/banger/ssh/id_ed25519"
# Default image for `vm run` / `vm create` when --image is omitted.
# default_image_name = "debian-bookworm"
# ── Logging ───────────────────────────────────────────────────────────────────
# Daemon log verbosity: debug | info | warn | error
# log_level = "info"
# ── Lifecycle ─────────────────────────────────────────────────────────────────
# Automatically stop VMs not touched within this window. 0 disables auto-stop.
# auto_stop_stale_after = "24h"
# How often to collect CPU/memory stats for running VMs.
# stats_poll_interval = "10s"
# ── Networking ────────────────────────────────────────────────────────────────
# Name of the Linux bridge device created for the VM network.
# bridge_name = "br-fc"
# Host-side IP address of the bridge (the gateway VMs see).
# bridge_ip = "172.16.0.1"
# Subnet prefix length combined with bridge_ip.
# cidr = "24"
# TAP device pool size — increase if you run more concurrent VMs than this.
# tap_pool_size = 4
# DNS resolver advertised to guests.
# default_dns = "1.1.1.1"
# ── VM sizing defaults ────────────────────────────────────────────────────────
[vm_defaults]
# vCPUs per VM. 0 = let banger decide from host heuristics.
vcpu = 2
# RAM in MiB per VM. 0 = let banger decide from host heuristics.
memory_mib = 2048
# Work disk size (K/M/G suffix). Max 128G.
disk_size = "8G"
# Copy-on-write overlay over the root filesystem (K/M/G suffix). Max 128G.
system_overlay_size = "8G"
# ── Host → guest file copies ──────────────────────────────────────────────────
# Copy an entire directory (recursive).
[[file_sync]]
host = "~/.aws"
guest = "~/.aws"
# Copy a single file with explicit permissions.
[[file_sync]]
host = "~/.config/gh/hosts.yml"
guest = "~/.config/gh/hosts.yml"
# Copy a script and make it executable.
[[file_sync]]
host = "~/bin/my-script"
guest = "~/bin/my-script"
mode = "0755"
```

docs/dns-routing.md (new file)
@@ -0,0 +1,161 @@
# DNS routing — resolving `<vm>.vm` hostnames from the host
banger's owner daemon runs a local DNS server on `127.0.0.1:42069` that
answers queries under the `.vm` zone. Every VM you create gets a
record:
```
devbox.vm → 172.16.0.9 (whatever guest IP it was assigned)
```
With that plus host-side DNS routing, you can:
```bash
ssh root@devbox.vm
curl http://devbox.vm:3000
```
from anywhere on the host without copy-pasting guest IPs.
## Supported path
The supported host-side path is:
- `systemd` on the host
- `bangerd.service` running as the owner user
- `bangerd-root.service` running as the privileged host helper
- `systemd-resolved` handling `.vm` routing via `resolvectl`
If you're on a non-`systemd` host or a host without `systemd-resolved`,
the recipes below are best-effort guidance, not the primary supported
deployment model.
## systemd-resolved hosts — nothing to configure
If your host uses `systemd-resolved` (most modern Linux desktops —
Ubuntu ≥18.04, Fedora, Arch with the service enabled), banger
auto-wires it. When the banger services start, the owner daemon asks
the root helper to apply the equivalent of:
```
sudo resolvectl dns <bridge> 127.0.0.1:42069
sudo resolvectl domain <bridge> ~vm
sudo resolvectl default-route <bridge> no
```
against the banger bridge (`br-fc` by default). systemd-resolved
routes only `.vm` lookups to banger's DNS; everything else goes to
your normal upstream. No other changes needed.
Verify: `resolvectl status br-fc` should list `127.0.0.1:42069` under
**Current DNS Server** and `~vm` under **DNS Domain**.
Stopping or uninstalling the services reverts the bridge's
`resolvectl` state on shutdown:
```bash
sudo banger daemon stop
sudo banger system uninstall
```
## Non-systemd-resolved hosts
banger detects `resolvectl`'s absence and skips the auto-wire. You
configure your own resolver. Below are recipes for the common cases.
They can be useful in local experiments, but this is outside banger's
supported host/runtime path.
In every case the goal is the same: **route `.vm` queries to
`127.0.0.1` port `42069`, leave everything else alone**.
### dnsmasq
Add a stanza to your dnsmasq config (e.g.
`/etc/dnsmasq.d/banger-vm.conf`):
```
server=/vm/127.0.0.1#42069
```
Reload dnsmasq (`sudo systemctl reload dnsmasq` or equivalent) and
test:
```
dig devbox.vm
```
### NetworkManager with dnsmasq plugin
Same file as above; NetworkManager picks it up automatically if it's
configured to use the dnsmasq plugin (`dns=dnsmasq` in
`/etc/NetworkManager/NetworkManager.conf`). Restart NetworkManager
after editing.
### Raw `/etc/resolv.conf`
If you edit `resolv.conf` directly, there's no per-domain routing —
you'd have to point ALL DNS through banger, which you probably don't
want. Install `dnsmasq` instead and use the stanza above.
### macOS (if you ever run banger on a Linux VM hosted on macOS)
macOS supports per-TLD resolvers out of the box. Create
`/etc/resolver/vm` (as root):
```
nameserver 127.0.0.1
port 42069
```
No daemon reload needed — `scutil --dns` should list `.vm` under
"Resolver configurations" immediately.
### Windows/WSL
WSL2 inherits the Windows resolver by default and cannot be told to
route `.vm` anywhere. Options:
1. Run banger inside WSL but resolve manually: `ssh root@172.16.0.9`.
2. Set up `dnsmasq` on the WSL distro and point its resolv.conf at
it; then follow the dnsmasq recipe above.
## Verifying the DNS server
Regardless of host-side routing, you can always query banger's DNS
server directly:
```bash
dig @127.0.0.1 -p 42069 devbox.vm
```
Returns the guest IP if the VM is running. If it returns NXDOMAIN,
the VM either doesn't exist under that name or isn't running yet.
`banger vm list` shows the VM names banger knows about.
## Troubleshooting
- **`resolvectl` errors about "system has not been booted with systemd
as init system"** — you're probably inside a container or on a
non-`systemd` host. Manual resolver setup may still work, but that's
outside the supported path.
- **Port 42069 already in use** — another daemon is bound there
(previous banger instance not shut down cleanly, or an unrelated
app). `ss -ulpn | grep 42069` shows who. `sudo banger daemon stop`
stops both banger services and cleans up banger's own listener.
- **`devbox.vm` resolves but SSH hangs** — DNS is fine; the VM
might not be up yet or the bridge NAT is misconfigured.
`banger vm ssh devbox` uses the guest IP directly and bypasses
DNS — try that to isolate.
- **Changes to `default_dns` don't affect `.vm` resolution**
`default_dns` is the upstream the GUEST uses; it's unrelated to
host-side `.vm` routing.
## Port and bridge tuning
| Setting | Default | Notes |
|---|---|---|
| DNS listen addr | `127.0.0.1:42069` | Not configurable in v1. Edit `internal/vmdns/server.go` if you really need to change it. |
| Bridge name | `br-fc` | Configurable via `bridge_name` in `~/.config/banger/config.toml`. |
| Bridge IP | `172.16.0.1` | Configurable via `bridge_ip`. |
| Resolver route domain | `~vm` | Not configurable. |

docs/image-catalog.md (new file)
@@ -0,0 +1,123 @@
# Image catalog
The image catalog ships pre-built banger rootfs bundles so users don't
have to register or build anything. It's the fast path behind
`banger vm run` (auto-pull) and `banger image pull <name>`. The
catalog is embedded into the banger binary and updated each release.
End-user flow:
```bash
banger image pull debian-bookworm # explicit
banger vm run --name sandbox # implicit (auto-pulls)
```
## Architecture
Two parts — the same shape as the kernel catalog:
1. **`internal/imagecat/catalog.json`** — JSON manifest embedded into
the banger binary via `go:embed`. Each entry: name, distro, arch,
kernel_ref (a `kernelcat` entry name), tarball URL, tarball
sha256, size.
2. **Tarballs at `https://images.thaloco.com/`** — Cloudflare R2
bucket `banger-images`, fronted by a public custom domain. Each
tarball is `<name>-<arch>-<sha256-prefix>.tar.zst` (content-
addressed filename so CDN edge cache can never serve stale bytes
for the URL the catalog points at). Contents at the archive root:
`rootfs.ext4` (finalized: flattened + ownership-fixed + agent-
injected at build time) and `manifest.json`.
The `banger image pull` bundle path streams the tarball, verifies
sha256 against the catalog entry, extracts both files into a staging
dir, resolves the kernel via `kernel_ref` (auto-pulling from
`kernelcat` if the user hasn't pulled it yet), stages boot artifacts
alongside, and registers the result as a managed image.
The same `image pull` command transparently falls through to the
existing OCI-pull path when `<name>` doesn't match a catalog entry —
see [`docs/oci-import.md`](oci-import.md).
## Adding or updating an entry
The repo has no CI for bundle publishing yet. Catalog updates are
manual.
```bash
# 1. Build the bundle + upload + patch catalog.json in one shot.
scripts/publish-golden-image.sh
# 2. Review and commit the catalog change.
git diff -- internal/imagecat/catalog.json
git add internal/imagecat/catalog.json
git commit -m 'imagecat: publish debian-bookworm'
# 3. Rebuild so the new catalog is embedded.
make build
```
`scripts/publish-golden-image.sh` wraps `scripts/make-golden-bundle.sh`
(which runs `docker build` on `images/golden/Dockerfile` then pipes
`docker export` into `banger internal make-bundle`), computes the
bundle's sha256, uses the first 12 hex chars as a cache-busting
filename suffix, uploads via `rclone` to R2, HEAD-checks the public
URL, and patches `internal/imagecat/catalog.json`.
Environment overrides if the defaults need to change:
`RCLONE_REMOTE`, `RCLONE_BUCKET`, `BASE_URL`.
`--skip-upload` builds the bundle into `dist/` and stops — useful for
local testing without touching R2 or the catalog.
## Bundle format
A bundle is a tar+zstd archive with exactly two entries at the root:
```
rootfs.ext4 # finalized banger rootfs
manifest.json # {name, distro, arch, kernel_ref, description}
```
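A quick way to sanity-check a locally built bundle (for example one produced with `--skip-upload`) against this layout — the exact filename under `dist/` is whatever the publish script chose:
```bash
ls -lh dist/
tar --zstd -tvf dist/*.tar.zst   # expect exactly rootfs.ext4 and manifest.json at the root
```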
`rootfs.ext4` is fully prepared at build time: ownership fixed via
`debugfs sif`, banger guest agents (vsock agent, network bootstrap,
first-boot unit) already injected and enabled in
`multi-user.target.wants`. The pull path only has to place the file
and register the image — no mkfs, no ownership pass, no injection on
the daemon host.
## Removing an entry
1. Remove the entry from `internal/imagecat/catalog.json` and commit.
2. Delete the tarball from R2:
`rclone delete banger-images:banger-images/<name>-<arch>-<hash>.tar.zst`.
3. Rebuild banger.
Already-pulled local images are not invalidated — users keep using
them until they run `banger image delete <name>`.
## Versioning conventions
- **Entry names**: `<distro>-<release>` (e.g. `debian-bookworm`).
Per-release names make it trivial to publish `debian-trixie`
alongside without collisions.
- **Content-addressed filenames**: the `-<sha256-prefix>` suffix is
mandatory (set by `publish-golden-image.sh`). Never reuse a URL for
different bytes.
- **Architecture**: `x86_64` only today. The `arch` field is additive
— adding `arm64` is a config change, not a schema change.
## Trust model
Same as the kernel catalog: the embedded `catalog.json` carries each
bundle's sha256, and `imagecat.Fetch` rejects any download whose hash
doesn't match. This protects against transport corruption and against
an attacker swapping an R2 object without landing a commit in the
banger repo. GPG/sigstore signing is deferred until banger is public
and the threat model justifies the operational overhead.
## Hosting
Tarballs live in Cloudflare R2 (bucket `banger-images`), served at
`images.thaloco.com`. The bucket is publicly readable; writes require
the R2 API token configured on the `banger-images` rclone remote.

docs/kernel-catalog.md (new file)
@@ -0,0 +1,142 @@
# Kernel catalog
The kernel catalog ships pre-built Firecracker-ready kernel bundles so users
don't have to compile anything. The catalog is embedded into the banger
binary and updated each release.
End-user flow:
```bash
banger kernel list --available # browse the catalog
banger kernel pull generic-6.12 # download a bundle (no sudo, no make)
banger image register --name myimg --rootfs … --kernel-ref generic-6.12
```
## Architecture
Two parts:
1. **`internal/kernelcat/catalog.json`** — a JSON manifest embedded into the
banger binary via `go:embed`. Each entry carries a name, distro, arch,
kernel version, tarball URL, and tarball SHA256. Updating the catalog
means editing this file in the repo and rebuilding banger.
2. **Tarballs at `https://kernels.thaloco.com/`** — Cloudflare R2 bucket
`banger-kernels`, fronted by a public custom domain. Each tarball is
`<name>-<arch>.tar.zst` and contains `vmlinux`, optional `initrd.img`,
and an optional `modules/` tree at the archive root.
The `banger kernel pull` flow streams the tarball, verifies its SHA256
against the embedded catalog entry, decompresses it (zstd), extracts it
into `~/.local/state/banger/kernels/<name>/`, and writes a manifest. Path
traversal entries and unsafe symlinks are rejected.
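The same integrity check can be reproduced by hand; the URL follows the `<name>-<arch>.tar.zst` convention above, and the expected hash is the one recorded in `internal/kernelcat/catalog.json`:
```bash
curl -fsSLO https://kernels.thaloco.com/generic-6.12-x86_64.tar.zst
sha256sum generic-6.12-x86_64.tar.zst               # must match the catalog entry
zstd -dc generic-6.12-x86_64.tar.zst | tar -tvf -   # vmlinux, plus optional initrd.img / modules/
```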
## Kernel types
**`generic-<version>`** — built from upstream kernel.org sources with
Firecracker's official config. All essential drivers (virtio_blk,
virtio_net, ext4, vsock) compiled in — no modules, no initramfs. This
is the kernel the golden image pairs with and the recommended kernel
for OCI-pulled images. Build with `scripts/make-generic-kernel.sh`.
## Adding or updating an entry
The repo has no CI for kernel publishing yet. Catalog updates are manual
and infrequent (kernel version bumps every few weeks at most).
```bash
# 1. Build the kernel locally.
scripts/make-generic-kernel.sh
# 2. Import it into the local catalog so the canonical layout exists.
banger kernel import generic-6.12 \
--from build/manual/generic-kernel \
--distro generic \
--arch x86_64
# 3. Package, upload, patch catalog.json.
scripts/publish-kernel.sh generic-6.12 \
--description "Generic Firecracker kernel 6.12 (all drivers built-in, no initrd)"
# 4. Review and commit the catalog change.
git diff -- internal/kernelcat/catalog.json
git add internal/kernelcat/catalog.json
git commit -m 'kernel catalog: add/update generic-6.12'
# 5. Rebuild so the new catalog is embedded.
make build
```
`scripts/publish-kernel.sh` reads the locally-imported entry under
`~/.local/state/banger/kernels/<name>/`, builds a tar+zstd archive, uploads
it to R2 via `rclone`, HEAD-checks the public URL, and patches
`internal/kernelcat/catalog.json` with the new URL, SHA256, and size.
Environment overrides if the defaults need to change:
`RCLONE_REMOTE`, `RCLONE_BUCKET`, `BASE_URL`, `BANGER_KERNELS_DIR`.
## Removing an entry
1. Delete the line from `internal/kernelcat/catalog.json` and commit.
2. Delete the tarball from R2: `rclone delete r2:banger-kernels/<name>-<arch>.tar.zst`.
3. Rebuild banger.
Already-pulled local copies on user machines are not invalidated — they
keep working until the user runs `banger kernel rm <name>`. That's
intentional: pulling is idempotent, removing should not break anyone in
the middle of a workflow.
## Versioning conventions
- **Entry names**: `<family>-<major.minor>` (e.g. `generic-6.12`).
The major.minor is the kernel line. Patch-level bumps reuse the
entry name and replace the tarball; minor bumps create a new entry
(`generic-6.13`).
- **Architecture**: only `x86_64` is published today. The `arch` field in
the catalog schema is additive — adding `arm64` later is a config
change, not a schema change.
- **Tarball layout**: contents at the archive root (no top-level
versioned directory). `vmlinux` is required; `initrd.img` and
`modules/` are optional. Symlinks inside `modules/` are allowed but
must resolve within the archive.
## Trust model
The embedded `catalog.json` carries the SHA256 of each tarball. `banger
kernel pull` rejects any download whose hash doesn't match. This protects
against transport corruption and against an attacker swapping a tarball
on R2 without also pushing a banger release.
It does **not** protect against a compromise of the banger source repo
itself — an attacker who can land a commit can change both the catalog
SHA256 and the tarball. GPG/sigstore signing of the published catalog
tarballs is deferred until banger is public and the threat model
justifies the operational overhead.
Upstream kernel sources *are* verified: `scripts/make-generic-kernel.sh`
fetches the detached PGP signature alongside the tarball from
kernel.org and rejects the build if gpg can't verify it against one
of the three known release signing keys (Greg KH / Linus / Sasha
Levin). So a compromised kernel.org mirror can't slip a backdoored
tarball past a maintainer rebuilding the kernel locally.
## Hosting
Tarballs live in Cloudflare R2 (bucket `banger-kernels`), served at the
custom domain `kernels.thaloco.com`. The bucket is publicly readable;
writes require the `banger-kernels-publish` API token (kept locally,
never committed). R2's free tier covers the expected traffic comfortably
(zero egress fees, generous storage).
If hosting ever moves, catalog entries can be migrated by reuploading the
tarballs and editing the URLs in `catalog.json` — no other code changes
required.
## Tech debt
- Kernel publishing is manual; there is no CI yet. `scripts/make-generic-kernel.sh`
plus `scripts/publish-kernel.sh` is fine while refreshes are
infrequent and maintainer-only. CI becomes relevant once banger
goes public.
- `make lint-shell` runs at `--severity=error` only. Tightening to
`--severity=warning` is a nice-to-have but low priority.

docs/oci-import-internals.md (new file)
@@ -0,0 +1,46 @@
# OCI import — internals
> **Advanced reading.** This document describes implementation details of the
> OCI import pipeline. It is not needed for day-to-day use of
> `banger image pull`. User-facing documentation is in
> [`docs/oci-import.md`](oci-import.md).
## Architecture
`internal/imagepull/` owns the mechanics:
- **`Pull`** wraps `go-containerregistry`'s `remote.Image` with the
`linux/amd64` platform pinned. Layer blobs cache under
`/var/cache/banger/oci/blobs/` (system install) or
`~/.cache/banger/oci/blobs/` (dev mode) and populate lazily during
flatten.
- **`Flatten`** replays layers oldest-first into a staging directory,
applies whiteouts, rejects unsafe paths plus filenames that banger's
debugfs ownership fixup cannot encode safely. Returns a `Metadata`
map of per-file uid/gid/mode from tar headers.
- **`BuildExt4`** runs `mkfs.ext4 -F -d <staging> -E root_owner=0:0`
at the size of the pre-truncated file — no mount, no sudo, no
loopback. Requires `e2fsprogs ≥ 1.43`.
- **`ApplyOwnership`** streams a batched `set_inode_field` script to
`debugfs -w` to rewrite per-file uid/gid/mode to the captured tar-
header values.
- **`InjectGuestAgents`** uses the same `debugfs` scripting to drop
banger's guest assets into the ext4 with root ownership:
vsock agent binary, network bootstrap + unit, first-boot script +
unit, `multi-user.target.wants` symlinks, vsock modules-load
config, `/var/lib/banger/first-boot-pending` marker.
`internal/daemon/images_pull.go` orchestrates `pullFromOCI`:
1. Parse + validate the OCI ref, derive a default name when `--name`
is omitted (`debian-bookworm` from
`docker.io/library/debian:bookworm`).
2. Resolve kernel info via `resolveKernelInputs` (auto-pulls from
`kernelcat` if `--kernel-ref` names a catalog entry that isn't
yet local).
3. Stage at `<ImagesDir>/<id>.staging`; extract layers to a temp
tree under `$TMPDIR`.
4. `BuildExt4` → `ApplyOwnership` → `InjectGuestAgents`.
5. `imagemgr.StageBootArtifacts` stages the kernel triple alongside.
6. Atomic `os.Rename` publishes the artifact dir.
7. Persist a `model.Image{Managed: true, …}` record.
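For orientation only, here is a very rough manual equivalent of the `BuildExt4` and `ApplyOwnership` steps; the image size, the example path, and the exact `debugfs` script banger generates are assumptions, not the code's actual behaviour:
```bash
truncate -s 2G rootfs.ext4                                 # pre-size the image file
mkfs.ext4 -F -d ./staging -E root_owner=0:0 rootfs.ext4    # populate without mount/sudo/loopback
# one set_inode_field command per file whose tar header differs from root:root / default mode
echo 'set_inode_field /usr/bin/sudo mode 0104755' | debugfs -w rootfs.ext4
```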

docs/oci-import.md (new file)
@@ -0,0 +1,135 @@
# OCI import (`banger image pull`)
`banger image pull` has two paths. The primary one — catalog bundle —
is documented in [`docs/image-catalog.md`](image-catalog.md). This
doc covers the fallthrough: OCI-registry pull for arbitrary container
images.
## When to use it
Use the OCI path when you need a distro or image that isn't in the
catalog. The catalog covers the common happy path
(`debian-bookworm`); anything else (`alpine`, `fedora`, `ubuntu`,
custom corporate images) goes through OCI pull.
```bash
banger image pull docker.io/library/alpine:3.20 --kernel-ref generic-6.12
banger image pull ghcr.io/myorg/devimg:v2 --kernel-ref generic-6.12
```
`banger image pull` dispatches based on the reference:
- `banger image pull debian-bookworm` → catalog (fast path).
- `banger image pull docker.io/library/foo:bar` → OCI (anything not
in the catalog).
## What works
- Any public OCI image that exposes a `linux/amd64` manifest.
- Correct layer replay with whiteout semantics (`.wh.*` deletes,
`.wh..wh..opq` opaque-dir markers).
- Path-traversal, debugfs-hostile filename, and relative-symlink-escape protection.
- Content-aware default sizing (`content × 1.5`, floor 1 GiB).
- Layer caching on disk, keyed by blob sha256.
- **Ownership preservation** — tar-header uid/gid/mode captured
during flatten, applied to the ext4 via a `debugfs` pass, so
setuid binaries (`sudo`, `passwd`) and root-owned config
(`/etc/shadow`, `/etc/sudoers`) end up correctly owned.
- **Pre-injected banger agents** — the pulled ext4 ships with
`banger-vsock-agent`, `banger-network.service`, and the
`banger-first-boot` unit already enabled.
- **First-boot sshd install** — a one-shot systemd service installs
`openssh-server` via the guest's package manager on first boot.
Dispatches on `/etc/os-release` → `apt-get` / `apk` / `dnf` /
`pacman` / `zypper`. Subsequent boots skip the install.
## What doesn't yet work
- **Private registries**. Anonymous pulls only. Docker Hub, GHCR
(public), quay.io (public) all work. Adding auth via
`authn.DefaultKeychain` (from `go-containerregistry`) is a cheap
follow-up when someone needs it.
- **Non-`linux/amd64`**. The kernel catalog is x86_64-only, so pulled
rootfses match. `arm64` is additive in the schema.
- **Non-systemd rootfses**. The injected units assume systemd as
PID 1. Alpine (OpenRC), void, and other busybox-init images won't
honour the banger-* units.
- **First boot needs network access**. The first-boot sshd install
reaches out to the distro's package repo. VMs without NAT or
without the bridge reaching the internet time out. The marker file
stays in place so a later restart retries.
## Architecture
> Implementation details live in [`docs/oci-import-internals.md`](oci-import-internals.md).
## Guest-side boot sequence
On first boot of a pulled image:
1. **`banger-network.service`** — brings the guest interface up with
the IP assigned by banger's VM-create lifecycle.
2. **`banger-first-boot.service`** (first boot only) — reads
`/etc/os-release`, dispatches to the native package manager,
installs `openssh-server`, enables `ssh.service`.
3. **`banger-vsock-agent.service`** — the health-check daemon banger
uses to confirm the VM is alive.
Subsequent boots skip step 2.
## Adding distro support to first-boot
`internal/imagepull/assets/first-boot.sh` is the POSIX-sh dispatch.
Add a new `ID=` branch and its install command, then rebuild banger
(the asset is `go:embed`-ed).
Supported `ID` values today: `debian`, `ubuntu`, `kali`, `raspbian`,
`linuxmint`, `pop`, `alpine`, `fedora`, `rhel`, `centos`, `rocky`,
`almalinux`, `arch`, `archlinux`, `manjaro`, `opensuse*`, `suse`.
Unknown distros fall back to `ID_LIKE`, then error cleanly.
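As a sketch of what such a branch looks like (the real dispatch structure in `first-boot.sh` may differ, and the `void` / `xbps-install` branch is purely illustrative):
```sh
. /etc/os-release
case "$ID" in
  debian|ubuntu) apt-get update && apt-get install -y openssh-server ;;
  alpine)        apk add --no-cache openssh ;;
  void)          xbps-install -Sy openssh ;;    # hypothetical new branch
  *)             echo "first-boot: unsupported distro: $ID" >&2; exit 1 ;;
esac
```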
## Paths
Paths below assume the system install (`banger system install`). When
running `bangerd` directly without the helper, the same files live
under `~/.cache/banger/` and `~/.local/state/banger/` instead.
| What | Where |
|------|-------|
| Layer blob cache | `/var/cache/banger/oci/blobs/sha256/<hex>` |
| Staging dir | `/var/lib/banger/images/<id>.staging/` |
| Extraction scratch | `$TMPDIR/banger-pull-<rand>/` |
| Published image | `/var/lib/banger/images/<id>/rootfs.ext4` |
## Cache lifecycle
OCI layer blobs accumulate as you pull images. Banger flattens every
pull into a self-contained ext4, so the cache is purely a re-pull
avoidance — losing it only costs network round-trips on the next
pull of the same image. Reclaim disk with:
```
banger image cache prune --dry-run # report size only
banger image cache prune # remove every cached blob
```
Run with the daemon idle; an in-flight pull racing against prune may
fail and need a retry.
## Tech debt
- **Auth**. When we add private-registry support, the natural path
is `authn.DefaultKeychain`, which honours `~/.docker/config.json`
and the standard credential helpers.
- **Non-systemd rootfses**. The guest agents assume systemd. Adding
openrc / s6 / busybox-init variants means keeping parallel unit
trees keyed on `/etc/os-release`.
## Trust model
`image pull` (OCI path) delegates trust to the registry the user
selected. `go-containerregistry` verifies layer digests against the
manifest during download, so a tampered mirror can't ship modified
layers without breaking the sha256 chain. Banger does not verify OCI
image signatures (cosign/sigstore) — users who care should verify
references out-of-band.

docs/privileges.md (new file)
@@ -0,0 +1,379 @@
# Privileges
This document describes exactly what banger does with the privileges it
asks for, what runs where, and how to undo it. The aim is to give a
reader enough information to grant — or refuse — the privileges with
their eyes open.
## Two services, two trust boundaries
`banger system install` lays down two systemd units:
| Unit | User | Socket | Purpose |
|---|---|---|---|
| `bangerd.service` | owner user (chosen at install) | `/run/banger/bangerd.sock` (0600, owner) | Orchestration: VM/image lifecycle, store, RPC to the CLI. |
| `bangerd-root.service` | `root` | `/run/banger-root/bangerd-root.sock` (0600, owner; root-owned dir at 0711) | Narrow root helper: bridge/tap, DM snapshots, NAT, Firecracker launch. |
The owner daemon does all the business logic. It never runs as root.
The root helper runs as root but only accepts a fixed list of operations
and rejects every input that isn't a banger-managed path or name.
The CLI (`banger ...`) talks to the owner daemon. The owner daemon
talks to the root helper for the handful of things only root can do.
Users and CI scripts never call the root helper directly.
### Why two daemons
Before this split the owner daemon shelled `sudo` for every device or
network operation. That meant the user's `sudo` config gated daily
work, and an attacker who compromised the owner daemon inherited
arbitrary `sudo` reach. After the split, the owner daemon has no
ambient root. The only way for it to make a privileged change is to
ask the helper, and the helper only honours requests that fit a
specific shape.
## Authentication
The root helper:
- Listens on a Unix socket at `/run/banger-root/bangerd-root.sock`,
mode 0600, owned by the registered owner UID, in a root-owned
runtime dir at 0711.
- Reads `SO_PEERCRED` on every accepted connection and rejects any
caller whose UID is not 0 or the owner UID recorded in
`/etc/banger/install.toml`. The match is by UID, not username.
- Decodes one JSON request per connection and dispatches it through a
named-method switch. Unknown methods return `unknown_method`.
The owner daemon:
- Listens on `/run/banger/bangerd.sock`, mode 0600, owned by the
install-time owner user. Other host users cannot connect.
- Reads `SO_PEERCRED` on every accepted connection and rejects any
caller whose UID is not 0 or the install-time owner UID. The
filesystem perms already gate access; the peer-cred read is
belt-and-braces in case the socket FD is ever leaked to a
non-owner process.
- Resolves the helper socket path from the install metadata and
retries with backoff if the helper hasn't started yet.
There is no network listener. Every banger control surface is a Unix
socket on the local host.
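A quick way to spot-check those modes and owners on a live system install (paths are the defaults):
```bash
stat -c '%U %a %n' /run/banger /run/banger-root
stat -c '%U %a %n' /run/banger/bangerd.sock /run/banger-root/bangerd-root.sock
```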
## What the root helper will do, exactly
The helper exposes a fixed list of RPC methods (see
`internal/roothelper/roothelper.go` for the canonical set). Each is
shaped so the owner daemon can name a banger-managed object but
cannot pass an arbitrary host path or interface name. Every input
that names a path, device, PID, or interface is checked against a
validator before the helper touches the host.
| Method | Effect | Validation gate |
|---|---|---|
| `priv.ensure_bridge` | Create the configured Linux bridge if missing; assign the bridge IP. | Bridge name must equal `br-fc` or start with `br-fc-` (so a compromised daemon can't drive `ip link` against `eth0` / `docker0` / `lo`). Bridge IP must parse as IPv4. CIDR prefix must be a number in `[8, 32]`. |
| `priv.create_tap` | `ip link add tap NAME tuntap` and add to bridge, owned by the owner user. | Tap name must match `tap-fc-*` or `tap-pool-*`. Bridge config (name + IP + CIDR) passes the same banger-managed check as `priv.ensure_bridge`, otherwise the new tap could be `master`-attached to an arbitrary host iface. |
| `priv.delete_tap` | `ip link del NAME`. | Same prefix check on the tap name. |
| `priv.sync_resolver_routing` | `resolvectl dns/domain/default-route` on the configured bridge. | Bridge name must equal `br-fc` or start with `br-fc-` (same banger-managed check). Resolver address must parse via `net.ParseIP`. |
| `priv.clear_resolver_routing` | `resolvectl revert` on the bridge. | Same banger-managed bridge-name check. |
| `priv.ensure_nat` | `iptables -t nat MASQUERADE` for `(guest_ip, tap)` plus matching FORWARD rules; `enable=false` removes them. | Tap must be banger-prefixed. Guest IP must parse as IPv4. |
| `priv.create_dm_snapshot` | Create a `dmsetup` device-mapper snapshot from `rootfs.ext4` with COW backing file. | Both paths must be inside `/var/lib/banger`; DM name must start with `fc-rootfs-`. |
| `priv.cleanup_dm_snapshot` | `dmsetup remove` and `losetup -d` for a snapshot the helper itself just created. | Every non-empty `dmsnap.Handles` field is checked: DM name `fc-rootfs-*`, DM device `/dev/mapper/fc-rootfs-*`, loops `/dev/loopN`. |
| `priv.remove_dm_snapshot` | `dmsetup remove` by target. | Target must be either a `fc-rootfs-*` name or a `/dev/mapper/fc-rootfs-*` path. |
| `priv.fsck_snapshot` | `e2fsck -fy` against the DM device. | DM device path must match `/dev/mapper/fc-rootfs-*`. Exit 1 (filesystem cleaned) is tolerated. |
| `priv.read_ext4_file` | Read a file from inside an ext4 image via `debugfs cat`. | Image path must be inside `/var/lib/banger` or a managed DM device. Guest path is rejected if it contains debugfs-hostile chars (`"`/`\`/newline). |
| `priv.write_ext4_files` | Batch write files into an ext4 image, root:root, mode-controlled. | Same image-path validator. |
| `priv.resolve_firecracker_binary` | Stat and return the firecracker binary path. | Path is opened with `O_PATH \| O_NOFOLLOW` (refusing symlinks) and Fstat'd through the resulting fd: must be a regular file, executable, root-owned, not group/world-writable. |
| `priv.launch_firecracker` | Start the firecracker process for a VM (jailer-wrapped). | Socket and vsock paths must be inside `/run/banger`. Log/metrics/kernel/initrd paths must be inside `/var/lib/banger`. Tap name must be banger-prefixed. Drives must be inside the state dir or be a `/dev/mapper/fc-rootfs-*` device. Jailer chroot base must be inside the system state/runtime dirs; jailer UID/GID must equal the registered owner. Binary must pass the same root-owned-executable check. |
| `priv.ensure_socket_access` | `chown` and `chmod 0600` on a firecracker API or vsock socket so the owner user can talk to it. | Path must be inside `/run/banger` and not a symlink. The helper opens it with `O_PATH \| O_NOFOLLOW`, refuses anything that isn't a unix socket, and chmod/chown via the resulting fd (no symlink-follow). The local-priv fallback uses `chown -h`. |
| `priv.cleanup_jailer_chroot` | Detach every mount under the per-VM jailer chroot via direct `umount2(MNT_DETACH \| UMOUNT_NOFOLLOW)` syscalls (deepest-first), then `rm -rf` the tree. | Path must be inside the system state/runtime dirs and not a symlink — including no symlinks at intermediate components (resolved with `EvalSymlinks` and re-checked). `UMOUNT_NOFOLLOW` makes the unmounts symlink-safe even if a path is swapped after validation. A `findmnt` guard refuses to `rm -rf` if any mount remains underneath. |
| `priv.find_firecracker_pid` | Resolve a firecracker PID by API socket path. | Filters to processes whose cmdline mentions the requested API socket. |
| `priv.kill_process` / `priv.signal_process` | Send SIGKILL or a named signal to a PID. | PID must refer to a running process whose `/proc/<pid>/cmdline` mentions `firecracker`. |
| `priv.process_running` | Check whether a PID is alive (no host mutation). | Read-only; same cmdline filter. |
Anything outside this list returns `unknown_method` and is logged. The
helper does not run a shell, does not exec helper scripts, and does
not accept commands as strings.
## Filesystem mutations
Path used | Owner | What is created or changed
---|---|---
`/etc/banger/install.toml` | root, 0644 | Written once by `banger system install`. Holds owner UID/GID/home, install timestamp, version. Read by both daemons at startup.
`/etc/systemd/system/bangerd.service` | root, 0644 | Owner-daemon unit. Contents are deterministic; see below.
`/etc/systemd/system/bangerd-root.service` | root, 0644 | Root-helper unit.
`/usr/local/bin/banger` | root, 0755 | Copy of the build output.
`/usr/local/bin/bangerd` | root, 0755 | Same binary, second name.
`/usr/local/lib/banger/banger-vsock-agent` | root, 0755 | Companion agent injected into guests at image-pull time.
`/var/lib/banger/...` | owner (via systemd `StateDirectory=banger`), 0700 | Image artifacts, VM dirs, work disks, kernels, OCI cache, SSH key + known_hosts.
`/var/cache/banger/...` | owner, 0700 | Bundle and OCI download cache.
`/run/banger/...` | owner, 0700 | Owner daemon socket and per-VM firecracker API + vsock sockets.
`/run/banger-root/...` | root, 0711 | Root-helper socket dir; the socket itself is 0600.
`~/.config/banger/config.toml` | owner | Optional user config. Read by the owner daemon at startup.
Outside these directories, banger does not write to the host filesystem
during normal operation. The two exceptions are file-sync (the user
explicitly opts in to copying paths from their home into a guest, which
the owner daemon validates is inside the owner home before reading)
and the install/uninstall actions above.
### Why the owner home is locked down
The `[[file_sync]]` config lets users mirror host files into guests.
banger refuses to follow paths that escape the owner home, including
through symlinks:
- `ResolveFileSyncHostPath` (`internal/config/config.go`) expands a
leading `~/` and rejects any candidate that resolves outside the
configured `OwnerHomeDir`.
- `ResolveExistingFileSyncHostPath` re-checks after `EvalSymlinks` so
a symlink inside `~/.aws` that points at `/etc/shadow` cannot leak
out.
This means an installed banger never reads outside the owner home in
the file-sync path, even if the owner edits config to try.
## Network mutations
For each running VM banger creates:
- One bridge (default `br-fc`, configurable). Created on first VM
start, never deleted automatically.
- One tap interface named `tap-fc-<vm_id>`. Created on VM start,
deleted on VM stop or crash recovery.
- One iptables MASQUERADE rule per VM, only when `--nat` was passed.
Removed by the symmetric `EnsureNAT(enable=false)` call at stop.
- Optionally, `resolvectl` routing entries that send `*.vm` lookups to
banger's in-process DNS server on the bridge. Reverted at stop.
Banger does not touch UFW, firewalld, or other rule managers. It only
edits the iptables tables it created the rules in.
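To see what banger has actually created on a given host (names assume the defaults):
```bash
ip link show br-fc                                      # the bridge
ip -o link show | grep -o 'tap-fc-[^:@ ]*' | sort -u    # per-VM taps
sudo iptables -t nat -S POSTROUTING | grep MASQUERADE   # per-VM NAT rules (--nat VMs only)
resolvectl status br-fc                                 # .vm routing, on systemd-resolved hosts
```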
## Cleanup and uninstall
Per-VM cleanup happens at:
- `banger vm stop <name>` — stops firecracker, removes the per-VM tap,
drops the NAT rule, removes the DM snapshot, removes per-VM
sockets, leaves the work disk.
- `banger vm delete <name>` — same as stop, plus deletes the per-VM
state directory under `/var/lib/banger/vms/<id>` (work disk,
metadata).
- `banger vm prune` — bulk version.
- Crash recovery: on daemon start, `reconcile` runs the same teardown
for any VM whose firecracker process is no longer alive.
System-level uninstall:
```
sudo banger system uninstall # remove services, units, binaries
sudo banger system uninstall --purge # also remove /var/lib/banger,
# /var/cache/banger, /run/banger
```
Without `--purge`, the state dirs survive so a reinstall can pick up
where the previous one left off. With `--purge`, banger leaves no
files behind under `/var/lib`, `/var/cache`, or `/run`.
What `uninstall` does, in order:
1. `systemctl disable --now bangerd.service bangerd-root.service`.
2. Remove `/etc/systemd/system/bangerd.service` and `bangerd-root.service`.
3. Remove `/etc/banger/install.toml` and `/etc/banger/`.
4. `systemctl daemon-reload`.
5. Remove `/usr/local/bin/banger`, `/usr/local/bin/bangerd`,
`/usr/local/lib/banger/`.
6. With `--purge` only: remove the system state, cache, and runtime
dirs.
What `uninstall` does NOT do automatically:
- It does not delete the bridge or any iptables rules. Stop your VMs
first (`banger vm prune` or `banger vm stop <name>` for each VM) so
the per-VM teardown drops them. The bridge itself is intentionally
persistent — a future reinstall reuses it. To remove it manually:
`sudo ip link del br-fc`.
- It does not undo `resolvectl` routing on a bridge that no longer
exists; the entries are harmless if the bridge is gone.
- It does not remove the owner user, the owner's home, or anything
the user wrote into a guest from inside the guest.
## Updating banger
`banger update` is a user-triggered, manually-invoked operation. It
never runs in the background and never auto-checks for new releases.
The flow:
1. **Discover.** GET `https://releases.thaloco.com/banger/manifest.json`
over HTTPS. The URL is hardcoded in the binary at compile time —
a compromised daemon config can't redirect the updater. Manifest
schema_version gates forward compat: a CLI that doesn't recognise
the server's schema_version refuses to update.
2. **In-flight gate.** `daemon.operations.list` RPC. If any operation
is not Done, refuse with the operation list. `--force` overrides.
3. **Download.** Capped GET on the tarball + `SHA256SUMS` (≤ 256 MiB
and ≤ 16 KiB respectively). Tarball is sha256-verified on the fly
against the digest published in `SHA256SUMS`; partial files are
removed on any verification failure.
4. **Cosign signature.** `SHA256SUMS.sig` is fetched (≤ 1 KiB) and
verified against the `BangerReleasePublicKey` embedded in the
running banger binary. The signature is an ECDSA P-256 / SHA-256
blob signature produced by `cosign sign-blob` — verified by Go's
stdlib `crypto/ecdsa.VerifyASN1`, no third-party crypto deps. A
missing signature URL or a verification failure aborts the update
before any binary is touched.
5. **Sanity-run.** Staged `banger --version` must mention the
expected version; staged `bangerd --check-migrations --system`
must exit 0 (compatible) or 1 (will auto-migrate). Exit 2
(incompatible — DB has migrations the new binary doesn't know)
aborts the swap; the running install is untouched.
6. **Swap.** Atomic `os.Rename` for each of the three binaries
(banger-vsock-agent → bangerd → banger), with `.previous` backups.
7. **Restart.** `systemctl restart bangerd-root.service` then
`bangerd.service`. Wait for the new daemon socket to answer
`ping`. Running VMs survive the daemon restart — they're each
their own firecracker process and live in `bangerd-root.service`'s
cgroup; restart's `KillMode=control-group` doesn't reach them.
The new daemon's `reconcile` step re-attaches by reading the
per-VM `handles.json` scratch file and verifying the firecracker
process is still alive.
8. **Verify.** Run `banger doctor` against the just-installed CLI.
FAIL triggers auto-rollback: restore `.previous` backups, restart
services again so the OLD binaries take over. The original error
bubbles to the operator; `--force` skips this step.
9. **Finalise.** Update `/etc/banger/install.toml`'s Version /
Commit / BuiltAt. Remove `.previous` backups. Wipe the staging
directory under `/var/cache/banger/updates/`.
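If the automatic rollback in step 8 is itself unavailable, the manual equivalent is small. This sketch assumes the backups are literally named `<binary>.previous` next to each installed binary — verify that before running, and remember step 9 deletes them once an update finalises:
```bash
sudo mv /usr/local/bin/banger.previous  /usr/local/bin/banger
sudo mv /usr/local/bin/bangerd.previous /usr/local/bin/bangerd
sudo mv /usr/local/lib/banger/banger-vsock-agent.previous \
        /usr/local/lib/banger/banger-vsock-agent
sudo systemctl restart bangerd-root.service bangerd.service
banger doctor
```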
What you're trusting in this flow:
- The cosign **public key** baked into the binary you're updating
FROM. The maintainer rotates it by cutting a new release with a
new key embedded; from then on, only signatures made with the
new private key are accepted. v0.1.x predates a clean rotation
story.
- TLS to `releases.thaloco.com` for transport. The cosign signature
is the actual integrity check; TLS only protects the download in
transit.
- The systemd unit owners (root for the helper, owner for the
daemon). `banger update` requires root because it writes
`/usr/local/bin` and talks to systemctl; it does NOT run via the
helper RPC interface.
What `banger update` deliberately does NOT do:
- No background check timers. Operators run `banger update --check`
on a schedule themselves if they want.
- No update across MINOR boundaries without an explicit `--to`
flag. v0.x is pre-stable; we don't promise that v0.1.5 → v0.2.0
is automatic.
- No state-DB downgrade. Schema migrations are forward-only;
`--check-migrations` refuses to swap a binary that's older than
the running schema.
- No agent re-injection into existing VMs. The vsock agent inside
each VM is the version banger had at image-pull time, not the
current install. v0.1.x doesn't enforce or detect skew here; the
agent's HTTP API is small enough that compat across MINORs is
expected.
## Running outside the system install
Everything above describes the supported deployment: `banger system
install` lays down both systemd units and the helper takes over every
privileged operation.
It is also possible to run `bangerd` directly without installing the
helper — the binary still works as a per-user daemon and shells `sudo
-n` for each privileged operation it would otherwise hand off
(`iptables`, `ip`, `mount`, `mknod`, `dmsetup`, `e2fsck`, `kill`,
`chown -h`, `chown`, `chmod`, `losetup`, `firecracker`).
This mode is intended for ad-hoc developer machines while iterating on
banger itself.
It carries a different trust model:
- It needs `NOPASSWD` sudoers entries for the developer (otherwise
every VM action prompts for a password).
- Once those entries exist, **any** process running as the developer
can invoke those commands with arbitrary arguments — banger's input
validators only constrain what banger itself sends. They are no
defence against a different program on the same account.
- The helper's `SO_PEERCRED` boundary, the systemd hardening
(`NoNewPrivileges`, `ProtectSystem=strict`, the narrow
`CapabilityBoundingSet`), and the helper's own input validators are
all bypassed.
If you care about isolating banger's blast radius from anything else
running as your user, use the system install. If you only need
banger to work on your own dev box, the non-system mode is fine —
just don't run it on a shared or production host.
## Hardening of the systemd units
The two units ship with restrictive defaults; they are written by
banger at install time and the contents are deterministic.
Owner daemon (`bangerd.service`):
- `User=` is the install-time owner; never `root`.
- `NoNewPrivileges=yes`.
- `ProtectSystem=strict` — system directories are read-only.
- `ProtectHome=read-only` — owner home is read-only to the daemon
unit. The daemon writes only to `StateDirectory`, `CacheDirectory`,
`RuntimeDirectory`, plus owner config that the user edits.
- `ProtectControlGroups`, `ProtectKernelLogs`, `ProtectKernelModules`,
`ProtectClock`, `ProtectHostname`, `RestrictSUIDSGID`,
`LockPersonality`.
- `RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK`.
- No `AmbientCapabilities`.
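Assembled from the directives above, the hardening portion of the owner
unit reads roughly as follows. This is a trimmed sketch for orientation,
not the exact unit banger renders; `Exec*`, dependency, and directory
directives are omitted:

```ini
# Sketch of the bangerd.service hardening block (illustrative only).
[Service]
# User= is the install-time owner account, never root.
User=owner
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=read-only
ProtectControlGroups=yes
ProtectKernelLogs=yes
ProtectKernelModules=yes
ProtectClock=yes
ProtectHostname=yes
RestrictSUIDSGID=yes
LockPersonality=yes
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK
```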
Root helper (`bangerd-root.service`):
- Same hardening as above, plus `ProtectHome=yes` (no host-home
visibility at all from the helper).
- `CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT`.
Only the capabilities required for tap/bridge, iptables, dmsetup,
loop devices, ownership fixups, device node creation, and Firecracker
process management. No `CAP_SYS_BOOT`, no `CAP_SYS_PTRACE`,
no `CAP_SYS_MODULE`, no `CAP_NET_BIND_SERVICE`.
- `ReadWritePaths=/var/lib/banger`.
## What this leaves you trusting
If you install banger as root, you are trusting:
1. The two binaries banger drops under `/usr/local/bin` and the
companion agent under `/usr/local/lib/banger`. These should match
the build artifacts you reviewed.
2. The path/identifier validators in
`internal/roothelper/roothelper.go` to be tight: `validateManagedPath`,
`validateTapName`, `validateDMName`, `validateDMDevicePath`,
`validateLoopDevicePath`, `validateDMRemoveTarget`,
`validateDMSnapshotHandles`, `validateRootExecutable`,
`validateNotSymlink`, `validateExt4ImagePath`,
`validateLinuxIfaceName`, `validateBangerBridgeName`,
`validateNetworkConfig`, `validateCIDRPrefix`, `validateIPv4`,
`validateResolverAddr`, `validateSignalName`, and
`validateFirecrackerPID`. If any of these are bypassed, the helper
would carry out a privileged op against an unmanaged target. They
are unit-tested in `internal/roothelper/roothelper_test.go`.
3. The Firecracker binary banger executes. The helper refuses to launch
anything that isn't a regular, executable, root-owned file that is
not world-writable — but the binary's own behaviour is your
responsibility.
4. Your own owner-user account. The owner can ask the helper to
create taps, run firecracker, and edit ext4 images under
`/var/lib/banger`. Anyone with the owner's UID can do those
things; treat that account as semi-privileged.
What you do **not** have to trust:
- The CLI process. It only talks Unix-socket RPC.
- Other host users. The helper socket is 0600 root and the owner
socket is 0700 owner.
- The contents of the user's home, except the file paths that
`[[file_sync]]` explicitly names — and even those are clamped to
the owner home.
- The guest. Guests cannot reach the helper or the owner daemon; the
only host endpoint a guest sees is the in-process DNS server on the
bridge IP and the bridge itself for outbound NAT.

docs/release-process.md Normal file

@ -0,0 +1,189 @@
# Release process
Maintainer-facing runbook for cutting and publishing a new banger
release. End users don't need any of this — they pick up new releases
through `banger update` or the curl-piped `install.sh`.
## What ships in a release
Each release publishes four objects to the R2 bucket served at
`https://releases.thaloco.com/banger/`:
| Object | Path | Notes |
|---|---|---|
| Tarball | `<version>/banger-<version>-linux-amd64.tar.gz` | `banger`, `bangerd`, `banger-vsock-agent` at the root, no subdirs |
| Hashes | `<version>/SHA256SUMS` | One line for the tarball, GNU `sha256sum` format |
| Signature | `<version>/SHA256SUMS.sig` | base64-encoded ASN.1 ECDSA cosign-blob signature over `SHA256SUMS` |
| Manifest | `manifest.json` (bucket root) | Describes every published release; `latest_stable` points at the most recent |
`install.sh` lives at the bucket root too (unversioned) so the
`curl … | bash` URL stays stable across releases.
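Laid out concretely, the bucket for a single release looks like the tree
below (version string and digest are placeholders):

```
banger/
├── install.sh        <- unversioned; always the latest copy
├── manifest.json     <- all published releases + latest_stable pointer
└── v0.1.10/
    ├── banger-v0.1.10-linux-amd64.tar.gz
    ├── SHA256SUMS     <- "<64-hex-digest>  banger-v0.1.10-linux-amd64.tar.gz"
    └── SHA256SUMS.sig
```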
## Trust model recap
Every release is cosign-signed. The public key is pinned in two places
that MUST stay in sync:
- `BangerReleasePublicKey` in `internal/updater/verify_signature.go`,
used by `banger update`.
- `scripts/install.sh` — embedded copy used by the curl-piped installer
before any banger binary is on disk.
`scripts/publish-banger-release.sh` aborts the upload if the two copies
diverge — that's the only mechanism keeping them coupled, so don't
edit either alone.
The signed payload is `SHA256SUMS`, which in turn covers the tarball.
Verification uses the Go standard library (`crypto/ecdsa.VerifyASN1`)
on the update path and `openssl dgst -verify` on the install-script
path. cosign is needed only for **signing**.
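As a command-line sketch of that split (key filenames and the exact cosign
flags are assumptions here; the publish script and `install.sh` are the
authoritative implementations):

```sh
# Maintainer side: detached signature over SHA256SUMS.
# Flag names vary across cosign versions; --tlog-upload=false matches
# the "no transparency log" behaviour described above.
cosign sign-blob --key cosign.key --tlog-upload=false \
  --output-signature SHA256SUMS.sig SHA256SUMS

# Client side without cosign (roughly what install.sh does):
base64 -d SHA256SUMS.sig > sums.sig.der
openssl dgst -sha256 -verify cosign.pub -signature sums.sig.der SHA256SUMS
sha256sum -c SHA256SUMS   # then the tarball is checked against the signed hashes
```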
## Pre-flight checklist
Run these before tagging or publishing:
1. **`make smoke`** — the full systemd-driven scenario suite must be
green. The smoke harness exercises the real install + update path
end to end; if it's red, do not cut.
2. **CHANGELOG entry.** Add a `## [vX.Y.Z] - YYYY-MM-DD` section under
`## [Unreleased]` describing what changed. Use the
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) sub-headings
(`### Added`, `### Fixed`, `### Notes`).
3. **Bump the link table** at the bottom of `CHANGELOG.md`:
```markdown
[Unreleased]: …/compare/vX.Y.Z...HEAD
[vX.Y.Z]: …/releases/tag/vX.Y.Z
```
4. **Note unit-file changes loudly** in the CHANGELOG entry. `banger
update` swaps binaries only — it does NOT rewrite
`/etc/systemd/system/bangerd*.service`. If this release changed
`renderSystemdUnit` / `renderRootHelperSystemdUnit`, the entry must
tell existing-install users to run `sudo banger system install`
once after updating to pick up the new units. v0.1.4 and v0.1.6
are reference examples.
Commit the CHANGELOG change, push to `main`, and confirm CI is green.
## Cutting the release
Order matters: publish first, then tag.
1. **Run the publish script:**
```sh
scripts/publish-banger-release.sh vX.Y.Z
```
The script:
- Builds `banger`, `bangerd`, `banger-vsock-agent` with `-ldflags`
baking the version, the current commit SHA, and a UTC build
timestamp into `internal/buildinfo`.
- Tarballs the three binaries (bare basenames at the tar root —
`internal/updater/StageTarball` rejects anything else).
- Computes `SHA256SUMS`, signs it with `cosign sign-blob` (no
transparency log, no bundle format — banger verifies the bare
ASN.1 DER signature directly).
- Verifies the signature against the public key extracted from
`internal/updater/verify_signature.go`, then diffs that against
the public key embedded in `scripts/install.sh`. Either failure
aborts before upload.
- Pulls the existing `manifest.json` from the bucket, appends the
new release entry, points `latest_stable` at it, and uploads
everything via rclone.
- Uploads `scripts/install.sh` to the bucket root so the curl-piped
installer stays current.
2. **Tag and push:**
```sh
git tag vX.Y.Z
git push --tags
```
Tagging happens AFTER publishing so the tag only exists if the
release actually shipped.
3. **Verify from a clean machine:**
```sh
curl -fsSL https://releases.thaloco.com/banger/manifest.json | jq .latest_stable
curl -fsSL https://releases.thaloco.com/banger/install.sh | head -20
banger update --check # on an existing install
```
## Verification releases
If a release fixes anything in the update flow itself —
`runUpdate` (`internal/cli/commands_update.go`), the systemd unit
templates, or the helper/daemon restart sequencing — cut a follow-up
no-op verification release immediately. The reason: `banger update`
runs the OLD binary as the driver of the swap. A fix in vN can't be
observed end-to-end on a vN-1 host updating to vN, because vN-1 is
still in the driver seat. vN+1 with no functional changes lets a host
on vN update to it and observe the fix live with vN as the driver.
Examples in CHANGELOG.md: v0.1.3 follows v0.1.2's update-flow fix;
v0.1.5 follows v0.1.4's daemon-restart fix.
The verification-release CHANGELOG section is short and explicit:
> No functional changes. Verification release for vN: …
## Patch vs minor
banger follows [SemVer](https://semver.org/spec/v2.0.0.html). For
v0.1.x, the practical contract:
- **Patch (v0.1.x):** bug fixes, internal refactors, anything that
doesn't change the exposed API/CLI behavior.
- **Minor (v0.2.x):** any change to the **exposed API behavior or
contract**. The vsock guest-agent protocol is the canonical example —
a minor bump means existing VMs created against the older minor need
to be re-pulled. Other minor-trigger changes: removing a CLI flag,
changing a stable RPC method's request/response shape, breaking the
on-disk store schema in a non-forward-compatible way.
If in doubt, prefer the higher bump. Patch releases that turn out to
have broken a contract are the worst of both worlds: users update without
warning, then break.
## Sibling catalogs
Kernel and golden-image releases ship through the same gate. The
`internal/kernelcat/catalog.json` and `internal/imagecat/catalog.json`
manifests are `go:embed`-ed at build time, so a new entry only
reaches users when banger itself is re-released. In practice:
1. Run `scripts/publish-kernel.sh <name>` or
`scripts/publish-golden-image.sh …` to upload the artefact and
patch the appropriate `catalog.json` in the working tree.
2. Commit the catalog change with whatever banger fix or feature it's
landing alongside.
3. Cut a banger release the normal way; the new catalog entry ships
with the next `banger` binary.
The kernel and image catalogs each have their own R2 bucket
(`kernels.thaloco.com`, `images.thaloco.com`) so versioning of the
artefacts is independent of banger's release cadence — but
**discoverability** is gated by the banger release that embeds the
catalog pointer.
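The embedding itself is the standard `go:embed` mechanism. A minimal
sketch of the shape (type and field names are illustrative, not the real
`kernelcat` schema):

```go
// Hypothetical sketch of an embedded catalog; the real
// internal/kernelcat types and fields may differ.
package kernelcat

import (
	_ "embed"
	"encoding/json"
)

//go:embed catalog.json
var rawCatalog []byte // frozen into the banger binary at build time

// Entry is a stand-in for whatever a real catalog entry carries.
type Entry struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

// Load parses the catalog that shipped with this binary. A new
// catalog.json therefore reaches users only via a new banger release.
func Load() ([]Entry, error) {
	var entries []Entry
	if err := json.Unmarshal(rawCatalog, &entries); err != nil {
		return nil, err
	}
	return entries, nil
}
```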
## When something goes wrong mid-release
- **Signature verification fails locally** in
`publish-banger-release.sh`: confirm `internal/updater/verify_signature.go`
contains the same public key as `cosign.pub` in the repo root. If
the script reports drift between `verify_signature.go` and
`install.sh`, run `diff` between the two `BEGIN PUBLIC KEY` blocks
and resolve before rerunning.
- **rclone upload fails partway through:** the script uploads tarball,
hashes, signature, and manifest in that order. Re-running is safe;
rclone will overwrite. Until the manifest is uploaded, no client
sees the new release — so a partial upload is invisible.
- **Manifest already names the version** (re-cutting): the publish
script's `jq` filter dedupes by `version`, so re-running with the
same `vX.Y.Z` cleanly replaces the entry.
- **Already tagged but the release is bad:** delete the tag locally
AND on the remote (`git push --delete origin vX.Y.Z`), revert the
CHANGELOG entry, fix the bug, and start the cycle over with a fresh
patch number. Do NOT re-use the version — installed clients have
already cached its `SHA256SUMS` against the manifest.


@ -1,10 +0,0 @@
# Experimental Void Linux guest profile for local testing.
#
# Copy the values you want into ~/.config/banger/config.toml and replace
# /abs/path/to/banger with your checkout path. Do not set default_base_rootfs
# to the Void image yet; banger image build still assumes the Debian flow.
runtime_dir = "/abs/path/to/banger/runtime"
default_image_name = "void-exp"
default_rootfs = "/abs/path/to/banger/runtime/rootfs-void.ext4"
default_work_seed = "/abs/path/to/banger/runtime/rootfs-void.work-seed.ext4"

go.mod

@ -3,17 +3,15 @@ module banger
go 1.25.0
require (
github.com/charmbracelet/bubbles v0.14.0
github.com/charmbracelet/bubbletea v0.21.1-0.20220623121936-ca32c4c62873
github.com/charmbracelet/lipgloss v0.5.1-0.20220407020210-a86f21a0ae43
github.com/firecracker-microvm/firecracker-go-sdk v1.0.0
github.com/mattn/go-isatty v0.0.20
github.com/google/go-containerregistry v0.21.5
github.com/klauspost/compress v1.18.5
github.com/miekg/dns v1.1.72
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.4
github.com/spf13/cobra v1.8.1
golang.org/x/crypto v0.46.0
golang.org/x/sys v0.39.0
github.com/spf13/cobra v1.10.2
golang.org/x/crypto v0.50.0
golang.org/x/sys v0.43.0
modernc.org/sqlite v1.38.2
)
@ -21,11 +19,12 @@ require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/containerd/fifo v1.0.0 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.18.2 // indirect
github.com/containernetworking/cni v1.0.1 // indirect
github.com/containernetworking/plugins v1.0.1 // indirect
github.com/docker/cli v29.4.0+incompatible // indirect
github.com/docker/docker-credential-helpers v0.9.3 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-openapi/analysis v0.21.2 // indirect
github.com/go-openapi/errors v0.20.2 // indirect
@ -43,33 +42,30 @@ require (
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mdlayher/socket v0.2.0 // indirect
github.com/mdlayher/vsock v1.1.1 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.4.3 // indirect
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b // indirect
github.com/muesli/cancelreader v0.2.1 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.11.1-0.20220212125758-44cd13922739 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/pflag v1.0.10 // indirect
github.com/vbatts/tar-split v0.12.2 // indirect
github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5 // indirect
github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f // indirect
go.mongodb.org/mongo-driver v1.8.3 // indirect
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
golang.org/x/mod v0.31.0 // indirect
golang.org/x/net v0.48.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/term v0.38.0 // indirect
golang.org/x/text v0.32.0 // indirect
golang.org/x/tools v0.40.0 // indirect
golang.org/x/mod v0.35.0 // indirect
golang.org/x/net v0.53.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/text v0.36.0 // indirect
golang.org/x/tools v0.44.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
modernc.org/libc v1.66.3 // indirect
modernc.org/mathutil v1.7.1 // indirect

go.sum

@ -72,8 +72,6 @@ github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:l
github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
@ -93,15 +91,6 @@ github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3k
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/bubbles v0.14.0 h1:DJfCwnARfWjZLvMglhSQzo76UZ2gucuHPy9jLWX45Og=
github.com/charmbracelet/bubbles v0.14.0/go.mod h1:bbeTiXwPww4M031aGi8UK2HT9RDWoiNibae+1yCMtcc=
github.com/charmbracelet/bubbletea v0.21.0/go.mod h1:GgmJMec61d08zXsOhqRC/AiOx4K4pmz+VIcRIm1FKr4=
github.com/charmbracelet/bubbletea v0.21.1-0.20220623121936-ca32c4c62873 h1:ti/1QRoSzanYHPW4jLgIjCkfJ3beXh2h1nr6nEkWOig=
github.com/charmbracelet/bubbletea v0.21.1-0.20220623121936-ca32c4c62873/go.mod h1:aoVIwlNlr5wbCB26KhxfrqAn0bMp4YpJcoOelbxApjs=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v0.5.0/go.mod h1:EZLha/HbzEt7cYqdFPovlqy5FZPj0xFhg5SaqxScmgs=
github.com/charmbracelet/lipgloss v0.5.1-0.20220407020210-a86f21a0ae43 h1:xO5Bh21Ii+0p3EYp1GdFEF/Iax7VhBgMbBVCOFBZ2/Q=
github.com/charmbracelet/lipgloss v0.5.1-0.20220407020210-a86f21a0ae43/go.mod h1:EZLha/HbzEt7cYqdFPovlqy5FZPj0xFhg5SaqxScmgs=
github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
@ -132,8 +121,6 @@ github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on
github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE=
github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw=
github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/containerd/containerd v1.2.10/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA=
github.com/containerd/containerd v1.3.0-beta.2.0.20190828155532-0293cbd26c69/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA=
github.com/containerd/containerd v1.3.0/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA=
@ -175,6 +162,8 @@ github.com/containerd/imgcrypt v1.1.1/go.mod h1:xpLnwiQmEUJPvQoAapeb2SNCxz7Xr6PJ
github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFYfE5+So4M5syatU0N0f0LbWpuqyMi4/BE8c=
github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
github.com/containerd/stargz-snapshotter/estargz v0.18.2 h1:yXkZFYIzz3eoLwlTUZKz2iQ4MrckBxJjkmD16ynUTrw=
github.com/containerd/stargz-snapshotter/estargz v0.18.2/go.mod h1:XyVU5tcJ3PRpkA9XS2T5us6Eg35yM0214Y+wvrZTBrY=
github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8=
@ -219,7 +208,7 @@ github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfc
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
@ -235,9 +224,13 @@ github.com/dgrijalva/jwt-go v0.0.0-20170104182250-a601269ab70c/go.mod h1:E3ru+11
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E=
github.com/docker/cli v29.4.0+incompatible h1:+IjXULMetlvWJiuSI0Nbor36lcJ5BTcVpUmB21KBoVM=
github.com/docker/cli v29.4.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v0.0.0-20190905152932-14b96e55d84c/go.mod h1:0+TTO4EOBfRPhZXAeF1Vu+W3hHZ8eLp8PgKVZlcvtFY=
github.com/docker/distribution v2.7.1-0.20190205005809-0d3efadf0154+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8=
github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo=
github.com/docker/go-events v0.0.0-20170721190031-9461782956ad/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
github.com/docker/go-metrics v0.0.0-20180209012529-399ea8c73916/go.mod h1:/u0gXw0Gay3ceNrsHubL3BtdOL2fHf93USgMTe0W5dI=
@ -397,8 +390,10 @@ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-containerregistry v0.21.5 h1:KTJG9Pn/jC0VdZR6ctV3/jcN+q6/Iqlx0sTVz3ywZlM=
github.com/google/go-containerregistry v0.21.5/go.mod h1:ySvMuiWg+dOsRW0Hw8GYwfMwBlNRTmpYBFJPlkco5zU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
@ -475,6 +470,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@ -487,9 +484,6 @@ github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
@ -502,14 +496,9 @@ github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kN
github.com/marstr/guid v1.1.0/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
@ -522,6 +511,7 @@ github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI=
github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs=
github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mitchellh/mapstructure v1.3.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
@ -540,17 +530,6 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b h1:1XF24mVaiu7u+CFywTdcDo2ie1pzzhwjt6RHqzpMU34=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho=
github.com/muesli/cancelreader v0.2.0/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/cancelreader v0.2.1 h1:Xzd1B4U5bWQOuSKuN398MyynIGTNT89dxzpEDsalXZs=
github.com/muesli/cancelreader v0.2.1/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
github.com/muesli/termenv v0.11.1-0.20220212125758-44cd13922739 h1:QANkGiGr39l1EESqrE0gZw0/AJNYzIvoGLhIoVYtluI=
github.com/muesli/termenv v0.11.1-0.20220212125758-44cd13922739/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
@ -588,9 +567,12 @@ github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1
github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
github.com/opencontainers/go-digest v1.0.0-rc1.0.20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.0.0/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
@ -651,9 +633,6 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
@ -662,7 +641,6 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4=
github.com/safchain/ethtool v0.0.0-20210803160452-9aa261dae9b1/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4=
github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
@ -688,15 +666,17 @@ github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU
github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.1-0.20171106142849-4c012f6dcd95/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8=
github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@ -728,6 +708,8 @@ github.com/urfave/cli v0.0.0-20171014202726-7bc6a0acffa5/go.mod h1:70zkFmudgCuE/
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/vbatts/tar-split v0.12.2 h1:w/Y6tjxpeiFMR47yzZPlPj/FcPLpXbTUi/9H7d3CPa4=
github.com/vbatts/tar-split v0.12.2/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
@ -771,6 +753,7 @@ go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20171113213409-9f005a07e0d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181009213950-7c1a557ab941/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
@ -788,8 +771,8 @@ golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20201216223049-8b5274cf687f/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -822,8 +805,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -864,8 +847,8 @@ golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA=
golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -882,8 +865,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -952,20 +935,18 @@ golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY=
golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@ -975,8 +956,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@ -1024,8 +1005,8 @@ golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjs
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c=
golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@ -1130,8 +1111,10 @@ gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

images/golden/Dockerfile Normal file

@ -0,0 +1,149 @@
# banger golden image — Debian bookworm sandbox for development + testing.
#
# Two sections:
# 1. ESSENTIAL — what banger's lifecycle requires to boot the guest.
# 2. OPINION — developer conveniences curated for banger sandboxes.
#
# Banger's guest agents (vsock agent, network bootstrap, first-boot unit)
# are injected at `banger image pull` time, not baked here. Keeping them
# out means this image stays portable enough to run in other contexts.
FROM debian:bookworm-slim
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8
# -------- 1. ESSENTIAL --------
# Banger needs: an init (systemd + udev + dbus), sshd (the only
# control channel), TLS roots + curl (first-boot installs + mise
# installer), gnupg (build-time signing-key verification for the
# Docker apt repo), iproute2 (debugging; `ip` is still useful even
# when the kernel sets IP via cmdline).
#
# udev is a Recommends of the systemd package on Debian. With
# --no-install-recommends it's skipped — and without it systemd never
# activates device units, so fstab mounts of /dev/vdb (banger's work
# disk) hang forever waiting for a device that is already enumerated
# by the kernel but never "seen" by systemd. dbus gets the same
# treatment for the same reason (services that need the system bus
# wedge without it).
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
systemd systemd-sysv udev dbus \
openssh-server \
ca-certificates \
curl \
gnupg \
iproute2 \
&& rm -rf /var/lib/apt/lists/*
# -------- 2. OPINION --------
# Developer sandbox conveniences. Language runtimes are deliberately
# absent — `mise` (below) handles per-repo `.mise.toml`/`.tool-versions`
# on first `vm run`.
# Core CLI + search/nav + build toolchain + lint/debug + editor/session.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
git jq less tree file unzip zip rsync \
ripgrep fd-find \
build-essential pkg-config make \
shellcheck sqlite3 \
iputils-ping dnsutils \
vim-tiny tmux htop \
&& rm -rf /var/lib/apt/lists/*
# Docker CE (with Compose v2 + buildx) from the official apt repo.
# Nested-VM docker gives Compose workflows hostname/port isolation
# per banger VM, which is a big part of the sandbox story.
#
# The apt key is verified against its published fingerprint before
# we commit it to the signed-by keyring, so a tampered download (or
# a TLS compromise against download.docker.com) cannot silently
# swap in an attacker-controlled signing key. Fingerprint source:
# https://docs.docker.com/engine/install/debian/#install-using-the-repository
RUN set -eu; \
expected_fpr=9DC858229FC7DD38854AE2D88D81803C0EBFCD88; \
install -m 0755 -d /etc/apt/keyrings; \
curl -fsSL https://download.docker.com/linux/debian/gpg -o /tmp/docker.asc; \
got="$(gpg --with-colons --show-keys --fingerprint /tmp/docker.asc | awk -F: '/^fpr:/ {print $10; exit}')"; \
if [ "$got" != "$expected_fpr" ]; then \
echo "docker apt key fingerprint mismatch: got $got, want $expected_fpr" >&2; \
exit 1; \
fi; \
mv /tmp/docker.asc /etc/apt/keyrings/docker.asc; \
chmod a+r /etc/apt/keyrings/docker.asc; \
printf 'deb [arch=%s signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian bookworm stable\n' \
"$(dpkg --print-architecture)" > /etc/apt/sources.list.d/docker.list; \
apt-get update; \
apt-get install -y --no-install-recommends \
docker-ce docker-ce-cli containerd.io \
docker-buildx-plugin docker-compose-plugin; \
rm -rf /var/lib/apt/lists/*
# mise — per-repo version manager. Installed from a pinned GitHub
# release asset rather than `curl https://mise.run | sh` so a compromise
# of the installer endpoint can't silently push arbitrary code into
# the golden image.
#
# Update protocol: bump MISE_VERSION + MISE_SHA256 together. Source
# for the hash is the `digest` field on the release asset from
# `gh release view --repo jdx/mise --json assets`, or compute from
# the downloaded file and cross-reference against SHASUMS256.txt on
# the release page.
ARG MISE_VERSION=v2026.4.18
ARG MISE_SHA256_AMD64=6ae2d5f0f23a2f2149bc5d9bf264fe0922a1da843f1903e453516c462b23cc1f
RUN set -eux; \
arch="$(dpkg --print-architecture)"; \
if [ "$arch" != "amd64" ]; then \
echo "mise pin only tracks amd64; add a ${arch} hash to refresh" >&2; \
exit 1; \
fi; \
curl -fsSL -o /tmp/mise "https://github.com/jdx/mise/releases/download/${MISE_VERSION}/mise-${MISE_VERSION}-linux-x64"; \
echo "${MISE_SHA256_AMD64} /tmp/mise" | sha256sum -c -; \
install -m 0755 /tmp/mise /usr/local/bin/mise; \
rm /tmp/mise; \
install -d /etc/profile.d; \
printf '%s\n' 'if [ -x /usr/local/bin/mise ]; then eval "$(/usr/local/bin/mise activate bash)"; fi' \
> /etc/profile.d/mise.sh; \
chmod 0644 /etc/profile.d/mise.sh
# Default branch for any git init inside the sandbox.
RUN git config --system init.defaultBranch main
# `fd-find` installs as `fdfind` on Debian to avoid a long-standing name
# clash. Expose the ergonomic name for interactive use.
RUN ln -s /usr/bin/fdfind /usr/local/bin/fd
# Strip per-image identity so every banger VM gets its own.
# - /etc/machine-id: systemd-firstboot regenerates at boot when empty.
# - SSH host keys: removed here; a ssh.service drop-in (below) runs
# `ssh-keygen -A` before sshd so the VM's first boot generates a
# unique set.
# - /run/sshd tmpfiles entry: Debian's openssh-server package doesn't
# ship one, and ssh.service's own `RuntimeDirectory=sshd` fires too
# late for the ExecStartPre config test, so sshd -t blows up with
# "Missing privilege separation directory: /run/sshd" before the
# daemon ever starts. Creating the dir via tmpfiles.d runs early in
# systemd-tmpfiles-setup, well before ssh.service kicks off.
RUN : > /etc/machine-id \
&& rm -f /etc/ssh/ssh_host_*_key /etc/ssh/ssh_host_*_key.pub \
&& install -d /etc/systemd/system/ssh.service.d \
&& printf '%s\n' \
'[Service]' \
'# Reset main unit ExecStartPre list: Debian ships `sshd -t` as' \
'# the first ExecStartPre, which fails on missing host keys and' \
'# short-circuits the service before ours gets a chance to run.' \
'ExecStartPre=' \
'ExecStartPre=/usr/bin/mkdir -p /run/sshd' \
'ExecStartPre=/usr/bin/ssh-keygen -A' \
'ExecStartPre=/usr/sbin/sshd -t' \
'StandardOutput=journal+console' \
'StandardError=journal+console' \
> /etc/systemd/system/ssh.service.d/banger.conf \
&& rm -f /etc/systemd/system/ssh.service.d/regen-host-keys.conf \
&& printf 'd /run/sshd 0755 root root -\n' > /usr/lib/tmpfiles.d/sshd.conf
# No CMD / ENTRYPOINT: banger boots this via systemd as PID 1 after
# first-boot, not via `docker run`.


@ -1,317 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
log() {
printf '[interactive] %s\n' "$*"
}
usage() {
cat <<'EOF'
Usage: ./interactive.sh <base-rootfs> [--out <path>] [--size <size>]
Creates a writable copy of the base rootfs and boots a VM so you can
customize it manually over SSH. No automatic package/config changes
are applied.
EOF
}
parse_size() {
local raw="$1"
if [[ "$raw" =~ ^([0-9]+)([KMG])?$ ]]; then
local num="${BASH_REMATCH[1]}"
local unit="${BASH_REMATCH[2]}"
case "$unit" in
K) echo $((num * 1024)) ;;
M|"") echo $((num * 1024 * 1024)) ;;
G) echo $((num * 1024 * 1024 * 1024)) ;;
esac
return 0
fi
return 1
}
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEFAULT_RUNTIME_DIR="$DIR"
if [[ -d "$DIR/runtime" ]]; then
DEFAULT_RUNTIME_DIR="$DIR/runtime"
fi
RUNTIME_DIR="${BANGER_RUNTIME_DIR:-$DEFAULT_RUNTIME_DIR}"
if [[ ! -d "$RUNTIME_DIR" ]]; then
log "runtime bundle not found: $RUNTIME_DIR"
log "run 'make runtime-bundle' or set BANGER_RUNTIME_DIR"
exit 1
fi
STATE="${BANGER_STATE_DIR:-${XDG_STATE_HOME:-$HOME/.local/state}/banger/interactive}"
VM_ROOT="$STATE/vms"
mkdir -p "$VM_ROOT"
BUNDLE_METADATA="$RUNTIME_DIR/bundle.json"
bundle_path() {
local key="$1"
local fallback="$2"
local rel=""
if [[ -f "$BUNDLE_METADATA" ]] && command -v jq >/dev/null 2>&1; then
rel="$(jq -r --arg key "$key" '.[$key] // empty' "$BUNDLE_METADATA" 2>/dev/null || true)"
fi
if [[ -n "$rel" && "$rel" != "null" ]]; then
printf '%s\n' "$RUNTIME_DIR/$rel"
return
fi
printf '%s\n' "$fallback"
}
FC_BIN="$RUNTIME_DIR/firecracker"
KERNEL="$(bundle_path default_kernel "$RUNTIME_DIR/wtf/root/boot/vmlinux-6.8.0-94-generic")"
INITRD="$(bundle_path default_initrd "$RUNTIME_DIR/wtf/root/boot/initrd.img-6.8.0-94-generic")"
SSH_KEY="$RUNTIME_DIR/id_ed25519"
BR_DEV="br-fc"
BR_IP="172.16.0.1"
CIDR="24"
DNS_SERVER="1.1.1.1"
resolve_banger_bin() {
if [[ -n "${BANGER_BIN:-}" ]]; then
printf '%s\n' "$BANGER_BIN"
return
fi
if [[ -x "$DIR/banger" ]]; then
printf '%s\n' "$DIR/banger"
return
fi
if command -v banger >/dev/null 2>&1; then
command -v banger
return
fi
log "banger binary not found; install/build banger or set BANGER_BIN"
exit 1
}
BANGER_BIN="$(resolve_banger_bin)"
NAT_ACTIVE=0
banger_nat() {
local action="$1"
"$BANGER_BIN" internal nat "$action" --guest-ip "$GUEST_IP" --tap "$TAP_DEV"
}
BASE_ROOTFS=""
OUT_ROOTFS=""
SIZE_SPEC=""
while [[ $# -gt 0 ]]; do
case "$1" in
--out)
OUT_ROOTFS="${2:-}"
shift 2
;;
--size)
SIZE_SPEC="${2:-}"
shift 2
;;
-h|--help)
usage
exit 0
;;
*)
if [[ -z "$BASE_ROOTFS" ]]; then
BASE_ROOTFS="$1"
shift
else
log "unknown option: $1"
usage
exit 1
fi
;;
esac
done
if [[ -z "$BASE_ROOTFS" ]]; then
usage
exit 1
fi
if [[ ! -f "$BASE_ROOTFS" ]]; then
log "base rootfs not found: $BASE_ROOTFS"
exit 1
fi
if [[ ! -f "$KERNEL" ]]; then
log "kernel not found: $KERNEL"
exit 1
fi
if [[ ! -f "$INITRD" ]]; then
log "initrd not found: $INITRD"
exit 1
fi
if [[ -z "$OUT_ROOTFS" ]]; then
base_dir="$(dirname "$BASE_ROOTFS")"
base_name="$(basename "$BASE_ROOTFS")"
OUT_ROOTFS="${base_dir}/rw-${base_name}"
fi
if [[ -e "$OUT_ROOTFS" ]]; then
log "output rootfs already exists: $OUT_ROOTFS"
exit 1
fi
log "copying base rootfs to $OUT_ROOTFS"
cp --reflink=auto "$BASE_ROOTFS" "$OUT_ROOTFS"
if [[ -n "$SIZE_SPEC" ]]; then
SIZE_BYTES="$(parse_size "$SIZE_SPEC")"
BASE_BYTES="$(stat -c%s "$BASE_ROOTFS")"
if [[ -z "$SIZE_BYTES" || "$SIZE_BYTES" -lt "$BASE_BYTES" ]]; then
log "size must be >= base image size"
exit 1
fi
log "resizing rootfs to $SIZE_SPEC"
truncate -s "$SIZE_BYTES" "$OUT_ROOTFS"
e2fsck -p -f "$OUT_ROOTFS" >/dev/null
resize2fs "$OUT_ROOTFS" >/dev/null
fi
VM_ID="$(head -c 32 /dev/urandom | xxd -p -c 256)"
VM_TAG="${VM_ID:0:8}"
VM_NAME="interactive-${VM_TAG}"
VM_DIR="$VM_ROOT/$VM_ID"
mkdir -p "$VM_DIR"
API_SOCK="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}/banger/fc-$VM_TAG.sock"
LOG_FILE="$VM_DIR/firecracker.log"
TAP_DEV="tap-fc-$VM_TAG"
# Allocate guest IP
NEXT_IP_FILE="$STATE/next_ip"
NEXT_IP="$(cat "$NEXT_IP_FILE" 2>/dev/null || echo 2)"
GUEST_IP="172.16.0.$NEXT_IP"
echo "$((NEXT_IP + 1))" > "$NEXT_IP_FILE"
sudo -v
cleanup() {
sudo kill "${FC_PID:-}" 2>/dev/null || true
if [[ "$NAT_ACTIVE" -eq 1 ]]; then
banger_nat down >/dev/null 2>&1 || true
fi
sudo ip link del "$TAP_DEV" 2>/dev/null || true
rm -f "$API_SOCK"
rm -rf "$VM_DIR"
}
trap cleanup EXIT
sudo mkdir -p "$(dirname "$API_SOCK")"
sudo chown "$(id -u):$(id -g)" "$(dirname "$API_SOCK")"
# Host bridge
if ! ip link show "$BR_DEV" >/dev/null 2>&1; then
log "creating host bridge $BR_DEV ($BR_IP/$CIDR)"
sudo ip link add name "$BR_DEV" type bridge
sudo ip addr add "${BR_IP}/${CIDR}" dev "$BR_DEV"
sudo ip link set "$BR_DEV" up
else
sudo ip link set "$BR_DEV" up
fi
log "creating tap device $TAP_DEV"
TAP_USER="${SUDO_UID:-$(id -u)}"
TAP_GROUP="${SUDO_GID:-$(id -g)}"
sudo ip tuntap add dev "$TAP_DEV" mode tap user "$TAP_USER" group "$TAP_GROUP"
sudo ip link set "$TAP_DEV" master "$BR_DEV"
sudo ip link set "$TAP_DEV" up
sudo ip link set "$BR_DEV" up
log "starting firecracker process"
rm -f "$API_SOCK"
nohup sudo -E "$FC_BIN" --api-sock "$API_SOCK" >"$LOG_FILE" 2>&1 &
FC_PID="$!"
log "waiting for firecracker api socket"
for _ in $(seq 1 200); do
[[ -S "$API_SOCK" ]] && break
sleep 0.02
done
[[ -S "$API_SOCK" ]] || { log "firecracker api socket not ready"; exit 1; }
log "configuring machine"
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/machine-config \
-H "Content-Type: application/json" \
-d '{
"vcpu_count": 2,
"mem_size_mib": 1024,
"smt": false
}' >/dev/null
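# Kernel cmdline pins static guest networking via ip=<client-ip>::<gateway>:<netmask>:<hostname>:<device>:off:<dns>; root lands on /dev/vda.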
KCMD="console=ttyS0 reboot=k panic=1 pci=off root=/dev/vda rw ip=${GUEST_IP}::${BR_IP}:255.255.255.0:${VM_NAME}:eth0:off:${DNS_SERVER} hostname=${VM_NAME} systemd.mask=home.mount systemd.mask=var.mount"
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/boot-source \
-H "Content-Type: application/json" \
-d "{
\"kernel_image_path\": \"$KERNEL\",
\"boot_args\": \"$KCMD\",
\"initrd_path\": \"$INITRD\"
}" >/dev/null
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/drives/rootfs \
-H "Content-Type: application/json" \
-d "{
\"drive_id\": \"rootfs\",
\"path_on_host\": \"$OUT_ROOTFS\",
\"is_root_device\": true,
\"is_read_only\": false
}" >/dev/null
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/network-interfaces/eth0 \
-H "Content-Type: application/json" \
-d "{
\"iface_id\": \"eth0\",
\"host_dev_name\": \"$TAP_DEV\"
}" >/dev/null
sudo -E curl --unix-socket "$API_SOCK" -X PUT http://localhost/actions \
-H "Content-Type: application/json" \
-d '{ "action_type": "InstanceStart" }' >/dev/null
SUDO_CHILD_PID="$(pgrep -n -f "$API_SOCK" || true)"
if [[ -n "$SUDO_CHILD_PID" ]]; then
FC_PID="$SUDO_CHILD_PID"
fi
VM_CONFIG_JSON="$(sudo -E curl --unix-socket "$API_SOCK" -sS http://localhost/vm/config)"
CREATED_AT="$(date -Iseconds)"
jq -n \
--arg id "$VM_ID" \
--arg name "$VM_NAME" \
--arg pid "$FC_PID" \
--arg created_at "$CREATED_AT" \
--arg guest_ip "$GUEST_IP" \
--arg tap "$TAP_DEV" \
--arg api_sock "$API_SOCK" \
--arg log "$LOG_FILE" \
--arg rootfs "$OUT_ROOTFS" \
--arg kernel "$KERNEL" \
--argjson config "$VM_CONFIG_JSON" \
'{meta:{id:$id,name:$name,pid:$pid,created_at:$created_at,guest_ip:$guest_ip,tap:$tap,api_sock:$api_sock,log:$log,rootfs:$rootfs,kernel:$kernel},config:$config}' \
> "$VM_DIR/vm.json"
log "enabling NAT for interactive session"
banger_nat up >/dev/null
NAT_ACTIVE=1
log "waiting for SSH"
log "guest ip: $GUEST_IP"
log "ssh: ssh -i \"$SSH_KEY\" root@${GUEST_IP}"
for _ in $(seq 1 60); do
if ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"root@${GUEST_IP}" "true" >/dev/null 2>&1; then
log "ssh ready"
break
fi
sleep 1
done
log "output rootfs: $OUT_ROOTFS"
log "press Ctrl+C to stop and clean up"
while kill -0 "$FC_PID" >/dev/null 2>&1; do
sleep 1
done

View file

@ -1,12 +1,19 @@
package api
import "banger/internal/model"
import (
"time"
"banger/internal/model"
)
type Empty struct{}
type PingResult struct {
Status string `json:"status"`
PID int `json:"pid"`
Status string `json:"status"`
PID int `json:"pid"`
Version string `json:"version,omitempty"`
Commit string `json:"commit,omitempty"`
BuiltAt string `json:"built_at,omitempty"`
}
type ShutdownResult struct {
@ -24,6 +31,32 @@ type VMCreateParams struct {
NoStart bool `json:"no_start,omitempty"`
}
type VMCreateStatusParams struct {
ID string `json:"id"`
}
type VMCreateOperation struct {
ID string `json:"id"`
VMID string `json:"vm_id,omitempty"`
VMName string `json:"vm_name,omitempty"`
Stage string `json:"stage,omitempty"`
Detail string `json:"detail,omitempty"`
StartedAt time.Time `json:"started_at,omitempty"`
UpdatedAt time.Time `json:"updated_at,omitempty"`
Done bool `json:"done"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
VM *model.VMRecord `json:"vm,omitempty"`
}
type VMCreateBeginResult struct {
Operation VMCreateOperation `json:"operation"`
}
type VMCreateStatusResult struct {
Operation VMCreateOperation `json:"operation"`
}
type VMRefParams struct {
IDOrName string `json:"id_or_name"`
}
@ -89,14 +122,32 @@ type VMPortsResult struct {
Ports []VMPort `json:"ports"`
}
type ImageBuildParams struct {
Name string `json:"name,omitempty"`
BaseRootfs string `json:"base_rootfs,omitempty"`
Size string `json:"size,omitempty"`
KernelPath string `json:"kernel_path,omitempty"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
Docker bool `json:"docker,omitempty"`
type WorkspaceExportParams struct {
IDOrName string `json:"id_or_name"`
GuestPath string `json:"guest_path,omitempty"`
BaseCommit string `json:"base_commit,omitempty"`
}
type WorkspaceExportResult struct {
GuestPath string `json:"guest_path"`
BaseCommit string `json:"base_commit"`
Patch []byte `json:"patch"`
ChangedFiles []string `json:"changed_files"`
HasChanges bool `json:"has_changes"`
}
type VMWorkspacePrepareParams struct {
IDOrName string `json:"id_or_name"`
SourcePath string `json:"source_path"`
GuestPath string `json:"guest_path,omitempty"`
Branch string `json:"branch,omitempty"`
From string `json:"from,omitempty"`
Mode string `json:"mode,omitempty"`
IncludeUntracked bool `json:"include_untracked,omitempty"`
}
type VMWorkspacePrepareResult struct {
Workspace model.WorkspacePrepareResult `json:"workspace"`
}
type ImageRegisterParams struct {
@ -106,14 +157,48 @@ type ImageRegisterParams struct {
KernelPath string `json:"kernel_path,omitempty"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
PackagesPath string `json:"packages_path,omitempty"`
Docker bool `json:"docker,omitempty"`
KernelRef string `json:"kernel_ref,omitempty"`
}
type ImagePullParams struct {
Ref string `json:"ref"`
Name string `json:"name,omitempty"`
KernelPath string `json:"kernel_path,omitempty"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
KernelRef string `json:"kernel_ref,omitempty"`
SizeBytes int64 `json:"size_bytes,omitempty"`
}
type ImageRefParams struct {
IDOrName string `json:"id_or_name"`
}
type OperationSummary struct {
ID string `json:"id"`
Kind string `json:"kind"`
Stage string `json:"stage,omitempty"`
Detail string `json:"detail,omitempty"`
Done bool `json:"done"`
StartedAt time.Time `json:"started_at,omitempty"`
UpdatedAt time.Time `json:"updated_at,omitempty"`
}
type OperationsListResult struct {
Operations []OperationSummary `json:"operations"`
}
type ImageCachePruneParams struct {
DryRun bool `json:"dry_run,omitempty"`
}
type ImageCachePruneResult struct {
BytesFreed int64 `json:"bytes_freed"`
BlobsFreed int `json:"blobs_freed"`
DryRun bool `json:"dry_run"`
CacheDir string `json:"cache_dir"`
}
type ImageListResult struct {
Images []model.Image `json:"images"`
}
@ -121,3 +206,54 @@ type ImageListResult struct {
type ImageShowResult struct {
Image model.Image `json:"image"`
}
type KernelEntry struct {
Name string `json:"name"`
Distro string `json:"distro,omitempty"`
Arch string `json:"arch,omitempty"`
KernelVersion string `json:"kernel_version,omitempty"`
SHA256 string `json:"sha256,omitempty"`
Source string `json:"source,omitempty"`
ImportedAt string `json:"imported_at,omitempty"`
KernelPath string `json:"kernel_path,omitempty"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
}
type KernelListResult struct {
Entries []KernelEntry `json:"entries"`
}
type KernelRefParams struct {
Name string `json:"name"`
}
type KernelShowResult struct {
Entry KernelEntry `json:"entry"`
}
type KernelImportParams struct {
Name string `json:"name"`
FromDir string `json:"from_dir"`
Distro string `json:"distro,omitempty"`
Arch string `json:"arch,omitempty"`
}
type KernelPullParams struct {
Name string `json:"name"`
Force bool `json:"force,omitempty"`
}
type KernelCatalogEntry struct {
Name string `json:"name"`
Distro string `json:"distro,omitempty"`
Arch string `json:"arch,omitempty"`
KernelVersion string `json:"kernel_version,omitempty"`
SizeBytes int64 `json:"size_bytes,omitempty"`
Description string `json:"description,omitempty"`
Pulled bool `json:"pulled"`
}
type KernelCatalogResult struct {
Entries []KernelCatalogEntry `json:"entries"`
}

View file

@ -0,0 +1,34 @@
package buildinfo
import "strings"
var (
Version = "dev"
Commit = "unknown"
BuiltAt = "unknown"
)
type Info struct {
Version string
Commit string
BuiltAt string
}
func Current() Info {
return Normalize(Version, Commit, BuiltAt)
}
func Normalize(version, commit, builtAt string) Info {
return Info{
Version: normalizedValue(version, "dev"),
Commit: normalizedValue(commit, "unknown"),
BuiltAt: normalizedValue(builtAt, "unknown"),
}
}
func normalizedValue(value, fallback string) string {
if trimmed := strings.TrimSpace(value); trimmed != "" {
return trimmed
}
return fallback
}

View file

@ -0,0 +1,33 @@
package buildinfo
import "testing"
func TestNormalizeUsesFallbacks(t *testing.T) {
t.Parallel()
info := Normalize("", " ", "\t")
if info.Version != "dev" {
t.Fatalf("Version = %q, want dev", info.Version)
}
if info.Commit != "unknown" {
t.Fatalf("Commit = %q, want unknown", info.Commit)
}
if info.BuiltAt != "unknown" {
t.Fatalf("BuiltAt = %q, want unknown", info.BuiltAt)
}
}
func TestNormalizeTrimsValues(t *testing.T) {
t.Parallel()
info := Normalize(" v1.2.3 ", " abc123 ", " 2026-03-22T12:00:00Z ")
if info.Version != "v1.2.3" {
t.Fatalf("Version = %q, want v1.2.3", info.Version)
}
if info.Commit != "abc123" {
t.Fatalf("Commit = %q, want abc123", info.Commit)
}
if info.BuiltAt != "2026-03-22T12:00:00Z" {
t.Fatalf("BuiltAt = %q, want 2026-03-22T12:00:00Z", info.BuiltAt)
}
}

View file

@ -0,0 +1,102 @@
package cli
import (
"testing"
"github.com/spf13/cobra"
)
// findSubcommand walks cmd's subtree along path and returns the
// matching command, or nil.
func findSubcommand(root *cobra.Command, path ...string) *cobra.Command {
cur := root
for _, name := range path {
var next *cobra.Command
for _, sub := range cur.Commands() {
if sub.Name() == name {
next = sub
break
}
}
if next == nil {
return nil
}
cur = next
}
return cur
}
func assertHasAlias(t *testing.T, cmd *cobra.Command, alias string) {
t.Helper()
if cmd == nil {
t.Fatal("command is nil")
}
for _, a := range cmd.Aliases {
if a == alias {
return
}
}
t.Errorf("%q missing alias %q; have %v", cmd.Name(), alias, cmd.Aliases)
}
func TestListCommandsHaveLsAlias(t *testing.T) {
root := NewBangerCommand()
cases := [][]string{
{"vm", "list"},
{"image", "list"},
{"kernel", "list"},
}
for _, path := range cases {
t.Run(path[len(path)-1], func(t *testing.T) {
cmd := findSubcommand(root, path...)
if cmd == nil {
t.Fatalf("missing command: %v", path)
}
assertHasAlias(t, cmd, "ls")
})
}
}
func TestDeleteCommandsHaveRmAlias(t *testing.T) {
root := NewBangerCommand()
cases := [][]string{
{"vm", "delete"},
{"image", "delete"},
}
for _, path := range cases {
t.Run(path[len(path)-1], func(t *testing.T) {
cmd := findSubcommand(root, path...)
if cmd == nil {
t.Fatalf("missing command: %v", path)
}
assertHasAlias(t, cmd, "rm")
})
}
}
func TestVMCommandRegistersPrune(t *testing.T) {
root := NewBangerCommand()
cmd := findSubcommand(root, "vm", "prune")
if cmd == nil {
t.Fatal("vm prune not registered")
}
if flag := cmd.Flags().Lookup("force"); flag == nil {
t.Error("vm prune missing --force flag")
}
if flag := cmd.Flags().ShorthandLookup("f"); flag == nil {
t.Error("vm prune missing -f shorthand")
}
}
func TestKernelRmHasDeleteAlias(t *testing.T) {
// This already existed prior to this feature — guard against regressions.
root := NewBangerCommand()
cmd := findSubcommand(root, "kernel", "rm")
if cmd == nil {
t.Fatal("kernel rm missing")
}
assertHasAlias(t, cmd, "delete")
assertHasAlias(t, cmd, "remove")
}

File diff suppressed because it is too large

View file

@ -1,20 +1,55 @@
package cli
import (
"errors"
"fmt"
"os"
"strings"
"banger/internal/buildinfo"
"banger/internal/daemon"
"banger/internal/paths"
"banger/internal/roothelper"
"banger/internal/store"
"github.com/spf13/cobra"
)
// bangerdExit is var-injected so tests can capture the exit code
// without terminating the test process. Production points at os.Exit.
var bangerdExit = os.Exit
func NewBangerdCommand() *cobra.Command {
var systemMode bool
var rootHelperMode bool
var checkMigrations bool
cmd := &cobra.Command{
Use: "bangerd",
Version: strings.Replace(formatVersionLine(buildinfo.Current()), "banger ", "bangerd ", 1),
Short: "Run the banger daemon",
SilenceUsage: true,
SilenceErrors: true,
Args: noArgsUsage("usage: bangerd"),
RunE: func(cmd *cobra.Command, args []string) error {
d, err := daemon.Open(cmd.Context())
if systemMode && rootHelperMode {
return errors.New("choose only one of --system or --root-helper")
}
if checkMigrations {
return runCheckMigrations(cmd, systemMode)
}
if rootHelperMode {
server, err := roothelper.Open()
if err != nil {
return err
}
defer server.Close()
return server.Serve(cmd.Context())
}
open := daemon.Open
if systemMode {
open = daemon.OpenSystem
}
d, err := open(cmd.Context())
if err != nil {
return err
}
@ -22,6 +57,71 @@ func NewBangerdCommand() *cobra.Command {
return d.Serve(cmd.Context())
},
}
cmd.Flags().BoolVar(&systemMode, "system", false, "run as the owner-user system service")
cmd.Flags().BoolVar(&rootHelperMode, "root-helper", false, "run as the privileged root helper service")
cmd.Flags().BoolVar(&checkMigrations, "check-migrations", false, "inspect the state DB and report whether this binary's schema matches; exit 0=compatible, 1=migrations needed, 2=incompatible")
cmd.SetVersionTemplate("{{.Version}}\n")
cmd.CompletionOptions.DisableDefaultCmd = true
return cmd
}
// runCheckMigrations is the entry point for `bangerd --check-migrations`.
// Used by `banger update` to gate a binary swap on a staged binary
// before service restart: if the staged binary doesn't recognise the
// running install's schema, the swap is aborted before any host state
// changes.
//
// Exit codes are part of the contract:
//
// 0 — compatible (no migrations to apply on Open)
// 1 — migrations needed (binary newer than DB; safe to swap)
// 2 — incompatible (DB has migrations this binary doesn't know;
// swapping would leave the daemon unable to open the store)
func runCheckMigrations(cmd *cobra.Command, systemMode bool) error {
layout := paths.ResolveSystem()
if !systemMode {
userLayout, err := paths.Resolve()
if err != nil {
return err
}
layout = userLayout
}
state, err := store.InspectSchemaState(layout.DBPath)
if err != nil {
return fmt.Errorf("inspect %s: %w", layout.DBPath, err)
}
out := cmd.OutOrStdout()
switch state.Compatibility {
case store.SchemaCompatible:
fmt.Fprintf(out, "compatible: db at v%d, binary knows up to v%d\n", lastID(state.AppliedIDs), state.KnownMaxID)
return nil
case store.SchemaMigrationsNeeded:
fmt.Fprintf(out, "migrations needed: pending %v (binary will apply on first Open)\n", state.Pending)
// Distinct exit code so callers can tell "safe to swap, will
// auto-migrate" apart from "compatible, no work pending".
// Returning a cobra error would also exit non-zero, but we
// want a specific code (1) — and we don't want SilenceErrors
// to print our message twice.
bangerdExit(1)
return nil
case store.SchemaIncompatible:
fmt.Fprintf(out, "incompatible: db has unknown migrations %v (binary knows up to v%d)\n", state.Unknown, state.KnownMaxID)
bangerdExit(2)
return nil
default:
return fmt.Errorf("unexpected schema-state classification %d", state.Compatibility)
}
}
// lastID returns the largest int in xs, or 0 when empty. The schema-
// migrations table doesn't guarantee insert order, so we scan rather
// than trusting xs[len-1].
func lastID(xs []int) int {
max := 0
for _, x := range xs {
if x > max {
max = x
}
}
return max
}
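The 0/1/2 contract above is meant for external callers: the updater gates its binary swap on it. A minimal, hypothetical sketch of such a caller follows; `safeToSwap` and the staged-binary path are invented here for illustration, and only the `--check-migrations` flag and its exit codes come from the code above.

package main

import (
	"errors"
	"fmt"
	"os/exec"
)

// safeToSwap probes a staged bangerd binary with --check-migrations and
// maps its exit-code contract onto a go/no-go decision:
//   0 -> compatible, 1 -> migrations pending (binary applies them on Open),
//   2 -> incompatible (abort the swap).
func safeToSwap(stagedBangerd string) (bool, error) {
	out, err := exec.Command(stagedBangerd, "--check-migrations").CombinedOutput()
	if err == nil {
		return true, nil // exit 0: schema already compatible
	}
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		switch exitErr.ExitCode() {
		case 1:
			return true, nil // safe to swap; daemon migrates on next Open
		case 2:
			return false, fmt.Errorf("staged binary incompatible with state DB: %s", out)
		}
	}
	return false, err // the probe itself failed (binary missing, DB unreadable, ...)
}

func main() {
	// Hypothetical staged path, not part of banger's actual layout.
	ok, err := safeToSwap("/usr/local/bin/bangerd.staged")
	fmt.Println("safe to swap:", ok, "err:", err)
}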

View file

@ -0,0 +1,194 @@
package cli
import (
"bytes"
"database/sql"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/store"
"github.com/spf13/cobra"
_ "modernc.org/sqlite"
)
func TestNewBangerdCommandSubcommands(t *testing.T) {
cmd := NewBangerdCommand()
if cmd.Use != "bangerd" {
t.Errorf("Use = %q, want bangerd", cmd.Use)
}
for _, flag := range []string{"system", "root-helper", "check-migrations"} {
if cmd.Flag(flag) == nil {
t.Errorf("flag %q missing", flag)
}
}
}
func TestLastID(t *testing.T) {
tests := []struct {
name string
in []int
want int
}{
{"nil", nil, 0},
{"empty", []int{}, 0},
{"single", []int{7}, 7},
{"sorted ascending", []int{1, 2, 3}, 3},
{"unsorted, max in middle", []int{1, 99, 5}, 99},
{"duplicates", []int{4, 4, 2, 4}, 4},
{"negative ignored", []int{-3, -1, 0}, 0},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if got := lastID(tc.in); got != tc.want {
t.Fatalf("lastID(%v) = %d, want %d", tc.in, got, tc.want)
}
})
}
}
// stubExit replaces bangerdExit for the test and returns a pointer to
// the captured exit code (-1 = not called) and a restore func.
func stubExit(t *testing.T) *int {
t.Helper()
called := -1
prev := bangerdExit
bangerdExit = func(code int) { called = code }
t.Cleanup(func() { bangerdExit = prev })
return &called
}
// pointHomeAtTempDB sets XDG_STATE_HOME (and HOME, which Resolve falls
// back to) so that paths.Resolve().DBPath lands at <tmp>/banger/state.db.
// Returns the DB path.
func pointHomeAtTempDB(t *testing.T) string {
t.Helper()
tmp := t.TempDir()
t.Setenv("HOME", tmp)
t.Setenv("XDG_STATE_HOME", tmp)
t.Setenv("XDG_CONFIG_HOME", tmp)
t.Setenv("XDG_CACHE_HOME", tmp)
t.Setenv("XDG_RUNTIME_DIR", tmp)
dir := filepath.Join(tmp, "banger")
if err := os.MkdirAll(dir, 0o700); err != nil {
t.Fatalf("mkdir state dir: %v", err)
}
return filepath.Join(dir, "state.db")
}
func TestRunCheckMigrationsCompatible(t *testing.T) {
dbPath := pointHomeAtTempDB(t)
s, err := store.Open(dbPath)
if err != nil {
t.Fatalf("store.Open: %v", err)
}
_ = s.Close()
exit := stubExit(t)
cmd := &cobra.Command{}
var out bytes.Buffer
cmd.SetOut(&out)
if err := runCheckMigrations(cmd, false); err != nil {
t.Fatalf("runCheckMigrations: %v", err)
}
if *exit != -1 {
t.Errorf("bangerdExit called with %d, want no call", *exit)
}
if !strings.HasPrefix(out.String(), "compatible:") {
t.Errorf("stdout = %q, want prefix \"compatible:\"", out.String())
}
}
func TestRunCheckMigrationsMigrationsNeeded(t *testing.T) {
dbPath := pointHomeAtTempDB(t)
// Hand-craft a DB that has schema_migrations with only the baseline
// row — InspectSchemaState classifies this as "migrations needed".
dsn := "file:" + dbPath + "?_pragma=foreign_keys(1)"
db, err := sql.Open("sqlite", dsn)
if err != nil {
t.Fatalf("sql.Open: %v", err)
}
if _, err := db.Exec(`CREATE TABLE schema_migrations (id INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL)`); err != nil {
t.Fatalf("create table: %v", err)
}
if _, err := db.Exec(`INSERT INTO schema_migrations VALUES (1, 'baseline', '2026-01-01T00:00:00Z')`); err != nil {
t.Fatalf("insert baseline: %v", err)
}
_ = db.Close()
exit := stubExit(t)
cmd := &cobra.Command{}
var out bytes.Buffer
cmd.SetOut(&out)
if err := runCheckMigrations(cmd, false); err != nil {
t.Fatalf("runCheckMigrations: %v", err)
}
if *exit != 1 {
t.Errorf("bangerdExit called with %d, want 1", *exit)
}
if !strings.HasPrefix(out.String(), "migrations needed:") {
t.Errorf("stdout = %q, want prefix \"migrations needed:\"", out.String())
}
}
func TestRunCheckMigrationsIncompatible(t *testing.T) {
dbPath := pointHomeAtTempDB(t)
s, err := store.Open(dbPath)
if err != nil {
t.Fatalf("store.Open: %v", err)
}
_ = s.Close()
// Inject an unknown migration id directly so the binary's known set
// is a strict subset — InspectSchemaState classifies as incompatible.
dsn := "file:" + dbPath
db, err := sql.Open("sqlite", dsn)
if err != nil {
t.Fatalf("sql.Open: %v", err)
}
if _, err := db.Exec(`INSERT INTO schema_migrations VALUES (9999, 'from_the_future', '2030-01-01T00:00:00Z')`); err != nil {
t.Fatalf("insert future row: %v", err)
}
_ = db.Close()
exit := stubExit(t)
cmd := &cobra.Command{}
var out bytes.Buffer
cmd.SetOut(&out)
if err := runCheckMigrations(cmd, false); err != nil {
t.Fatalf("runCheckMigrations: %v", err)
}
if *exit != 2 {
t.Errorf("bangerdExit called with %d, want 2", *exit)
}
if !strings.HasPrefix(out.String(), "incompatible:") {
t.Errorf("stdout = %q, want prefix \"incompatible:\"", out.String())
}
}
func TestRunCheckMigrationsInspectError(t *testing.T) {
// Point at a state dir with a non-DB file at state.db so Inspect
// fails to open it. The function should wrap the error with the path.
dbPath := pointHomeAtTempDB(t)
if err := os.WriteFile(dbPath, []byte("not a sqlite file"), 0o600); err != nil {
t.Fatalf("write garbage: %v", err)
}
stubExit(t)
cmd := &cobra.Command{}
var out bytes.Buffer
cmd.SetOut(&out)
err := runCheckMigrations(cmd, false)
if err == nil {
t.Fatal("runCheckMigrations: nil error, want wrapped inspect error")
}
if !strings.Contains(err.Error(), dbPath) {
t.Errorf("error %q does not mention DB path %q", err.Error(), dbPath)
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,55 @@
package cli
import (
"fmt"
"banger/internal/installmeta"
"banger/internal/paths"
"github.com/spf13/cobra"
)
func (d *deps) newDaemonCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "daemon",
Short: "Manage the installed banger services",
RunE: helpNoArgs,
}
cmd.AddCommand(
&cobra.Command{
Use: "status",
Short: "Show owner-daemon and root-helper status",
Args: noArgsUsage("usage: banger daemon status"),
RunE: func(cmd *cobra.Command, args []string) error {
return d.runSystemStatus(cmd.Context(), cmd.OutOrStdout())
},
},
&cobra.Command{
Use: "stop",
Short: "Stop the installed banger services",
Args: noArgsUsage("usage: banger daemon stop"),
RunE: func(cmd *cobra.Command, args []string) error {
if err := requireRoot(); err != nil {
return err
}
if err := d.runSystemctl(cmd.Context(), "stop", installmeta.DefaultService, installmeta.DefaultRootHelperService); err != nil {
return err
}
_, err := fmt.Fprintln(cmd.OutOrStdout(), "stopped")
return err
},
},
&cobra.Command{
Use: "socket",
Short: "Print the daemon socket path",
Args: noArgsUsage("usage: banger daemon socket"),
RunE: func(cmd *cobra.Command, args []string) error {
layout := paths.ResolveSystem()
var err error
_, err = fmt.Fprintln(cmd.OutOrStdout(), layout.SocketPath)
return err
},
},
)
return cmd
}

View file

@ -0,0 +1,302 @@
package cli
import (
"errors"
"fmt"
"strings"
"banger/internal/api"
"banger/internal/model"
"banger/internal/rpc"
"github.com/spf13/cobra"
)
func (d *deps) newImageCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "image",
Short: "Pull and manage banger images (rootfs + kernel + work-seed)",
Long: strings.TrimSpace(`
A banger image bundles a rootfs.ext4, a kernel, an optional initrd
+ modules, and an optional work-seed (the snapshot used to populate
each new VM's /root). Most users only need 'banger image pull
<catalog-name>' for the cataloged paths (see internal/imagecat),
or 'banger image pull <oci-ref>' for an OCI image.
Subcommands:
pull fetch a bundle by catalog name OR pull an OCI image
register point banger at an existing local rootfs (advanced)
promote copy a registered image's files into banger's managed dir
list show what's installed
show print one image's full record as JSON
delete remove an image (no VMs may reference it)
`),
Example: strings.TrimSpace(`
banger image pull debian-bookworm
banger image pull docker.io/library/alpine:3.20 --kernel-ref generic-6.12
banger image list
`),
RunE: helpNoArgs,
}
cmd.AddCommand(
d.newImageRegisterCommand(),
d.newImagePullCommand(),
d.newImagePromoteCommand(),
d.newImageListCommand(),
d.newImageShowCommand(),
d.newImageDeleteCommand(),
d.newImageCacheCommand(),
)
return cmd
}
// newImageCacheCommand groups OCI-cache lifecycle subcommands. Today
// the only one is `prune`; future additions (size, list, etc.) plug
// in here without polluting the top-level `image` namespace.
func (d *deps) newImageCacheCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "cache",
Short: "Manage banger's OCI layer-blob cache",
Long: strings.TrimSpace(`
banger keeps a local copy of every OCI layer it downloads so a re-pull
of the same image (or any image that shares a base layer) skips the
network round-trip. The cache lives under the daemon's CacheDir
(see 'banger doctor' or docs/config.md). Layers accumulate forever;
'banger image cache prune' is the cheap way to reclaim disk.
`),
Example: strings.TrimSpace(`
banger image cache prune --dry-run
banger image cache prune
`),
RunE: helpNoArgs,
}
cmd.AddCommand(d.newImageCachePruneCommand())
return cmd
}
func (d *deps) newImageCachePruneCommand() *cobra.Command {
var dryRun bool
cmd := &cobra.Command{
Use: "prune",
Short: "Remove every cached OCI layer blob",
Long: strings.TrimSpace(`
Removes every layer blob under the OCI cache. Registered banger
images are independent of the cache (each pull flattens layers into
a self-contained ext4), so prune only loses re-pull avoidance: the
next pull of the same image re-downloads the layers it needs.
Safe to run any time the daemon is idle. If you have an image pull
in flight when you run prune, that pull may fail and need a retry.
--dry-run reports the byte count without removing anything.
`),
Args: noArgsUsage("usage: banger image cache prune [--dry-run]"),
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.ImageCachePruneResult](cmd.Context(), layout.SocketPath, "image.cache.prune", api.ImageCachePruneParams{DryRun: dryRun})
if err != nil {
return err
}
out := cmd.OutOrStdout()
verb := "freed"
if result.DryRun {
verb = "would free"
}
_, err = fmt.Fprintf(out, "%s %s across %d blob(s) in %s\n",
verb, humanSize(result.BytesFreed), result.BlobsFreed, result.CacheDir)
return err
},
}
cmd.Flags().BoolVar(&dryRun, "dry-run", false, "report the size that would be freed without deleting anything")
return cmd
}
func (d *deps) newImageRegisterCommand() *cobra.Command {
var params api.ImageRegisterParams
cmd := &cobra.Command{
Use: "register",
Short: "Register or update an unmanaged image",
Args: noArgsUsage("usage: banger image register --name <name> --rootfs <path> [--work-seed <path>] (--kernel <path> [--initrd <path>] [--modules <dir>] | --kernel-ref <name>)"),
RunE: func(cmd *cobra.Command, args []string) error {
if strings.TrimSpace(params.KernelRef) != "" && (params.KernelPath != "" || params.InitrdPath != "" || params.ModulesDir != "") {
return errors.New("--kernel-ref is mutually exclusive with --kernel/--initrd/--modules")
}
if err := absolutizeImageRegisterPaths(&params); err != nil {
return err
}
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.ImageShowResult](cmd.Context(), layout.SocketPath, "image.register", params)
if err != nil {
return err
}
return printImageSummary(cmd.OutOrStdout(), result.Image)
},
}
cmd.Flags().StringVar(&params.Name, "name", "", "image name")
cmd.Flags().StringVar(&params.RootfsPath, "rootfs", "", "rootfs path")
cmd.Flags().StringVar(&params.WorkSeedPath, "work-seed", "", "work-seed path")
cmd.Flags().StringVar(&params.KernelPath, "kernel", "", "kernel path")
cmd.Flags().StringVar(&params.InitrdPath, "initrd", "", "initrd path")
cmd.Flags().StringVar(&params.ModulesDir, "modules", "", "modules dir")
cmd.Flags().StringVar(&params.KernelRef, "kernel-ref", "", "name of a cataloged kernel (see 'banger kernel list')")
_ = cmd.RegisterFlagCompletionFunc("kernel-ref", d.completeKernelNames)
return cmd
}
func (d *deps) newImagePullCommand() *cobra.Command {
var (
params api.ImagePullParams
sizeRaw string
)
cmd := &cobra.Command{
Use: "pull <name-or-oci-ref>",
Short: "Pull an image bundle (catalog name) or OCI image and register it",
ValidArgsFunction: d.completeImageCatalogNameOnlyAtPos0,
Long: strings.TrimSpace(`
Pull an image into banger. Two paths:
Catalog name (e.g. 'debian-bookworm')
Fetches a pre-built bundle from the embedded imagecat catalog.
Kernel-ref comes from the catalog entry; --kernel-ref still
overrides.
OCI reference (e.g. 'docker.io/library/debian:bookworm')
Pulls the image, flattens its layers, fixes ownership, injects
banger's guest agents. --kernel-ref or direct --kernel/--initrd/
--modules are required.
Use 'banger image list' to see installed images.
`),
Example: strings.TrimSpace(`
banger image pull debian-bookworm
banger image pull debian-bookworm --name sandbox
banger image pull docker.io/library/debian:bookworm --kernel-ref generic-6.12
`),
Args: exactArgsUsage(1, "usage: banger image pull <name-or-oci-ref> [--name <name>] [--kernel-ref <name>] [--kernel <path>] [--initrd <path>] [--modules <dir>] [--size <human>]"),
RunE: func(cmd *cobra.Command, args []string) error {
params.Ref = args[0]
if strings.TrimSpace(params.KernelRef) != "" && (params.KernelPath != "" || params.InitrdPath != "" || params.ModulesDir != "") {
return errors.New("--kernel-ref is mutually exclusive with --kernel/--initrd/--modules")
}
if strings.TrimSpace(sizeRaw) != "" {
size, err := model.ParseSize(sizeRaw)
if err != nil {
return fmt.Errorf("--size: %w", err)
}
params.SizeBytes = size
}
if err := absolutizePaths(&params.KernelPath, &params.InitrdPath, &params.ModulesDir); err != nil {
return err
}
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
var result api.ImageShowResult
err = withHeartbeat(cmd.ErrOrStderr(), "image pull", func() error {
var callErr error
result, callErr = rpc.Call[api.ImageShowResult](cmd.Context(), layout.SocketPath, "image.pull", params)
return callErr
})
if err != nil {
return err
}
return printImageSummary(cmd.OutOrStdout(), result.Image)
},
}
cmd.Flags().StringVar(&params.Name, "name", "", "image name (defaults to the ref's repo+tag, sanitised)")
cmd.Flags().StringVar(&params.KernelPath, "kernel", "", "kernel path")
cmd.Flags().StringVar(&params.InitrdPath, "initrd", "", "initrd path")
cmd.Flags().StringVar(&params.ModulesDir, "modules", "", "modules dir")
cmd.Flags().StringVar(&params.KernelRef, "kernel-ref", "", "name of a cataloged kernel (see 'banger kernel list')")
cmd.Flags().StringVar(&sizeRaw, "size", "", "ext4 image size, e.g. 4GiB, 512M, 2G (defaults to content + 25%, min 1GiB)")
_ = cmd.RegisterFlagCompletionFunc("kernel-ref", d.completeKernelNames)
return cmd
}
func (d *deps) newImagePromoteCommand() *cobra.Command {
return &cobra.Command{
Use: "promote <id-or-name>",
Short: "Promote an unmanaged image to a managed artifact",
Args: exactArgsUsage(1, "usage: banger image promote <id-or-name>"),
ValidArgsFunction: d.completeImageNameOnlyAtPos0,
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.ImageShowResult](cmd.Context(), layout.SocketPath, "image.promote", api.ImageRefParams{IDOrName: args[0]})
if err != nil {
return err
}
return printImageSummary(cmd.OutOrStdout(), result.Image)
},
}
}
func (d *deps) newImageListCommand() *cobra.Command {
return &cobra.Command{
Use: "list",
Aliases: []string{"ls"},
Short: "List images",
Args: noArgsUsage("usage: banger image list"),
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.ImageListResult](cmd.Context(), layout.SocketPath, "image.list", api.Empty{})
if err != nil {
return err
}
return printImageListTable(cmd.OutOrStdout(), result.Images)
},
}
}
func (d *deps) newImageShowCommand() *cobra.Command {
return &cobra.Command{
Use: "show <id-or-name>",
Short: "Show image details",
Args: exactArgsUsage(1, "usage: banger image show <id-or-name>"),
ValidArgsFunction: d.completeImageNameOnlyAtPos0,
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.ImageShowResult](cmd.Context(), layout.SocketPath, "image.show", api.ImageRefParams{IDOrName: args[0]})
if err != nil {
return err
}
return printJSON(cmd.OutOrStdout(), result.Image)
},
}
}
func (d *deps) newImageDeleteCommand() *cobra.Command {
return &cobra.Command{
Use: "delete <id-or-name>",
Aliases: []string{"rm"},
Short: "Delete an image",
Args: exactArgsUsage(1, "usage: banger image delete <id-or-name>"),
ValidArgsFunction: d.completeImageNameOnlyAtPos0,
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.ImageShowResult](cmd.Context(), layout.SocketPath, "image.delete", api.ImageRefParams{IDOrName: args[0]})
if err != nil {
return err
}
return printImageSummary(cmd.OutOrStdout(), result.Image)
},
}
}

View file

@ -0,0 +1,441 @@
package cli
import (
"archive/tar"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"banger/internal/config"
"banger/internal/hostnat"
"banger/internal/imagecat"
"banger/internal/imagepull"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
"github.com/klauspost/compress/zstd"
"github.com/spf13/cobra"
)
func (d *deps) newInternalCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "internal",
Hidden: true,
RunE: helpNoArgs,
}
cmd.AddCommand(
newInternalNATCommand(),
newInternalWorkSeedCommand(),
newInternalSSHKeyPathCommand(),
newInternalFirecrackerPathCommand(),
newInternalVSockAgentPathCommand(),
newInternalMakeBundleCommand(),
)
return cmd
}
func newInternalSSHKeyPathCommand() *cobra.Command {
return &cobra.Command{
Use: "ssh-key-path",
Hidden: true,
Args: noArgsUsage("usage: banger internal ssh-key-path"),
RunE: func(cmd *cobra.Command, args []string) error {
layout, err := paths.Resolve()
if err != nil {
return err
}
cfg, err := config.Load(layout)
if err != nil {
return err
}
_, err = fmt.Fprintln(cmd.OutOrStdout(), cfg.SSHKeyPath)
return err
},
}
}
func newInternalFirecrackerPathCommand() *cobra.Command {
return &cobra.Command{
Use: "firecracker-path",
Hidden: true,
Args: noArgsUsage("usage: banger internal firecracker-path"),
RunE: func(cmd *cobra.Command, args []string) error {
layout, err := paths.Resolve()
if err != nil {
return err
}
cfg, err := config.Load(layout)
if err != nil {
return err
}
if strings.TrimSpace(cfg.FirecrackerBin) == "" {
return errors.New("firecracker binary not configured; install firecracker or set firecracker_bin")
}
_, err = fmt.Fprintln(cmd.OutOrStdout(), cfg.FirecrackerBin)
return err
},
}
}
func newInternalVSockAgentPathCommand() *cobra.Command {
return &cobra.Command{
Use: "vsock-agent-path",
Hidden: true,
Args: noArgsUsage("usage: banger internal vsock-agent-path"),
RunE: func(cmd *cobra.Command, args []string) error {
path, err := paths.CompanionBinaryPath("banger-vsock-agent")
if err != nil {
return err
}
_, err = fmt.Fprintln(cmd.OutOrStdout(), path)
return err
},
}
}
func newInternalMakeBundleCommand() *cobra.Command {
var (
rootfsTarPath string
name string
distro string
arch string
kernelRef string
description string
sizeSpec string
outPath string
)
cmd := &cobra.Command{
Use: "make-bundle",
Hidden: true,
Short: "Build a banger image bundle (.tar.zst) from a flat rootfs tar",
Args: noArgsUsage("usage: banger internal make-bundle --rootfs-tar <file|-> --name <n> --out <bundle.tar.zst>"),
RunE: func(cmd *cobra.Command, args []string) error {
return runInternalMakeBundle(cmd, internalMakeBundleOpts{
rootfsTarPath: rootfsTarPath,
name: name,
distro: distro,
arch: arch,
kernelRef: kernelRef,
description: description,
sizeSpec: sizeSpec,
outPath: outPath,
})
},
}
cmd.Flags().StringVar(&rootfsTarPath, "rootfs-tar", "", "flat rootfs tar file, or '-' for stdin")
cmd.Flags().StringVar(&name, "name", "", "bundle name (filesystem-safe identifier)")
cmd.Flags().StringVar(&distro, "distro", "", "distro label (e.g. debian)")
cmd.Flags().StringVar(&arch, "arch", "x86_64", "architecture label")
cmd.Flags().StringVar(&kernelRef, "kernel-ref", "", "kernelcat entry name this image pairs with")
cmd.Flags().StringVar(&description, "description", "", "short description")
cmd.Flags().StringVar(&sizeSpec, "size", "", "rootfs ext4 size (e.g. 4G); defaults to tree size + 50%")
cmd.Flags().StringVar(&outPath, "out", "", "output bundle path (.tar.zst)")
return cmd
}
type internalMakeBundleOpts struct {
rootfsTarPath string
name string
distro string
arch string
kernelRef string
description string
sizeSpec string
outPath string
}
func runInternalMakeBundle(cmd *cobra.Command, opts internalMakeBundleOpts) error {
if err := imagecat.ValidateName(opts.name); err != nil {
return err
}
if strings.TrimSpace(opts.rootfsTarPath) == "" {
return errors.New("--rootfs-tar is required")
}
if strings.TrimSpace(opts.outPath) == "" {
return errors.New("--out is required")
}
if strings.TrimSpace(opts.arch) == "" {
opts.arch = "x86_64"
}
var sizeBytes int64
if s := strings.TrimSpace(opts.sizeSpec); s != "" {
n, err := model.ParseSize(s)
if err != nil {
return fmt.Errorf("parse --size: %w", err)
}
sizeBytes = n
}
ctx := cmd.Context()
stagingRoot, err := os.MkdirTemp("", "banger-mkbundle-")
if err != nil {
return err
}
defer os.RemoveAll(stagingRoot)
rootfsTree := filepath.Join(stagingRoot, "rootfs")
if err := os.MkdirAll(rootfsTree, 0o755); err != nil {
return err
}
var tarReader io.Reader
if opts.rootfsTarPath == "-" {
tarReader = cmd.InOrStdin()
} else {
f, err := os.Open(opts.rootfsTarPath)
if err != nil {
return fmt.Errorf("open rootfs tar: %w", err)
}
defer f.Close()
tarReader = f
}
fmt.Fprintln(cmd.ErrOrStderr(), "[make-bundle] extracting rootfs")
meta, err := imagepull.FlattenTar(ctx, tarReader, rootfsTree)
if err != nil {
return fmt.Errorf("flatten rootfs: %w", err)
}
// docker create drops /.dockerenv (and containerd drops
// /run/.containerenv) into the container's writable layer, so
// `docker export` includes them in the tar. systemd-detect-virt
// reads those files and flags the boot as virtualization=docker,
// which disables udev device-unit activation (including the work-
// disk dev-vdb.device) and leaves systemd waiting forever. Strip
// them before building the ext4.
for _, marker := range []string{".dockerenv", "run/.containerenv"} {
path := filepath.Join(rootfsTree, marker)
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("strip %s: %w", marker, err)
}
delete(meta.Entries, marker)
}
if sizeBytes <= 0 {
treeSize, err := dirSize(rootfsTree)
if err != nil {
return fmt.Errorf("size rootfs tree: %w", err)
}
// +50% headroom for ext4 overhead (inode tables, block-group
// descriptors, journal, 5% reserved margin).
sizeBytes = treeSize + treeSize/2
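// e.g. a 2 GiB tree yields a 3 GiB image (and never less than MinExt4Size, per the clamp below).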
if sizeBytes < imagepull.MinExt4Size {
sizeBytes = imagepull.MinExt4Size
}
}
ext4Path := filepath.Join(stagingRoot, imagecat.RootfsFilename)
runner := system.NewRunner()
fmt.Fprintf(cmd.ErrOrStderr(), "[make-bundle] building rootfs.ext4 (%d bytes)\n", sizeBytes)
if err := imagepull.BuildExt4(ctx, runner, rootfsTree, ext4Path, sizeBytes); err != nil {
return fmt.Errorf("build ext4: %w", err)
}
fmt.Fprintln(cmd.ErrOrStderr(), "[make-bundle] applying ownership fixup")
if err := imagepull.ApplyOwnership(ctx, runner, ext4Path, meta); err != nil {
return fmt.Errorf("apply ownership: %w", err)
}
fmt.Fprintln(cmd.ErrOrStderr(), "[make-bundle] injecting guest agents")
vsockBin, err := paths.CompanionBinaryPath("banger-vsock-agent")
if err != nil {
return fmt.Errorf("locate vsock agent: %w", err)
}
if err := imagepull.InjectGuestAgents(ctx, runner, ext4Path, imagepull.GuestAgentAssets{VsockAgentBin: vsockBin}); err != nil {
return fmt.Errorf("inject guest agents: %w", err)
}
manifest := imagecat.Manifest{
Name: opts.name,
Distro: strings.TrimSpace(opts.distro),
Arch: opts.arch,
KernelRef: strings.TrimSpace(opts.kernelRef),
Description: strings.TrimSpace(opts.description),
}
manifestPath := filepath.Join(stagingRoot, imagecat.ManifestFilename)
manifestData, err := json.MarshalIndent(manifest, "", " ")
if err != nil {
return err
}
if err := os.WriteFile(manifestPath, append(manifestData, '\n'), 0o644); err != nil {
return err
}
fmt.Fprintln(cmd.ErrOrStderr(), "[make-bundle] packaging bundle")
if err := writeBundleTarZst(opts.outPath, ext4Path, manifestPath); err != nil {
return fmt.Errorf("write bundle: %w", err)
}
sum, err := sha256HexFile(opts.outPath)
if err != nil {
return err
}
stat, err := os.Stat(opts.outPath)
if err != nil {
return err
}
fmt.Fprintf(cmd.OutOrStdout(), "bundle: %s\nsha256: %s\nsize: %d\n", opts.outPath, sum, stat.Size())
return nil
}
func dirSize(root string) (int64, error) {
var total int64
err := filepath.WalkDir(root, func(_ string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if !d.Type().IsRegular() {
return nil
}
info, err := d.Info()
if err != nil {
return err
}
total += info.Size()
return nil
})
return total, err
}
func writeBundleTarZst(outPath, rootfsPath, manifestPath string) error {
if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
return err
}
out, err := os.OpenFile(outPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
if err != nil {
return err
}
defer out.Close()
zw, err := zstd.NewWriter(out, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
if err != nil {
return err
}
tw := tar.NewWriter(zw)
for _, src := range []struct{ path, name string }{
{rootfsPath, imagecat.RootfsFilename},
{manifestPath, imagecat.ManifestFilename},
} {
if err := writeBundleFile(tw, src.path, src.name); err != nil {
_ = tw.Close()
_ = zw.Close()
return err
}
}
if err := tw.Close(); err != nil {
_ = zw.Close()
return err
}
if err := zw.Close(); err != nil {
return err
}
return out.Close()
}
func writeBundleFile(tw *tar.Writer, src, name string) error {
f, err := os.Open(src)
if err != nil {
return err
}
defer f.Close()
fi, err := f.Stat()
if err != nil {
return err
}
if err := tw.WriteHeader(&tar.Header{
Name: name,
Size: fi.Size(),
Mode: 0o644,
Typeflag: tar.TypeReg,
ModTime: fi.ModTime(),
}); err != nil {
return err
}
_, err = io.Copy(tw, f)
return err
}
func sha256HexFile(path string) (string, error) {
f, err := os.Open(path)
if err != nil {
return "", err
}
defer f.Close()
h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
return "", err
}
return hex.EncodeToString(h.Sum(nil)), nil
}
func newInternalWorkSeedCommand() *cobra.Command {
var rootfsPath string
var outPath string
cmd := &cobra.Command{
Use: "work-seed",
Hidden: true,
Args: noArgsUsage("usage: banger internal work-seed --rootfs <path> [--out <path>]"),
RunE: func(cmd *cobra.Command, args []string) error {
rootfsPath = strings.TrimSpace(rootfsPath)
outPath = strings.TrimSpace(outPath)
if rootfsPath == "" {
return errors.New("rootfs path is required")
}
if outPath == "" {
outPath = system.WorkSeedPath(rootfsPath)
}
if err := system.EnsureSudo(cmd.Context()); err != nil {
return err
}
return system.BuildWorkSeedImage(cmd.Context(), system.NewRunner(), rootfsPath, outPath)
},
}
cmd.Flags().StringVar(&rootfsPath, "rootfs", "", "rootfs image path")
cmd.Flags().StringVar(&outPath, "out", "", "output work-seed image path")
return cmd
}
func newInternalNATCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "nat",
Hidden: true,
RunE: helpNoArgs,
}
cmd.AddCommand(
newInternalNATActionCommand("up", true),
newInternalNATActionCommand("down", false),
)
return cmd
}
func newInternalNATActionCommand(use string, enable bool) *cobra.Command {
var guestIP string
var tapDevice string
cmd := &cobra.Command{
Use: use,
Hidden: true,
Args: noArgsUsage("usage: banger internal nat " + use + " --guest-ip <ip> --tap <tap-device>"),
RunE: func(cmd *cobra.Command, args []string) error {
guestIP = strings.TrimSpace(guestIP)
tapDevice = strings.TrimSpace(tapDevice)
if guestIP == "" {
return errors.New("guest IP is required")
}
if tapDevice == "" {
return errors.New("tap device is required")
}
if err := system.EnsureSudo(cmd.Context()); err != nil {
return err
}
return hostnat.Ensure(cmd.Context(), system.NewRunner(), guestIP, tapDevice, enable)
},
}
cmd.Flags().StringVar(&guestIP, "guest-ip", "", "guest IPv4 address")
cmd.Flags().StringVar(&tapDevice, "tap", "", "tap device name")
return cmd
}

View file

@ -0,0 +1,185 @@
package cli
import (
"errors"
"fmt"
"path/filepath"
"strings"
"banger/internal/api"
"banger/internal/rpc"
"github.com/spf13/cobra"
)
func (d *deps) newKernelCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "kernel",
Short: "Pull and manage Firecracker-compatible kernels",
Long: strings.TrimSpace(`
Banger boots guests with a separate kernel artifact (vmlinux, plus
optional initrd + modules). Kernels are tracked by name in a local
catalog so multiple images can share one.
Most users never run these commands directly: 'banger image pull'
auto-pulls the kernel referenced by the catalog entry. Use these
commands when you want to inspect what's installed, switch a VM to
a different kernel via 'image register --kernel-ref', or import a
kernel built locally with scripts/make-*-kernel.sh.
Subcommands:
pull download a cataloged kernel by name
list show what's installed (or --available for the catalog)
show inspect one entry as JSON
rm remove a local kernel
import register a kernel built from scripts/make-*-kernel.sh
`),
Example: strings.TrimSpace(`
banger kernel list --available
banger kernel pull generic-6.12
banger kernel import void-kernel --from build/manual/void-kernel
`),
RunE: helpNoArgs,
}
cmd.AddCommand(
d.newKernelListCommand(),
d.newKernelShowCommand(),
d.newKernelRmCommand(),
d.newKernelImportCommand(),
d.newKernelPullCommand(),
)
return cmd
}
func (d *deps) newKernelPullCommand() *cobra.Command {
var force bool
cmd := &cobra.Command{
Use: "pull <name>",
Short: "Download a cataloged kernel bundle",
Args: exactArgsUsage(1, "usage: banger kernel pull <name> [--force]"),
ValidArgsFunction: d.completeKernelCatalogNameOnlyAtPos0,
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
var result api.KernelShowResult
err = withHeartbeat(cmd.ErrOrStderr(), "kernel pull", func() error {
var callErr error
result, callErr = rpc.Call[api.KernelShowResult](cmd.Context(), layout.SocketPath, "kernel.pull", api.KernelPullParams{Name: args[0], Force: force})
return callErr
})
if err != nil {
return err
}
return printJSON(cmd.OutOrStdout(), result.Entry)
},
}
cmd.Flags().BoolVar(&force, "force", false, "re-pull even if already present")
return cmd
}
func (d *deps) newKernelImportCommand() *cobra.Command {
var params api.KernelImportParams
cmd := &cobra.Command{
Use: "import <name>",
Short: "Import a kernel bundle produced by scripts/make-*-kernel.sh",
Long: "Copy the kernel, optional initrd, and optional modules directory from <from> into the local kernel catalog keyed by <name>. <from> is usually build/manual/void-kernel or build/manual/alpine-kernel.",
Args: exactArgsUsage(1, "usage: banger kernel import <name> --from <dir>"),
RunE: func(cmd *cobra.Command, args []string) error {
params.Name = args[0]
if strings.TrimSpace(params.FromDir) == "" {
return errors.New("--from <dir> is required")
}
abs, err := filepath.Abs(params.FromDir)
if err != nil {
return err
}
params.FromDir = abs
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.KernelShowResult](cmd.Context(), layout.SocketPath, "kernel.import", params)
if err != nil {
return err
}
return printJSON(cmd.OutOrStdout(), result.Entry)
},
}
cmd.Flags().StringVar(&params.FromDir, "from", "", "directory produced by make-*-kernel.sh (e.g. build/manual/void-kernel)")
cmd.Flags().StringVar(&params.Distro, "distro", "", "distribution label stored in the manifest (e.g. void, alpine)")
cmd.Flags().StringVar(&params.Arch, "arch", "", "architecture label stored in the manifest (e.g. x86_64)")
return cmd
}
func (d *deps) newKernelListCommand() *cobra.Command {
var available bool
cmd := &cobra.Command{
Use: "list",
Aliases: []string{"ls"},
Short: "List kernels (local by default, or --available for the catalog)",
Args: noArgsUsage("usage: banger kernel list [--available]"),
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
if available {
result, err := rpc.Call[api.KernelCatalogResult](cmd.Context(), layout.SocketPath, "kernel.catalog", api.Empty{})
if err != nil {
return err
}
return printKernelCatalogTable(cmd.OutOrStdout(), result.Entries)
}
result, err := rpc.Call[api.KernelListResult](cmd.Context(), layout.SocketPath, "kernel.list", api.Empty{})
if err != nil {
return err
}
return printKernelListTable(cmd.OutOrStdout(), result.Entries)
},
}
cmd.Flags().BoolVar(&available, "available", false, "show the built-in catalog (with pulled/available status) instead of local entries")
return cmd
}
func (d *deps) newKernelShowCommand() *cobra.Command {
return &cobra.Command{
Use: "show <name>",
Short: "Show kernel catalog entry details",
Args: exactArgsUsage(1, "usage: banger kernel show <name>"),
ValidArgsFunction: d.completeKernelNameOnlyAtPos0,
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
result, err := rpc.Call[api.KernelShowResult](cmd.Context(), layout.SocketPath, "kernel.show", api.KernelRefParams{Name: args[0]})
if err != nil {
return err
}
return printJSON(cmd.OutOrStdout(), result.Entry)
},
}
}
func (d *deps) newKernelRmCommand() *cobra.Command {
return &cobra.Command{
Use: "rm <name>",
Aliases: []string{"remove", "delete"},
Short: "Remove a kernel catalog entry",
Args: exactArgsUsage(1, "usage: banger kernel rm <name>"),
ValidArgsFunction: d.completeKernelNameOnlyAtPos0,
RunE: func(cmd *cobra.Command, args []string) error {
layout, _, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
if _, err := rpc.Call[api.Empty](cmd.Context(), layout.SocketPath, "kernel.delete", api.KernelRefParams{Name: args[0]}); err != nil {
return err
}
_, err = fmt.Fprintf(cmd.OutOrStdout(), "removed %s\n", args[0])
return err
},
}
}

View file

@ -0,0 +1,102 @@
package cli
import (
"fmt"
"strings"
"banger/internal/config"
"banger/internal/daemon"
"banger/internal/paths"
"github.com/spf13/cobra"
)
// newSSHConfigCommand exposes the opt-in ergonomics for `ssh <name>.vm`.
// Default mode prints current status + the exact Include line the user
// can paste into ~/.ssh/config themselves. --install does the include
// for them inside a marker-fenced block; --uninstall reverses it.
func newSSHConfigCommand() *cobra.Command {
var (
install bool
uninstall bool
)
cmd := &cobra.Command{
Use: "ssh-config",
Short: "Enable plain 'ssh <name>.vm' from any terminal",
Long: `Banger keeps a self-contained SSH client config under its own config
directory (never touching ~/.ssh/config on its own). Opt in to the
convenience shortcut that lets you run 'ssh <name>.vm' from any
terminal, bypassing 'banger vm ssh':
banger ssh-config # print status + copy-paste snippet
banger ssh-config --install # add an Include line to ~/.ssh/config
banger ssh-config --uninstall # remove banger's Include from ~/.ssh/config
After --install, 'ssh agent.vm' works the same as 'banger vm ssh
agent', including for tools like rsync, scp, and editor remotes.
`,
Example: strings.TrimSpace(`
banger ssh-config --install
ssh agent.vm
rsync -avz ./code agent.vm:/root/repo/
`),
Args: noArgsUsage("usage: banger ssh-config [--install|--uninstall]"),
RunE: func(cmd *cobra.Command, args []string) error {
if install && uninstall {
return fmt.Errorf("use only one of --install or --uninstall")
}
layout, err := paths.Resolve()
if err != nil {
return err
}
cfg, err := config.Load(layout)
if err != nil {
return err
}
if err := daemon.SyncVMSSHClientConfig(layout, cfg.SSHKeyPath); err != nil {
return err
}
bangerConfig := daemon.BangerSSHConfigPath(layout)
switch {
case install:
if err := daemon.InstallUserSSHInclude(layout); err != nil {
return err
}
_, err = fmt.Fprintf(cmd.OutOrStdout(),
"added Include %s to ~/.ssh/config — `ssh <name>.vm` will now route through banger\n",
bangerConfig,
)
return err
case uninstall:
if err := daemon.UninstallUserSSHInclude(); err != nil {
return err
}
_, err = fmt.Fprintln(cmd.OutOrStdout(), "removed banger's entries from ~/.ssh/config")
return err
default:
installed, err := daemon.UserSSHIncludeInstalled()
if err != nil {
return err
}
out := cmd.OutOrStdout()
fmt.Fprintf(out, "banger ssh_config: %s\n", bangerConfig)
if installed {
fmt.Fprintln(out, "status: included from ~/.ssh/config")
fmt.Fprintln(out, "")
fmt.Fprintln(out, "`ssh <name>.vm` is enabled. Run `banger ssh-config --uninstall` to revert.")
} else {
fmt.Fprintln(out, "status: not included (opt-in)")
fmt.Fprintln(out, "")
fmt.Fprintln(out, "Enable `ssh <name>.vm` in two ways:")
fmt.Fprintln(out, " banger ssh-config --install")
fmt.Fprintln(out, "or add this line to ~/.ssh/config yourself:")
fmt.Fprintf(out, " Include %s\n", bangerConfig)
}
return nil
}
},
}
cmd.Flags().BoolVar(&install, "install", false, "add an Include line to ~/.ssh/config")
cmd.Flags().BoolVar(&uninstall, "uninstall", false, "remove banger's Include from ~/.ssh/config")
return cmd
}

View file

@ -0,0 +1,485 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"text/tabwriter"
"banger/internal/buildinfo"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
"github.com/spf13/cobra"
)
const (
systemBangerBin = "/usr/local/bin/banger"
systemBangerdBin = "/usr/local/bin/bangerd"
systemCompanionDir = "/usr/local/lib/banger"
systemCompanionAgent = systemCompanionDir + "/banger-vsock-agent"
systemdUserUnitPath = "/etc/systemd/system/" + installmeta.DefaultService
systemdRootUnitPath = "/etc/systemd/system/" + installmeta.DefaultRootHelperService
systemCoverDirEnv = "BANGER_SYSTEM_GOCOVERDIR"
rootCoverDirEnv = "BANGER_ROOT_HELPER_GOCOVERDIR"
)
func (d *deps) newSystemCommand() *cobra.Command {
var owner string
var purge bool
cmd := &cobra.Command{
Use: "system",
Short: "Install banger's owner-daemon and root-helper systemd units",
Long: strings.TrimSpace(`
Banger ships as two services: an owner-user daemon for
orchestration and a narrow root helper for bridge/tap, NAT, and
Firecracker launch. 'banger system' installs, restarts, inspects,
and removes them.
First-run flow (must be run as root):
sudo banger system install --owner $USER install both services
banger system status confirm they're up
banger doctor check host readiness
After 'install', the owner user can run 'banger ...' day to day
without sudo. Subsequent invocations:
sudo banger system restart bounce both services
sudo banger system uninstall remove services + binaries
sudo banger system uninstall --purge also delete /var/lib/banger
See docs/privileges.md for the full trust model.
`),
Example: strings.TrimSpace(`
sudo banger system install --owner alice
banger system status
sudo banger system uninstall --purge
`),
RunE: helpNoArgs,
}
installCmd := &cobra.Command{
Use: "install",
Short: "Install or refresh the owner daemon and root helper",
Args: noArgsUsage("usage: banger system install [--owner USER]"),
RunE: func(cmd *cobra.Command, args []string) error {
return d.runSystemInstall(cmd.Context(), cmd.OutOrStdout(), owner)
},
}
installCmd.Flags().StringVar(&owner, "owner", "", "login user who will operate banger day-to-day")
statusCmd := &cobra.Command{
Use: "status",
Short: "Show owner-daemon and root-helper status",
Args: noArgsUsage("usage: banger system status"),
RunE: func(cmd *cobra.Command, args []string) error {
return d.runSystemStatus(cmd.Context(), cmd.OutOrStdout())
},
}
restartCmd := &cobra.Command{
Use: "restart",
Short: "Restart the installed banger services",
Args: noArgsUsage("usage: banger system restart"),
RunE: func(cmd *cobra.Command, args []string) error {
if err := requireRoot(); err != nil {
return err
}
if err := d.runSystemctl(cmd.Context(), "restart", installmeta.DefaultRootHelperService); err != nil {
return err
}
if err := d.runSystemctl(cmd.Context(), "restart", installmeta.DefaultService); err != nil {
return err
}
if err := d.waitForDaemonReady(cmd.Context(), paths.ResolveSystem().SocketPath); err != nil {
return err
}
_, err := fmt.Fprintln(cmd.OutOrStdout(), "restarted")
return err
},
}
uninstallCmd := &cobra.Command{
Use: "uninstall",
Short: "Remove the installed banger services",
Args: noArgsUsage("usage: banger system uninstall [--purge]"),
RunE: func(cmd *cobra.Command, args []string) error {
return d.runSystemUninstall(cmd.Context(), cmd.OutOrStdout(), purge)
},
}
uninstallCmd.Flags().BoolVar(&purge, "purge", false, "also delete system-owned banger state and cache")
cmd.AddCommand(installCmd, statusCmd, restartCmd, uninstallCmd)
return cmd
}
func (d *deps) runSystemInstall(ctx context.Context, out io.Writer, ownerFlag string) error {
if err := requireRoot(); err != nil {
return err
}
meta, err := resolveInstallOwner(ownerFlag)
if err != nil {
return err
}
info := buildinfo.Current()
meta.Version = info.Version
meta.Commit = info.Commit
meta.BuiltAt = info.BuiltAt
meta.InstalledAt = model.Now()
bangerBin, err := paths.BangerPath()
if err != nil {
return err
}
bangerdBin, err := paths.BangerdPath()
if err != nil {
return err
}
agentBin, err := paths.CompanionBinaryPath("banger-vsock-agent")
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(systemBangerBin), 0o755); err != nil {
return err
}
if err := os.MkdirAll(systemCompanionDir, 0o755); err != nil {
return err
}
if err := installFile(bangerBin, systemBangerBin, 0o755); err != nil {
return err
}
if err := installFile(bangerdBin, systemBangerdBin, 0o755); err != nil {
return err
}
if err := installFile(agentBin, systemCompanionAgent, 0o755); err != nil {
return err
}
if err := installmeta.Save(installmeta.DefaultPath, meta); err != nil {
return err
}
if err := paths.EnsureSystem(paths.ResolveSystem()); err != nil {
return err
}
if err := os.WriteFile(systemdRootUnitPath, []byte(renderRootHelperSystemdUnit()), 0o644); err != nil {
return err
}
if err := os.WriteFile(systemdUserUnitPath, []byte(renderSystemdUnit(meta)), 0o644); err != nil {
return err
}
if err := d.runSystemctl(ctx, "daemon-reload"); err != nil {
return err
}
if err := d.runSystemctl(ctx, "enable", installmeta.DefaultRootHelperService); err != nil {
return err
}
if err := d.runSystemctl(ctx, "enable", installmeta.DefaultService); err != nil {
return err
}
if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil {
return err
}
if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil {
return err
}
if err := d.waitForDaemonReady(ctx, installmeta.DefaultSocketPath); err != nil {
return err
}
if _, err := fmt.Fprintln(out, "installed"); err != nil {
return err
}
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
fmt.Fprintf(w, "owner\t%s\n", meta.OwnerUser)
fmt.Fprintf(w, "socket\t%s\n", installmeta.DefaultSocketPath)
fmt.Fprintf(w, "helper_socket\t%s\n", installmeta.DefaultRootHelperSocketPath)
fmt.Fprintf(w, "service\t%s\n", installmeta.DefaultService)
fmt.Fprintf(w, "helper_service\t%s\n", installmeta.DefaultRootHelperService)
return w.Flush()
}
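// runSystemStatus prints systemd state for both units plus, when the
// daemon answers ping, its pid and build identity. An unreachable
// daemon just drops the ping-derived rows rather than failing.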
func (d *deps) runSystemStatus(ctx context.Context, out io.Writer) error {
layout := paths.ResolveSystem()
active := d.systemctlQuery(ctx, "is-active", installmeta.DefaultService)
if active == "" {
active = "unknown"
}
enabled := d.systemctlQuery(ctx, "is-enabled", installmeta.DefaultService)
if enabled == "" {
enabled = "unknown"
}
helperActive := d.systemctlQuery(ctx, "is-active", installmeta.DefaultRootHelperService)
if helperActive == "" {
helperActive = "unknown"
}
helperEnabled := d.systemctlQuery(ctx, "is-enabled", installmeta.DefaultRootHelperService)
if helperEnabled == "" {
helperEnabled = "unknown"
}
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
fmt.Fprintf(w, "service\t%s\n", installmeta.DefaultService)
fmt.Fprintf(w, "enabled\t%s\n", enabled)
fmt.Fprintf(w, "active\t%s\n", active)
fmt.Fprintf(w, "helper_service\t%s\n", installmeta.DefaultRootHelperService)
fmt.Fprintf(w, "helper_enabled\t%s\n", helperEnabled)
fmt.Fprintf(w, "helper_active\t%s\n", helperActive)
fmt.Fprintf(w, "socket\t%s\n", layout.SocketPath)
fmt.Fprintf(w, "helper_socket\t%s\n", installmeta.DefaultRootHelperSocketPath)
fmt.Fprintf(w, "log\tjournalctl -u %s -u %s\n", installmeta.DefaultService, installmeta.DefaultRootHelperService)
if ping, err := d.daemonPing(ctx, layout.SocketPath); err == nil {
info := buildinfo.Normalize(ping.Version, ping.Commit, ping.BuiltAt)
fmt.Fprintf(w, "pid\t%d\n", ping.PID)
fmt.Fprintf(w, "version\t%s\n", info.Version)
if info.Commit != "" {
fmt.Fprintf(w, "commit\t%s\n", info.Commit)
}
if info.BuiltAt != "" {
fmt.Fprintf(w, "built_at\t%s\n", info.BuiltAt)
}
}
return w.Flush()
}
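// runSystemUninstall is deliberately best-effort: removal errors are
// ignored so a half-broken install can still be torn down. --purge
// additionally deletes the system state, cache, and runtime dirs.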
func (d *deps) runSystemUninstall(ctx context.Context, out io.Writer, purge bool) error {
if err := requireRoot(); err != nil {
return err
}
_ = d.runSystemctl(ctx, "disable", "--now", installmeta.DefaultService, installmeta.DefaultRootHelperService)
_ = os.Remove(systemdUserUnitPath)
_ = os.Remove(systemdRootUnitPath)
_ = os.Remove(installmeta.DefaultPath)
_ = os.Remove(installmeta.DefaultDir)
_ = d.runSystemctl(ctx, "daemon-reload")
_ = os.Remove(systemBangerdBin)
_ = os.Remove(systemBangerBin)
_ = os.RemoveAll(systemCompanionDir)
if purge {
_ = os.RemoveAll(paths.ResolveSystem().StateDir)
_ = os.RemoveAll(paths.ResolveSystem().CacheDir)
_ = os.RemoveAll(paths.ResolveSystem().RuntimeDir)
}
msg := "uninstalled"
if purge {
msg += " (purged state)"
}
_, err := fmt.Fprintln(out, msg)
return err
}
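// resolveInstallOwner picks the day-to-day banger owner: the --owner
// flag when given, otherwise SUDO_USER. An empty owner or root is
// rejected before the user lookup.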
func resolveInstallOwner(ownerFlag string) (installmeta.Metadata, error) {
owner := strings.TrimSpace(ownerFlag)
if owner == "" {
owner = strings.TrimSpace(os.Getenv("SUDO_USER"))
}
if owner == "" {
return installmeta.Metadata{}, errors.New("owner is required; pass --owner USER when installing without sudo")
}
if owner == "root" {
return installmeta.Metadata{}, errors.New("refusing to install with root as the banger owner")
}
return installmeta.LookupOwner(owner)
}
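// renderSystemdUnit renders the owner-daemon unit: ExecStart under the
// install owner's account, ordered after and requiring the root-helper
// service, with the sandboxing and state-directory directives below.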
func renderSystemdUnit(meta installmeta.Metadata) string {
lines := []string{
"[Unit]",
"Description=banger daemon",
"After=network-online.target",
"Wants=network-online.target " + installmeta.DefaultRootHelperService,
"After=" + installmeta.DefaultRootHelperService,
"Requires=" + installmeta.DefaultRootHelperService,
"",
"[Service]",
"Type=simple",
"User=" + meta.OwnerUser,
"ExecStart=" + systemBangerdBin + " --system",
"Restart=on-failure",
"RestartSec=1s",
// KillMode=process: only signal the main PID on stop/restart.
// The default (control-group) sends SIGKILL to every process in
// the unit's cgroup, including descendants — and during `banger
// update` we restart this unit, which would terminate any
// in-flight subprocesses spawned by the daemon. The daemon
// shuts its own children down explicitly when needed.
"KillMode=process",
"Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"Environment=TMPDIR=/run/banger",
"UMask=0077",
"NoNewPrivileges=yes",
"PrivateMounts=yes",
"ProtectSystem=strict",
"ProtectHome=read-only",
"ProtectControlGroups=yes",
"ProtectKernelLogs=yes",
"ProtectKernelModules=yes",
"ProtectClock=yes",
"ProtectHostname=yes",
"RestrictSUIDSGID=yes",
"LockPersonality=yes",
"SystemCallArchitectures=native",
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
"StateDirectory=banger",
"StateDirectoryMode=0700",
"CacheDirectory=banger",
"CacheDirectoryMode=0700",
"RuntimeDirectory=banger",
"RuntimeDirectoryMode=0700",
// Keep /run/banger across stop/restart so the api-sock symlinks
// the helper creates for live VMs aren't wiped between the daemon
// stopping and the new daemon's reconcile re-attaching to them.
// Without this, `banger update` restarts the daemon, /run/banger
// is wiped, the api-sock symlinks vanish, and rediscoverHandles
// can't resolve the chroot path it needs to read jailer's pidfile.
"RuntimeDirectoryPreserve=yes",
}
if coverDir := strings.TrimSpace(os.Getenv(systemCoverDirEnv)); coverDir != "" {
lines = append(lines, "Environment=GOCOVERDIR="+systemdQuote(coverDir))
}
if home := strings.TrimSpace(meta.OwnerHome); home != "" {
lines = append(lines, "ReadOnlyPaths="+systemdQuote(home))
}
lines = append(lines,
"",
"[Install]",
"WantedBy=multi-user.target",
"",
)
return strings.Join(lines, "\n")
}
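// renderRootHelperSystemdUnit renders the root helper's unit. It runs
// as root with a pared-down capability bounding set; the KillMode and
// SendSIGKILL settings below keep banger-launched firecrackers running
// across a helper restart.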
func renderRootHelperSystemdUnit() string {
lines := []string{
"[Unit]",
"Description=banger root helper",
"After=network-online.target",
"Wants=network-online.target",
"",
"[Service]",
"Type=simple",
"ExecStart=" + systemBangerdBin + " --root-helper",
"Restart=on-failure",
"RestartSec=1s",
// KillMode=process + SendSIGKILL=no together make the helper
// safe to restart while banger-launched firecrackers are
// running. firecracker lives in this unit's cgroup (jailer
// doesn't open a sub-cgroup), so:
//
// - Default control-group mode SIGKILLs every process in
// the cgroup on stop.
// - KillMode=process limits the initial SIGTERM to the
// helper main PID; systemd leaves remaining cgroup
// processes alone (and logs "Unit process N (firecracker)
// remains running after unit stopped").
// - SendSIGKILL=no disables the FinalKillSignal escalation
// that would otherwise SIGKILL leftovers after the timeout.
//
// One more pitfall: the firecracker SDK installs a default
// signal-forwarding goroutine in the helper that catches
// SIGTERM (etc.) and forwards it to every firecracker child.
// We disable that explicitly via ForwardSignals: []os.Signal{}
// in firecracker.buildConfig — without that override, systemd
// signaling the helper main would propagate to every running
// VM regardless of what these directives do.
//
// `banger system uninstall` and the daemon's vm-stop path
// explicitly stop firecracker processes when actually needed,
// so we don't lose the systemd-driven kill as a real safety
// net — banger drives those kills itself.
"KillMode=process",
"SendSIGKILL=no",
"Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"Environment=TMPDIR=" + installmeta.DefaultRootHelperRuntimeDir,
"UMask=0077",
"NoNewPrivileges=yes",
"PrivateTmp=yes",
"PrivateMounts=yes",
"ProtectSystem=strict",
"ProtectHome=yes",
"ProtectControlGroups=yes",
"ProtectKernelLogs=yes",
"ProtectKernelModules=yes",
"ProtectClock=yes",
"ProtectHostname=yes",
"RestrictSUIDSGID=yes",
"LockPersonality=yes",
"SystemCallArchitectures=native",
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
"CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT",
"ReadWritePaths=/var/lib/banger",
"RuntimeDirectory=banger-root",
"RuntimeDirectoryMode=0711",
// Same rationale as bangerd.service: the helper-managed
// /run/banger-root holds the helper's RPC socket and any
// per-VM scratch state; preserving it across restart keeps
// the daemon's reconnect path and reconcile re-attachment
// from racing against systemd's runtime-dir cleanup.
"RuntimeDirectoryPreserve=yes",
}
if coverDir := strings.TrimSpace(os.Getenv(rootCoverDirEnv)); coverDir != "" {
lines = append(lines, "Environment=GOCOVERDIR="+systemdQuote(coverDir))
}
lines = append(lines,
"",
"[Install]",
"WantedBy=multi-user.target",
"",
)
return strings.Join(lines, "\n")
}
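// systemdQuote trims the value and wraps it in strconv.Quote-style
// double quotes so directives such as ReadOnlyPaths treat a path
// containing spaces (e.g. /home/alice/dev home) as a single token.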
func systemdQuote(value string) string {
return strconv.Quote(strings.TrimSpace(value))
}
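// installFile copies sourcePath to targetPath via a same-directory
// .tmp file plus an atomic rename, so a crash mid-copy never leaves a
// truncated binary at the final path.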
func installFile(sourcePath, targetPath string, mode os.FileMode) error {
if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil {
return err
}
tempPath := targetPath + ".tmp"
_ = os.Remove(tempPath)
if err := system.CopyFilePreferClone(sourcePath, tempPath); err != nil {
return err
}
if err := os.Chmod(tempPath, mode); err != nil {
_ = os.Remove(tempPath)
return err
}
if err := os.Rename(tempPath, targetPath); err != nil {
_ = os.Remove(tempPath)
return err
}
return nil
}
func requireRoot() error {
if os.Geteuid() == 0 {
return nil
}
return errors.New("this command requires root; run it with sudo")
}
func (d *deps) runSystemctl(ctx context.Context, args ...string) error {
_, err := d.hostCommandOutput(ctx, "systemctl", args...)
return err
}
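// systemctlQuery never returns an error: on success it yields the
// trimmed command output, otherwise whatever short diagnostic it can
// recover (a non-zero `is-active` still prints a usable state string),
// so status rendering degrades to text instead of failing.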
func (d *deps) systemctlQuery(ctx context.Context, args ...string) string {
output, err := d.hostCommandOutput(ctx, "systemctl", args...)
if err == nil {
return strings.TrimSpace(string(output))
}
msg := strings.TrimSpace(string(output))
if msg != "" {
return msg
}
msg = strings.TrimSpace(err.Error())
if idx := strings.LastIndex(msg, ": "); idx >= 0 {
return strings.TrimSpace(msg[idx+2:])
}
return msg
}


@@ -0,0 +1,420 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"banger/internal/api"
"banger/internal/buildinfo"
"banger/internal/installmeta"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/updater"
"github.com/spf13/cobra"
)
// stagingTarballName is what the staged release tarball is saved as
// inside the staging dir. The exact name doesn't matter (the path is
// internal and ephemeral), but a stable name makes it easy to find
// when debugging a stuck update.
const stagingTarballName = "release.tar.gz"
func (d *deps) newUpdateCommand() *cobra.Command {
var (
checkOnly bool
dryRun bool
force bool
toVersion string
manifestURL string
pubkeyFile string
)
cmd := &cobra.Command{
Use: "update",
Short: "Download and install a newer banger release",
Long: strings.TrimSpace(`
Replace the running banger install with a newer release published
to ` + updater.ManifestURL() + `.
Flow:
1. Fetch the release manifest.
2. Refuse if any banger operation is in flight (use --force to skip).
3. Download tarball + SHA256SUMS, verify hashes.
4. Sanity-run the staged binaries; refuse if --check-migrations
reports the new bangerd can't open this host's state DB.
5. Atomically swap binaries; restart bangerd-root + bangerd.
6. Run banger doctor; auto-roll back on failure.
7. Update install metadata with the new version triple.
Steps 1-4 are non-destructive: failures abort with the install
untouched. Step 5+ is the cutover; auto-rollback in step 6 covers
the half-failed-update case.
Requires root: the swap writes /usr/local/bin and the restart
talks to systemd. Run with sudo.
`),
Example: strings.TrimSpace(`
banger update --check
sudo banger update
sudo banger update --to v0.1.1
sudo banger update --dry-run
`),
Args: noArgsUsage("usage: banger update [--check] [--dry-run] [--force] [--to vX.Y.Z]"),
RunE: func(cmd *cobra.Command, args []string) error {
return d.runUpdate(cmd, runUpdateOpts{
checkOnly: checkOnly,
dryRun: dryRun,
force: force,
toVersion: toVersion,
manifestURL: manifestURL,
pubkeyFile: pubkeyFile,
})
},
}
cmd.Flags().BoolVar(&checkOnly, "check", false, "report whether a newer release is available, then exit")
cmd.Flags().BoolVar(&dryRun, "dry-run", false, "fetch and verify, but do not swap or restart anything")
cmd.Flags().BoolVar(&force, "force", false, "skip in-flight-op refusal and post-restart doctor verification")
cmd.Flags().StringVar(&toVersion, "to", "", "specific release version to install (default: latest_stable from manifest)")
// Hidden test/dev hooks: redirect the updater at a non-default
// manifest URL and trust a non-default cosign public key. Used by
// the smoke suite to drive a real update against locally-built
// release artefacts. Production users have no reason to touch
// these; they are not advertised in --help.
cmd.Flags().StringVar(&manifestURL, "manifest-url", "", "")
cmd.Flags().StringVar(&pubkeyFile, "pubkey-file", "", "")
_ = cmd.Flags().MarkHidden("manifest-url")
_ = cmd.Flags().MarkHidden("pubkey-file")
return cmd
}
type runUpdateOpts struct {
checkOnly bool
dryRun bool
force bool
toVersion string
manifestURL string
pubkeyFile string
}
func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error {
ctx := cmd.Context()
out := cmd.OutOrStdout()
// Resolve the test/dev override flags up front so a bad
// --pubkey-file fails fast before any network round-trips.
pubKeyPEM := updater.BangerReleasePublicKey
if strings.TrimSpace(opts.pubkeyFile) != "" {
body, err := os.ReadFile(opts.pubkeyFile)
if err != nil {
return fmt.Errorf("read --pubkey-file: %w", err)
}
pubKeyPEM = string(body)
}
// Discover.
client := &http.Client{Timeout: 30 * time.Second}
var (
manifest updater.Manifest
err error
)
if strings.TrimSpace(opts.manifestURL) != "" {
manifest, err = updater.FetchManifestFrom(ctx, client, opts.manifestURL)
} else {
manifest, err = updater.FetchManifest(ctx, client)
}
if err != nil {
return fmt.Errorf("discover: %w", err)
}
var target updater.Release
if strings.TrimSpace(opts.toVersion) != "" {
target, err = manifest.LookupRelease(opts.toVersion)
} else {
target, err = manifest.Latest()
}
if err != nil {
return fmt.Errorf("resolve target release: %w", err)
}
currentVersion := buildinfo.Current().Version
if opts.checkOnly {
return reportCheckResult(out, currentVersion, target.Version)
}
if currentVersion == target.Version {
fmt.Fprintf(out, "already on %s\n", target.Version)
return nil
}
// Past this point we're going to mutate the host. Require root.
if err := requireRoot(); err != nil {
return err
}
socketPath := paths.ResolveSystem().SocketPath
// Refuse if anything is in flight.
if !opts.force {
if err := refuseIfInFlight(ctx, socketPath); err != nil {
return err
}
}
// Stage the download.
stagingDir := updater.DefaultStagingDir(paths.ResolveSystem().CacheDir)
if err := updater.PrepareCleanStaging(stagingDir); err != nil {
return fmt.Errorf("staging: %w", err)
}
tarballPath := filepath.Join(stagingDir, stagingTarballName)
fmt.Fprintf(out, "downloading %s …\n", target.TarballURL)
sumsBody, err := updater.DownloadRelease(ctx, client, target, tarballPath)
if err != nil {
return fmt.Errorf("download: %w", err)
}
if err := updater.FetchAndVerifySignatureWithKey(ctx, client, target, sumsBody, pubKeyPEM); err != nil {
// Don't leave the staged tarball around — it failed
// signature verification and shouldn't be re-runnable.
_ = os.Remove(tarballPath)
return fmt.Errorf("signature: %w", err)
}
stagedDir := filepath.Join(stagingDir, "staged")
if err := os.RemoveAll(stagedDir); err != nil && !os.IsNotExist(err) {
return err
}
staged, err := updater.StageTarball(tarballPath, stagedDir)
if err != nil {
return fmt.Errorf("stage: %w", err)
}
// Sanity-run the staged binaries.
if err := sanityRunStaged(ctx, staged, target.Version); err != nil {
return fmt.Errorf("sanity check: %w", err)
}
if opts.dryRun {
fmt.Fprintf(out, "dry-run: would install %s → %s, restart services, run doctor\n", currentVersion, target.Version)
return nil
}
// Swap.
targets := updater.DefaultInstallTargets()
swap, err := updater.Swap(staged, targets)
if err != nil {
// Best-effort rollback of any partial swap that did land
// before failure. If rollback also fails we surface both.
if rbErr := updater.Rollback(swap); rbErr != nil {
return fmt.Errorf("swap: %w (rollback also failed: %v)", err, rbErr)
}
return fmt.Errorf("swap: %w (rolled back)", err)
}
// Restart services + wait for the new daemon. A `systemctl restart`
// that fails has typically already STOPPED the unit, so the prior
// binary on disk isn't running anywhere — Rollback() must be paired
// with a re-restart to bring the rolled-back binary back into a
// running state. That's rollbackAndRestart's job; rollbackAndWrap
// is for the swap-step failures earlier where the restart never
// fired and the old binary is still in memory.
if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil {
return rollbackAndRestart(ctx, d, swap, "restart helper", err)
}
if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil {
return rollbackAndRestart(ctx, d, swap, "restart daemon", err)
}
if err := d.waitForDaemonReady(ctx, socketPath); err != nil {
return rollbackAndRestart(ctx, d, swap, "wait daemon ready", err)
}
// Verify with doctor unless --force says otherwise.
if !opts.force {
if err := runPostUpdateDoctor(ctx, d, cmd); err != nil {
return rollbackAndRestart(ctx, d, swap, "post-update doctor", err)
}
}
// Finalise: refresh install metadata, drop backups, clean staging.
// Read the new binary's identity by exec'ing it; buildinfo.Current()
// reflects the OLD running CLI (we're it), so the commit + built_at
// have to come from the freshly-swapped /usr/local/bin/banger or
// install.toml ends up with mixed-version fields.
newInfo, err := readInstalledBuildinfo(ctx, targets.Banger)
if err != nil {
fmt.Fprintf(out, "warning: read installed buildinfo: %v\n", err)
// Fall back to the manifest version + the running binary's
// commit/built_at. install.toml drift is a doctor warning,
// not a broken host, so don't fail the update.
old := buildinfo.Current()
newInfo = buildinfo.Info{Version: target.Version, Commit: old.Commit, BuiltAt: old.BuiltAt}
}
if err := installmeta.UpdateBuildInfo(installmeta.DefaultPath, newInfo.Version, newInfo.Commit, newInfo.BuiltAt); err != nil {
fmt.Fprintf(out, "warning: update install metadata: %v\n", err)
}
if err := updater.CleanupBackups(swap); err != nil {
fmt.Fprintf(out, "warning: cleanup backups: %v\n", err)
}
_ = os.RemoveAll(stagingDir)
fmt.Fprintf(out, "updated %s → %s\n", currentVersion, target.Version)
return nil
}
func reportCheckResult(out io.Writer, current, latest string) error {
if current == latest {
fmt.Fprintf(out, "up to date (%s)\n", current)
return nil
}
fmt.Fprintf(out, "update available: %s → %s\n", current, latest)
return nil
}
// refuseIfInFlight asks the running daemon for in-flight operations
// and refuses the update if any are not Done. Per the v0.1.0 plan:
// no wait, no drain — the operator runs `banger update` on an idle
// host or passes --force.
func refuseIfInFlight(ctx context.Context, socketPath string) error {
res, err := rpc.Call[api.OperationsListResult](ctx, socketPath, "daemon.operations.list", nil)
if err != nil {
// A daemon that's down or unreachable is itself a reason to
// refuse — we'd be unable to verify anything. Surface that
// clearly rather than blindly proceeding.
return fmt.Errorf("contact daemon: %w (use --force to override)", err)
}
pending := []string{}
for _, op := range res.Operations {
if op.Done {
continue
}
pending = append(pending, fmt.Sprintf("%s/%s (stage=%s)", op.Kind, op.ID, op.Stage))
}
if len(pending) > 0 {
return fmt.Errorf("refusing update: %d in-flight operation(s): %s", len(pending), strings.Join(pending, ", "))
}
return nil
}
// sanityRunStaged executes the staged banger and bangerd to confirm
// they can at least print their own version + report schema state.
// Catches obvious-broken binaries (wrong arch, missing libs,
// embedded panics) before we swap them into place.
func sanityRunStaged(ctx context.Context, staged updater.StagedRelease, expectedVersion string) error {
// banger --version: must succeed and mention the expected version
// somewhere (the format is "banger vX.Y.Z (commit ..., built ...)").
out, err := exec.CommandContext(ctx, staged.BangerPath, "--version").CombinedOutput()
if err != nil {
return fmt.Errorf("staged banger --version: %w (%s)", err, strings.TrimSpace(string(out)))
}
if !strings.Contains(string(out), expectedVersion) {
return fmt.Errorf("staged banger --version reported %q, expected to mention %s", strings.TrimSpace(string(out)), expectedVersion)
}
// bangerd --check-migrations against the configured DB. Exit 2
// means incompatible — we refuse to swap. Exit 0 (compatible) and
// exit 1 (migrations needed; will auto-apply on first Open) are
// both acceptable.
out, err = exec.CommandContext(ctx, staged.BangerdPath, "--check-migrations", "--system").CombinedOutput()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 {
return nil // migrations-needed; safe to proceed
}
if errors.As(err, &exitErr) && exitErr.ExitCode() == 2 {
return fmt.Errorf("staged bangerd would not open this host's state DB: %s", strings.TrimSpace(string(out)))
}
return fmt.Errorf("staged bangerd --check-migrations: %w (%s)", err, strings.TrimSpace(string(out)))
}
return nil
}
// readInstalledBuildinfo execs the just-swapped banger binary, parses
// its three-line `version` output, and returns the parsed identity.
// Used to refresh install.toml after an update so the on-disk record
// reflects the binary that's actually installed — buildinfo.Current()
// in the running process is the OLD binary's identity, not the one we
// just put on disk.
//
// Output shape (from internal/cli/banger.go versionString):
//
// version: vX.Y.Z
// commit: <sha>
// built_at: <RFC3339>
func readInstalledBuildinfo(ctx context.Context, bangerPath string) (buildinfo.Info, error) {
out, err := exec.CommandContext(ctx, bangerPath, "version").Output()
if err != nil {
return buildinfo.Info{}, fmt.Errorf("exec %s version: %w", bangerPath, err)
}
return parseVersionOutput(string(out))
}
// parseVersionOutput extracts the three identity fields from
// `banger version`. Split out of readInstalledBuildinfo so it can be
// unit-tested without exec'ing a real binary.
func parseVersionOutput(out string) (buildinfo.Info, error) {
var info buildinfo.Info
for _, line := range strings.Split(out, "\n") {
k, v, ok := strings.Cut(line, ":")
if !ok {
continue
}
switch strings.TrimSpace(k) {
case "version":
info.Version = strings.TrimSpace(v)
case "commit":
info.Commit = strings.TrimSpace(v)
case "built_at":
info.BuiltAt = strings.TrimSpace(v)
}
}
if info.Version == "" || info.Commit == "" || info.BuiltAt == "" {
return buildinfo.Info{}, fmt.Errorf("could not parse version/commit/built_at from %q", strings.TrimSpace(out))
}
return info, nil
}
// runPostUpdateDoctor invokes `banger doctor` on the JUST-INSTALLED
// CLI (not d.doctor — that's the in-process implementation; we want
// to exercise the new binary end-to-end).
func runPostUpdateDoctor(ctx context.Context, d *deps, cmd *cobra.Command) error {
out, err := exec.CommandContext(ctx, "/usr/local/bin/banger", "doctor").CombinedOutput()
if err != nil {
return fmt.Errorf("doctor: %w\n%s", err, string(out))
}
// banger doctor prints to stdout regardless of pass/fail; print
// it through so the operator can see the new install's check
// result. (Doctor's exit code is what we trust; printing is
// just operator UX.)
fmt.Fprintln(cmd.OutOrStdout(), strings.TrimSpace(string(out)))
return nil
}
// rollbackAndWrap is for failures BEFORE we restarted services. The
// previous binaries are still on disk under .previous; restoring them
// is an atomic-rename, no service involvement needed (the OLD daemon
// is still running because the restart never happened).
func rollbackAndWrap(swap updater.SwapResult, stage string, err error) error {
if rbErr := updater.Rollback(swap); rbErr != nil {
return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr)
}
return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err)
}
// rollbackAndRestart is for failures AFTER the service restart. We
// roll back binaries AND re-restart so the OLD versions take over
// again. If even that fails, the install is broken; surface
// everything we know.
func rollbackAndRestart(ctx context.Context, d *deps, swap updater.SwapResult, stage string, err error) error {
if rbErr := updater.Rollback(swap); rbErr != nil {
return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr)
}
if rsErr := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); rsErr != nil {
return fmt.Errorf("%s failed: %w (restored binaries but failed to restart helper: %v)", stage, err, rsErr)
}
if rsErr := d.runSystemctl(ctx, "restart", installmeta.DefaultService); rsErr != nil {
return fmt.Errorf("%s failed: %w (restored binaries but failed to restart daemon: %v)", stage, err, rsErr)
}
return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err)
}


@@ -0,0 +1,79 @@
package cli
import "testing"
func TestParseVersionOutput(t *testing.T) {
cases := []struct {
name string
in string
wantVersion string
wantCommit string
wantBuilt string
wantErr bool
}{
{
name: "happy path — three-line shape from banger version",
in: `version: v0.1.2
commit: a0b5c7fa3ca95a37ba99b35280fc75e5647b59e8
built_at: 2026-04-29T17:34:45Z
`,
wantVersion: "v0.1.2",
wantCommit: "a0b5c7fa3ca95a37ba99b35280fc75e5647b59e8",
wantBuilt: "2026-04-29T17:34:45Z",
},
{
name: "tolerates extra whitespace around the values",
in: ` version : v0.1.2
commit : abc123
built_at : 2026-01-01T00:00:00Z`,
wantVersion: "v0.1.2",
wantCommit: "abc123",
wantBuilt: "2026-01-01T00:00:00Z",
},
{
name: "missing commit field is rejected",
in: "version: v0.1.2\nbuilt_at: 2026-01-01T00:00:00Z\n",
wantErr: true,
},
{
name: "empty input is rejected",
in: "",
wantErr: true,
},
{
name: "unrelated lines are ignored",
in: `banger v0.1.2
some other diagnostic line: with a colon
version: v0.1.2
commit: abc
built_at: 2026-01-01T00:00:00Z
`,
wantVersion: "v0.1.2",
wantCommit: "abc",
wantBuilt: "2026-01-01T00:00:00Z",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got, err := parseVersionOutput(tc.in)
if tc.wantErr {
if err == nil {
t.Fatalf("want error, got nil; parsed=%+v", got)
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got.Version != tc.wantVersion {
t.Errorf("Version: got %q, want %q", got.Version, tc.wantVersion)
}
if got.Commit != tc.wantCommit {
t.Errorf("Commit: got %q, want %q", got.Commit, tc.wantCommit)
}
if got.BuiltAt != tc.wantBuilt {
t.Errorf("BuiltAt: got %q, want %q", got.BuiltAt, tc.wantBuilt)
}
})
}
}

1144
internal/cli/commands_vm.go Normal file

File diff suppressed because it is too large

191
internal/cli/completion.go Normal file

@@ -0,0 +1,191 @@
package cli
import (
"context"
"banger/internal/api"
"banger/internal/paths"
"banger/internal/rpc"
"github.com/spf13/cobra"
)
// Completion helpers. Design notes:
//
// - Never auto-start the daemon. If it isn't running, return no
// suggestions + NoFileComp so the shell doesn't fall back to file
// completion (there are no local files that would plausibly match a
// VM or image name).
// - Filter out names already in args — avoids suggesting the same VM
// twice on variadic commands like `vm stop a b <tab>`.
// - Fail silently. Completion is advisory; any error path returns an
// empty suggestion list rather than propagating to the user.
// defaultCompletionLister backs the *deps.completionLister field;
// tests inject their own fake via the struct instead of mutating
// package-level vars.
func defaultCompletionLister(ctx context.Context, socketPath, method string) ([]string, error) {
switch method {
case "vm.list":
result, err := rpc.Call[api.VMListResult](ctx, socketPath, method, api.Empty{})
if err != nil {
return nil, err
}
names := make([]string, 0, len(result.VMs))
for _, vm := range result.VMs {
if vm.Name != "" {
names = append(names, vm.Name)
}
}
return names, nil
case "image.list":
result, err := rpc.Call[api.ImageListResult](ctx, socketPath, method, api.Empty{})
if err != nil {
return nil, err
}
names := make([]string, 0, len(result.Images))
for _, image := range result.Images {
if image.Name != "" {
names = append(names, image.Name)
}
}
return names, nil
case "kernel.list":
result, err := rpc.Call[api.KernelListResult](ctx, socketPath, method, api.Empty{})
if err != nil {
return nil, err
}
names := make([]string, 0, len(result.Entries))
for _, entry := range result.Entries {
if entry.Name != "" {
names = append(names, entry.Name)
}
}
return names, nil
}
return nil, nil
}
// daemonSocketForCompletion returns the socket path IFF the daemon is
// already running. Returns "", false when no daemon is up — completion
// callers use this as the bail signal.
func (d *deps) daemonSocketForCompletion(ctx context.Context) (string, bool) {
layout := paths.ResolveSystem()
if _, err := d.daemonPing(ctx, layout.SocketPath); err != nil {
return "", false
}
return layout.SocketPath, true
}
// filterPrefix returns the subset of candidates starting with toComplete
// that aren't in exclude. Comparison is case-sensitive because VM/image
// names preserve case.
func filterPrefix(candidates, exclude []string, toComplete string) []string {
excludeSet := make(map[string]struct{}, len(exclude))
for _, e := range exclude {
excludeSet[e] = struct{}{}
}
out := make([]string, 0, len(candidates))
for _, c := range candidates {
if _, skip := excludeSet[c]; skip {
continue
}
if toComplete == "" || hasPrefix(c, toComplete) {
out = append(out, c)
}
}
return out
}
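// hasPrefix reports whether s starts with prefix; a local equivalent
// of strings.HasPrefix (this file does not import strings).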
func hasPrefix(s, prefix string) bool {
return len(s) >= len(prefix) && s[:len(prefix)] == prefix
}
func (d *deps) completeVMNames(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
socket, ok := d.daemonSocketForCompletion(cmd.Context())
if !ok {
return nil, cobra.ShellCompDirectiveNoFileComp
}
names, err := d.completionLister(cmd.Context(), socket, "vm.list")
if err != nil {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return filterPrefix(names, args, toComplete), cobra.ShellCompDirectiveNoFileComp
}
// completeVMNameOnlyAtPos0 restricts VM-name completion to the first
// positional argument. Used by commands like `vm ssh <vm> [ssh args...]`
// where args after pos 0 are free-form.
func (d *deps) completeVMNameOnlyAtPos0(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return d.completeVMNames(cmd, args, toComplete)
}
func (d *deps) completeImageNameOnlyAtPos0(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return d.completeImageNames(cmd, args, toComplete)
}
func (d *deps) completeKernelNameOnlyAtPos0(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return d.completeKernelNames(cmd, args, toComplete)
}
func (d *deps) completeImageNames(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
socket, ok := d.daemonSocketForCompletion(cmd.Context())
if !ok {
return nil, cobra.ShellCompDirectiveNoFileComp
}
names, err := d.completionLister(cmd.Context(), socket, "image.list")
if err != nil {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return filterPrefix(names, args, toComplete), cobra.ShellCompDirectiveNoFileComp
}
func (d *deps) completeKernelNames(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
socket, ok := d.daemonSocketForCompletion(cmd.Context())
if !ok {
return nil, cobra.ShellCompDirectiveNoFileComp
}
names, err := d.completionLister(cmd.Context(), socket, "kernel.list")
if err != nil {
return nil, cobra.ShellCompDirectiveNoFileComp
}
return filterPrefix(names, args, toComplete), cobra.ShellCompDirectiveNoFileComp
}
// completeKernelCatalogNameOnlyAtPos0 completes kernel names from the
// remote catalog (pulled + available) at position 0 only.
func (d *deps) completeKernelCatalogNameOnlyAtPos0(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if len(args) > 0 {
return nil, cobra.ShellCompDirectiveNoFileComp
}
socket, ok := d.daemonSocketForCompletion(cmd.Context())
if !ok {
return nil, cobra.ShellCompDirectiveNoFileComp
}
result, err := rpc.Call[api.KernelCatalogResult](cmd.Context(), socket, "kernel.catalog", api.Empty{})
if err != nil {
return nil, cobra.ShellCompDirectiveNoFileComp
}
names := make([]string, 0, len(result.Entries))
for _, entry := range result.Entries {
if entry.Name != "" {
names = append(names, entry.Name)
}
}
return filterPrefix(names, args, toComplete), cobra.ShellCompDirectiveNoFileComp
}
// completeImageCatalogNameOnlyAtPos0 falls back to the locally-installed
// image list (there is no remote image catalog RPC today).
func (d *deps) completeImageCatalogNameOnlyAtPos0(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
return d.completeImageNameOnlyAtPos0(cmd, args, toComplete)
}


@@ -0,0 +1,175 @@
package cli
import (
"context"
"errors"
"reflect"
"testing"
"banger/internal/api"
"github.com/spf13/cobra"
)
// stubCompletionSeams installs test doubles for the daemon ping + lister
// seams on the caller's *deps. Tests opt into the sub-functions they
// actually need.
func stubCompletionSeams(
t *testing.T,
d *deps,
pingErr error,
names map[string][]string,
listErr error) {
t.Helper()
d.daemonPing = func(ctx context.Context, socketPath string) (api.PingResult, error) {
if pingErr != nil {
return api.PingResult{}, pingErr
}
return api.PingResult{}, nil
}
d.completionLister = func(ctx context.Context, socketPath, method string) ([]string, error) {
if listErr != nil {
return nil, listErr
}
return names[method], nil
}
}
func TestFilterPrefix(t *testing.T) {
cases := []struct {
name string
candidates []string
exclude []string
prefix string
want []string
}{
{"no filter", []string{"a", "b"}, nil, "", []string{"a", "b"}},
{"prefix match", []string{"apple", "banana", "apricot"}, nil, "ap", []string{"apple", "apricot"}},
{"exclude already entered", []string{"a", "b", "c"}, []string{"b"}, "", []string{"a", "c"}},
{"prefix + exclude", []string{"alpha", "avocado", "banana"}, []string{"alpha"}, "a", []string{"avocado"}},
{"exact case sensitive", []string{"VM", "vm"}, nil, "v", []string{"vm"}},
{"empty candidates", nil, nil, "any", nil},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := filterPrefix(tc.candidates, tc.exclude, tc.prefix)
if !reflect.DeepEqual(got, tc.want) {
// Allow nil == empty
if len(got) == 0 && len(tc.want) == 0 {
return
}
t.Errorf("got %v, want %v", got, tc.want)
}
})
}
}
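// testCmdWithCtx returns a throwaway cobra command with a background
// context attached, since the completion funcs under test read
// cmd.Context().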
func testCmdWithCtx() *cobra.Command {
cmd := &cobra.Command{Use: "test"}
cmd.SetContext(context.Background())
return cmd
}
func TestCompleteVMNamesHappyPath(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"vm.list": {"alpha", "beta", "gamma"}}, nil)
got, directive := d.completeVMNames(testCmdWithCtx(), nil, "")
if directive != cobra.ShellCompDirectiveNoFileComp {
t.Errorf("directive = %d, want NoFileComp", directive)
}
if !reflect.DeepEqual(got, []string{"alpha", "beta", "gamma"}) {
t.Errorf("got %v", got)
}
}
func TestCompleteVMNamesDaemonDown(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, errors.New("connection refused"), nil, nil)
got, directive := d.completeVMNames(testCmdWithCtx(), nil, "")
if len(got) != 0 {
t.Errorf("daemon-down should return no suggestions, got %v", got)
}
if directive != cobra.ShellCompDirectiveNoFileComp {
t.Errorf("directive = %d, want NoFileComp", directive)
}
}
func TestCompleteVMNamesRPCError(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, nil, errors.New("rpc failed"))
got, _ := d.completeVMNames(testCmdWithCtx(), nil, "")
if len(got) != 0 {
t.Errorf("rpc error should return no suggestions, got %v", got)
}
}
func TestCompleteVMNamesExcludesAlreadyEntered(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"vm.list": {"alpha", "beta", "gamma"}}, nil)
got, _ := d.completeVMNames(testCmdWithCtx(), []string{"alpha"}, "")
want := []string{"beta", "gamma"}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %v, want %v", got, want)
}
}
func TestCompleteVMNamesPrefixFilter(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"vm.list": {"alpha", "beta", "alphabet"}}, nil)
got, _ := d.completeVMNames(testCmdWithCtx(), nil, "alp")
want := []string{"alpha", "alphabet"}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %v, want %v", got, want)
}
}
func TestCompleteVMNameOnlyAtPos0(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"vm.list": {"alpha"}}, nil)
atPos0, _ := d.completeVMNameOnlyAtPos0(testCmdWithCtx(), nil, "")
if len(atPos0) != 1 || atPos0[0] != "alpha" {
t.Errorf("pos 0: got %v", atPos0)
}
atPos1, _ := d.completeVMNameOnlyAtPos0(testCmdWithCtx(), []string{"alpha"}, "")
if len(atPos1) != 0 {
t.Errorf("pos 1+ should be silent, got %v", atPos1)
}
}
func TestCompleteImageNames(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"image.list": {"debian-bookworm", "alpine"}}, nil)
got, _ := d.completeImageNames(testCmdWithCtx(), nil, "")
if !reflect.DeepEqual(got, []string{"debian-bookworm", "alpine"}) {
t.Errorf("got %v", got)
}
}
func TestCompleteKernelNames(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"kernel.list": {"generic-6.12"}}, nil)
got, _ := d.completeKernelNames(testCmdWithCtx(), nil, "")
if len(got) != 1 || got[0] != "generic-6.12" {
t.Errorf("got %v", got)
}
}
func TestCompleteImageNameOnlyAtPos0SilentAfterFirst(t *testing.T) {
d := defaultDeps()
stubCompletionSeams(t, d, nil, map[string][]string{"image.list": {"alpine"}}, nil)
after, _ := d.completeImageNameOnlyAtPos0(testCmdWithCtx(), []string{"alpine"}, "")
if len(after) != 0 {
t.Errorf("expected silence at pos 1+, got %v", after)
}
}


@@ -0,0 +1,93 @@
package cli
import (
"context"
"errors"
"fmt"
"os"
"strings"
"time"
"banger/internal/config"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
)
const (
daemonReadyTimeout = 15 * time.Second
daemonReadyPollInterval = 100 * time.Millisecond
)
// waitForDaemonReady blocks until the daemon at socketPath answers
// ping, the context is cancelled, or daemonReadyTimeout elapses.
// Used by `system install` and `system restart` so they don't return
// before the daemon has actually finished binding its socket — the
// systemd Type=simple unit reports "active" the moment the binary
// is exec()'d, well before bangerd has read its config and listened
// on the unix socket.
func (d *deps) waitForDaemonReady(ctx context.Context, socketPath string) error {
deadline := time.Now().Add(daemonReadyTimeout)
pingCtx, cancel := context.WithDeadline(ctx, deadline)
defer cancel()
for {
if _, err := d.daemonPing(pingCtx, socketPath); err == nil {
return nil
}
if time.Now().After(deadline) {
return fmt.Errorf("daemon did not become ready at %s within %s", socketPath, daemonReadyTimeout)
}
select {
case <-pingCtx.Done():
return fmt.Errorf("daemon did not become ready at %s: %w", socketPath, pingCtx.Err())
case <-time.After(daemonReadyPollInterval):
}
}
}
var (
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Load(installmeta.DefaultPath)
}
currentUID = os.Getuid
)
// ensureDaemon validates that the current CLI user matches the
// installed banger owner, then pings the system socket. Every CLI
// command that needs to talk to the daemon routes through here.
func (d *deps) ensureDaemon(ctx context.Context) (paths.Layout, model.DaemonConfig, error) {
meta, metaErr := loadInstallMetadata()
if metaErr == nil && currentUID() != meta.OwnerUID {
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("banger is installed for %s; switch to that user or reinstall with `sudo banger system install --owner %s`", meta.OwnerUser, userHint())
}
if metaErr != nil && !errors.Is(metaErr, os.ErrNotExist) {
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("load %s: %w", installmeta.DefaultPath, metaErr)
}
userLayout, err := paths.Resolve()
if err != nil {
return paths.Layout{}, model.DaemonConfig{}, err
}
cfg, err := config.Load(userLayout)
if err != nil {
return paths.Layout{}, model.DaemonConfig{}, err
}
layout := paths.ResolveSystem()
if _, err := d.daemonPing(ctx, layout.SocketPath); err == nil {
return layout, cfg, nil
}
if metaErr == nil {
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("banger service not reachable at %s; run `sudo banger system restart`", layout.SocketPath)
}
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("banger service not running at %s; run `sudo banger system install`", layout.SocketPath)
}
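// userHint guesses the username to splice into the reinstall hint:
// SUDO_USER first, then USER, falling back to a "<user>" placeholder.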
func userHint() string {
if sudoUser := strings.TrimSpace(os.Getenv("SUDO_USER")); sudoUser != "" {
return sudoUser
}
if user := strings.TrimSpace(os.Getenv("USER")); user != "" {
return user
}
return "<user>"
}


@@ -0,0 +1,227 @@
package cli
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/api"
"banger/internal/installmeta"
)
func TestEnsureDaemonRequiresSystemInstallWhenMetadataMissing(t *testing.T) {
t.Setenv("XDG_CONFIG_HOME", filepath.Join(t.TempDir(), "config"))
t.Setenv("XDG_STATE_HOME", filepath.Join(t.TempDir(), "state"))
t.Setenv("XDG_CACHE_HOME", filepath.Join(t.TempDir(), "cache"))
t.Setenv("XDG_RUNTIME_DIR", filepath.Join(t.TempDir(), "run"))
restoreLoad := loadInstallMetadata
restoreUID := currentUID
t.Cleanup(func() {
loadInstallMetadata = restoreLoad
currentUID = restoreUID
})
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Metadata{}, os.ErrNotExist
}
currentUID = os.Getuid
d := defaultDeps()
d.daemonPing = func(context.Context, string) (api.PingResult, error) {
return api.PingResult{}, errors.New("dial unix /run/banger/bangerd.sock: no such file")
}
_, _, err := d.ensureDaemon(context.Background())
if err == nil || !strings.Contains(err.Error(), "sudo banger system install") {
t.Fatalf("ensureDaemon error = %v, want install guidance", err)
}
}
func TestEnsureDaemonSuggestsRestartWhenInstalledButUnavailable(t *testing.T) {
t.Setenv("XDG_CONFIG_HOME", filepath.Join(t.TempDir(), "config"))
t.Setenv("XDG_STATE_HOME", filepath.Join(t.TempDir(), "state"))
t.Setenv("XDG_CACHE_HOME", filepath.Join(t.TempDir(), "cache"))
t.Setenv("XDG_RUNTIME_DIR", filepath.Join(t.TempDir(), "run"))
restoreLoad := loadInstallMetadata
restoreUID := currentUID
t.Cleanup(func() {
loadInstallMetadata = restoreLoad
currentUID = restoreUID
})
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Metadata{
OwnerUser: "tester",
OwnerUID: os.Getuid(),
OwnerGID: os.Getgid(),
OwnerHome: t.TempDir(),
}, nil
}
currentUID = os.Getuid
d := defaultDeps()
d.daemonPing = func(context.Context, string) (api.PingResult, error) {
return api.PingResult{}, errors.New("dial unix /run/banger/bangerd.sock: connection refused")
}
_, _, err := d.ensureDaemon(context.Background())
if err == nil || !strings.Contains(err.Error(), "sudo banger system restart") {
t.Fatalf("ensureDaemon error = %v, want restart guidance", err)
}
}
func TestEnsureDaemonRejectsNonOwnerUser(t *testing.T) {
restoreLoad := loadInstallMetadata
restoreUID := currentUID
t.Cleanup(func() {
loadInstallMetadata = restoreLoad
currentUID = restoreUID
})
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Metadata{
OwnerUser: "alice",
OwnerUID: os.Getuid() + 1,
OwnerGID: os.Getgid(),
OwnerHome: t.TempDir(),
}, nil
}
currentUID = os.Getuid
d := defaultDeps()
d.daemonPing = func(context.Context, string) (api.PingResult, error) {
t.Fatal("daemonPing should not be called for a non-owner user")
return api.PingResult{}, nil
}
_, _, err := d.ensureDaemon(context.Background())
if err == nil || !strings.Contains(err.Error(), "installed for alice") {
t.Fatalf("ensureDaemon error = %v, want owner mismatch guidance", err)
}
}
func TestSystemSubcommandFlagsAreScoped(t *testing.T) {
root := NewBangerCommand()
systemCmd, _, err := root.Find([]string{"system"})
if err != nil {
t.Fatalf("find system: %v", err)
}
installCmd, _, err := systemCmd.Find([]string{"install"})
if err != nil {
t.Fatalf("find system install: %v", err)
}
uninstallCmd, _, err := systemCmd.Find([]string{"uninstall"})
if err != nil {
t.Fatalf("find system uninstall: %v", err)
}
if installCmd.Flags().Lookup("owner") == nil {
t.Fatal("system install is missing --owner")
}
if uninstallCmd.Flags().Lookup("purge") == nil {
t.Fatal("system uninstall is missing --purge")
}
}
func TestRenderSystemdUnitIncludesHardeningDirectives(t *testing.T) {
unit := renderSystemdUnit(installmeta.Metadata{
OwnerUser: "alice",
OwnerUID: 1000,
OwnerGID: 1000,
OwnerHome: "/home/alice/dev home",
})
for _, want := range []string{
"ExecStart=/usr/local/bin/bangerd --system",
"User=alice",
"Wants=network-online.target bangerd-root.service",
"After=bangerd-root.service",
"Requires=bangerd-root.service",
"KillMode=process",
"UMask=0077",
"Environment=TMPDIR=/run/banger",
"NoNewPrivileges=yes",
"PrivateMounts=yes",
"ProtectSystem=strict",
"ProtectHome=read-only",
"ProtectControlGroups=yes",
"ProtectKernelLogs=yes",
"ProtectKernelModules=yes",
"ProtectClock=yes",
"ProtectHostname=yes",
"RestrictSUIDSGID=yes",
"LockPersonality=yes",
"SystemCallArchitectures=native",
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
"StateDirectory=banger",
"StateDirectoryMode=0700",
"CacheDirectory=banger",
"CacheDirectoryMode=0700",
"RuntimeDirectory=banger",
"RuntimeDirectoryMode=0700",
"RuntimeDirectoryPreserve=yes",
`ReadOnlyPaths="/home/alice/dev home"`,
} {
if !strings.Contains(unit, want) {
t.Fatalf("unit = %q, want %q", unit, want)
}
}
}
func TestRenderRootHelperSystemdUnitIncludesRequiredCapabilities(t *testing.T) {
unit := renderRootHelperSystemdUnit()
for _, want := range []string{
"ExecStart=/usr/local/bin/bangerd --root-helper",
// Both directives are load-bearing for "VM survives helper
// restart": KillMode=process limits the initial SIGTERM to
// the helper main, SendSIGKILL=no disables the SIGKILL
// escalation. The helper itself does the cgroup reparent
// (see roothelper.reparentToBangerFCCgroup) — without
// that, even these directives leave firecracker exposed to
// systemd's stop-time cleanup.
"KillMode=process",
"SendSIGKILL=no",
"Environment=TMPDIR=/run/banger-root",
"NoNewPrivileges=yes",
"PrivateTmp=yes",
"PrivateMounts=yes",
"ProtectSystem=strict",
"ProtectHome=yes",
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
"CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT",
"ReadWritePaths=/var/lib/banger",
"RuntimeDirectory=banger-root",
"RuntimeDirectoryMode=0711",
"RuntimeDirectoryPreserve=yes",
} {
if !strings.Contains(unit, want) {
t.Fatalf("unit = %q, want %q", unit, want)
}
}
}
func TestRenderSystemdUnitsIncludeOptionalCoverageEnv(t *testing.T) {
t.Setenv(systemCoverDirEnv, "/var/lib/banger")
t.Setenv(rootCoverDirEnv, "/var/lib/banger")
userUnit := renderSystemdUnit(installmeta.Metadata{
OwnerUser: "alice",
OwnerUID: 1000,
OwnerGID: 1000,
OwnerHome: "/home/alice",
})
if !strings.Contains(userUnit, `Environment=GOCOVERDIR="/var/lib/banger"`) {
t.Fatalf("user unit = %q, want GOCOVERDIR env", userUnit)
}
rootUnit := renderRootHelperSystemdUnit()
if !strings.Contains(rootUnit, `Environment=GOCOVERDIR="/var/lib/banger"`) {
t.Fatalf("root unit = %q, want GOCOVERDIR env", rootUnit)
}
}

139
internal/cli/deps.go Normal file

@@ -0,0 +1,139 @@
package cli
import (
"context"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"banger/internal/api"
"banger/internal/daemon"
"banger/internal/daemon/workspace"
"banger/internal/guest"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/system"
"banger/internal/toolingplan"
)
// deps holds the function seams production code dispatches through and
// tests replace with fakes. Keeping these on a per-invocation struct
// (instead of package-level mutable vars) makes the CLI's external
// surface explicit and lets tests run in parallel without leaking fakes
// across test cases.
//
// Every command builder, orchestrator, and helper that touches the RPC
// socket, spawns a subprocess, or reads host state hangs off a *deps
// receiver. Pure helpers (formatters, path resolvers, arg-count
// validators) stay package-level because they hold no references to
// external systems.
type deps struct {
bangerdPath func() (string, error)
daemonExePath func(pid int) string
doctor func(ctx context.Context) (system.Report, error)
sshExec func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, args []string) error
hostCommandOutput func(ctx context.Context, name string, args ...string) ([]byte, error)
vmHealth func(ctx context.Context, socketPath, idOrName string) (api.VMHealthResult, error)
vmSSH func(ctx context.Context, socketPath, idOrName string) (api.VMSSHResult, error)
vmDelete func(ctx context.Context, socketPath, idOrName string) error
vmList func(ctx context.Context, socketPath string) (api.VMListResult, error)
daemonPing func(ctx context.Context, socketPath string) (api.PingResult, error)
vmCreateBegin func(ctx context.Context, socketPath string, params api.VMCreateParams) (api.VMCreateBeginResult, error)
vmCreateStatus func(ctx context.Context, socketPath, operationID string) (api.VMCreateStatusResult, error)
vmCreateCancel func(ctx context.Context, socketPath, operationID string) error
vmPorts func(ctx context.Context, socketPath, idOrName string) (api.VMPortsResult, error)
vmWorkspacePrepare func(ctx context.Context, socketPath string, params api.VMWorkspacePrepareParams) (api.VMWorkspacePrepareResult, error)
vmWorkspaceExport func(ctx context.Context, socketPath string, params api.WorkspaceExportParams) (api.WorkspaceExportResult, error)
guestWaitForSSH func(ctx context.Context, address, privateKeyPath string, interval time.Duration) error
guestDial func(ctx context.Context, address, privateKeyPath string) (vmRunGuestClient, error)
buildVMRunToolingPlan func(ctx context.Context, repoRoot string) toolingplan.Plan
cwd func() (string, error)
completionLister func(ctx context.Context, socketPath, method string) ([]string, error)
// repoInspector is the CLI's single workspace-package Inspector.
// Every code path that needs to shell out to git on the host
// (preflight, dry-run, untracked-count note) goes through it, so
// tests inject a stub Runner via this field instead of mutating a
// package global.
repoInspector *workspace.Inspector
}
func defaultDeps() *deps {
return &deps{
bangerdPath: paths.BangerdPath,
daemonExePath: func(pid int) string {
return filepath.Join("/proc", fmt.Sprintf("%d", pid), "exe")
},
doctor: daemon.Doctor,
sshExec: func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, args []string) error {
sshCmd := exec.CommandContext(ctx, "ssh", args...)
sshCmd.Stdout = stdout
sshCmd.Stderr = stderr
sshCmd.Stdin = stdin
return sshCmd.Run()
},
hostCommandOutput: func(ctx context.Context, name string, args ...string) ([]byte, error) {
cmd := exec.CommandContext(ctx, name, args...)
output, err := cmd.CombinedOutput()
if err == nil {
return output, nil
}
command := strings.TrimSpace(strings.Join(append([]string{name}, args...), " "))
detail := strings.TrimSpace(string(output))
if detail == "" {
return output, fmt.Errorf("%s: %w", command, err)
}
return output, fmt.Errorf("%s: %w: %s", command, err, detail)
},
vmHealth: func(ctx context.Context, socketPath, idOrName string) (api.VMHealthResult, error) {
return rpc.Call[api.VMHealthResult](ctx, socketPath, "vm.health", api.VMRefParams{IDOrName: idOrName})
},
vmSSH: func(ctx context.Context, socketPath, idOrName string) (api.VMSSHResult, error) {
return rpc.Call[api.VMSSHResult](ctx, socketPath, "vm.ssh", api.VMRefParams{IDOrName: idOrName})
},
vmDelete: func(ctx context.Context, socketPath, idOrName string) error {
_, err := rpc.Call[api.VMShowResult](ctx, socketPath, "vm.delete", api.VMRefParams{IDOrName: idOrName})
return err
},
vmList: func(ctx context.Context, socketPath string) (api.VMListResult, error) {
return rpc.Call[api.VMListResult](ctx, socketPath, "vm.list", api.Empty{})
},
daemonPing: func(ctx context.Context, socketPath string) (api.PingResult, error) {
return rpc.Call[api.PingResult](ctx, socketPath, "ping", api.Empty{})
},
vmCreateBegin: func(ctx context.Context, socketPath string, params api.VMCreateParams) (api.VMCreateBeginResult, error) {
return rpc.Call[api.VMCreateBeginResult](ctx, socketPath, "vm.create.begin", params)
},
vmCreateStatus: func(ctx context.Context, socketPath, operationID string) (api.VMCreateStatusResult, error) {
return rpc.Call[api.VMCreateStatusResult](ctx, socketPath, "vm.create.status", api.VMCreateStatusParams{ID: operationID})
},
vmCreateCancel: func(ctx context.Context, socketPath, operationID string) error {
_, err := rpc.Call[api.Empty](ctx, socketPath, "vm.create.cancel", api.VMCreateStatusParams{ID: operationID})
return err
},
vmPorts: func(ctx context.Context, socketPath, idOrName string) (api.VMPortsResult, error) {
return rpc.Call[api.VMPortsResult](ctx, socketPath, "vm.ports", api.VMRefParams{IDOrName: idOrName})
},
vmWorkspacePrepare: func(ctx context.Context, socketPath string, params api.VMWorkspacePrepareParams) (api.VMWorkspacePrepareResult, error) {
return rpc.Call[api.VMWorkspacePrepareResult](ctx, socketPath, "vm.workspace.prepare", params)
},
vmWorkspaceExport: func(ctx context.Context, socketPath string, params api.WorkspaceExportParams) (api.WorkspaceExportResult, error) {
return rpc.Call[api.WorkspaceExportResult](ctx, socketPath, "vm.workspace.export", params)
},
guestWaitForSSH: func(ctx context.Context, address, privateKeyPath string, interval time.Duration) error {
knownHosts, _ := bangerKnownHostsPath()
return guest.WaitForSSH(ctx, address, privateKeyPath, knownHosts, interval)
},
guestDial: func(ctx context.Context, address, privateKeyPath string) (vmRunGuestClient, error) {
knownHosts, _ := bangerKnownHostsPath()
return guest.Dial(ctx, address, privateKeyPath, knownHosts)
},
buildVMRunToolingPlan: toolingplan.Build,
cwd: os.Getwd,
completionLister: defaultCompletionLister,
repoInspector: workspace.NewInspector(),
}
}

90
internal/cli/errors.go Normal file

@@ -0,0 +1,90 @@
package cli
import (
"errors"
"io"
"strings"
"banger/internal/cli/style"
"banger/internal/rpc"
)
// TranslateError is the public entry point used by cmd/banger/main.go
// to render any error reaching the top of the cobra tree. Forwards
// to the package-internal helper so tests can reach it directly.
func TranslateError(w io.Writer, err error) string {
return translateRPCError(w, err)
}
// translateRPCError turns an error returned by rpc.Call into a
// user-facing string. Known codes get short, friendly prefixes;
// unknown codes pass through verbatim so debuggability is preserved.
// When the daemon attached an op_id, the helper appends it in parens
// so an operator can paste it into journalctl --grep.
//
// Color is applied only when w is a TTY (and NO_COLOR is unset).
// The returned string never includes a trailing newline — caller
// chooses where it goes.
func translateRPCError(w io.Writer, err error) string {
if err == nil {
return ""
}
var rpcErr *rpc.ErrorResponse
if !errors.As(err, &rpcErr) || rpcErr == nil {
// Non-RPC failures (dialing the socket, decode errors,
// context cancellation, ...) come through as plain Go
// errors. Surface them verbatim — they already mention
// the underlying cause clearly enough.
return err.Error()
}
prefix := errorCodePrefix(rpcErr.Code)
body := rpcErr.Message
if prefix != "" {
body = prefix + ": " + rpcErr.Message
} else if rpcErr.Message == "" {
// Defensive: a server that returned a code with no
// message still has SOMETHING to report; default to the
// raw code so we never print an empty error.
body = rpcErr.Code
}
if rpcErr.OpID != "" {
body = body + " (" + style.Dim(w, rpcErr.OpID) + ")"
}
return body
}
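// Illustrative sketch (not part of this diff): how a caller such as
// cmd/banger/main.go might render an error from the cobra tree. The
// exact wiring in main.go is assumed, not shown here.
//
//	root := cli.NewBangerCommand()
//	if err := root.Execute(); err != nil {
//	    fmt.Fprintln(os.Stderr, cli.TranslateError(os.Stderr, err))
//	    os.Exit(1)
//	}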
// errorCodePrefix maps the small set of codes the daemon emits to
// short user-facing labels. Unknown codes return "" so the message
// alone is shown — keeps the door open for future codes the CLI
// hasn't been updated to recognise.
//
// "operation_failed" is the catch-all the generic dispatcher uses
// when a service returned an error; the message is already self-
// explanatory, so we strip the code entirely. Specialised codes
// (not_found, already_exists, ...) keep a label because the
// message body alone may not say what kind of failure it is.
func errorCodePrefix(code string) string {
switch strings.TrimSpace(code) {
case "", "operation_failed":
return ""
case "not_found":
return "not found"
case "not_running":
return "not running"
case "already_exists":
return "already exists"
case "bad_request", "bad_params":
return "bad request"
case "bad_version":
return "version mismatch"
case "unauthorized":
return "unauthorized"
case "unknown_method":
return "unknown method"
default:
// Surface the raw code so an operator filing a bug has
// something concrete to grep for. Only the boilerplate
// "operation_failed" is stripped (handled above); anything
// novel is kept.
return code
}
}

View file

@ -0,0 +1,60 @@
package cli
import (
"bytes"
"errors"
"strings"
"testing"
"banger/internal/rpc"
)
// TestTranslateRPCError pins the user-facing error rendering for
// every code the daemon emits today plus the catch-all unknown-code
// path. Buffer is non-TTY so style helpers no-op and assertions
// stay readable.
func TestTranslateRPCError(t *testing.T) {
var buf bytes.Buffer
cases := []struct {
name string
code string
msg string
opID string
expect string
}{
{"operation_failed strips code", "operation_failed", "vm running", "", "vm running"},
{"empty code drops prefix", "", "raw boom", "", "raw boom"},
{"not_found", "not_found", `vm "x" not found`, "", `not found: vm "x" not found`},
{"not_running", "not_running", "vm is not running", "", "not running: vm is not running"},
{"already_exists", "already_exists", "image foo", "", "already exists: image foo"},
{"bad_request", "bad_request", "missing rootfs", "", "bad request: missing rootfs"},
{"bad_params", "bad_params", "invalid tap name", "", "bad request: invalid tap name"},
{"bad_version", "bad_version", "unsupported version 99", "", "version mismatch: unsupported version 99"},
{"unauthorized", "unauthorized", "uid 1000 not allowed", "", "unauthorized: uid 1000 not allowed"},
{"unknown_method", "unknown_method", "no.such.method", "", "unknown method: no.such.method"},
{"unknown code falls through", "weird_new_code", "boom", "", "weird_new_code: boom"},
{"op_id appended in parens", "operation_failed", "boom", "op-deadbeef00ff", "boom (op-deadbeef00ff)"},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
err := &rpc.ErrorResponse{Code: tc.code, Message: tc.msg, OpID: tc.opID}
got := translateRPCError(&buf, err)
if got != tc.expect {
t.Errorf("got %q, want %q", got, tc.expect)
}
})
}
}
// TestTranslateRPCErrorPassesThroughNonRPCErrors covers the dial
// failure / decode failure paths where rpc.Call returns a plain Go
// error rather than *rpc.ErrorResponse. The translator must not
// hide the original message — that's the only signal an operator
// has when the daemon is down.
func TestTranslateRPCErrorPassesThroughNonRPCErrors(t *testing.T) {
var buf bytes.Buffer
got := translateRPCError(&buf, errors.New("dial unix /run/banger/bangerd.sock: connect: no such file or directory"))
if !strings.Contains(got, "no such file or directory") {
t.Fatalf("plain error lost: got %q", got)
}
}

View file

@ -0,0 +1,287 @@
package cli
import (
"bytes"
"errors"
"fmt"
"strings"
"testing"
"banger/internal/api"
"banger/internal/model"
"github.com/spf13/cobra"
)
func TestHumanSize(t *testing.T) {
cases := []struct {
bytes int64
want string
}{
{-1, "-"},
{0, "-"},
{1, "1 B"},
{1023, "1023 B"},
{1024, "1.0 KiB"},
{2048, "2.0 KiB"},
{1024 * 1024, "1.0 MiB"},
{5 * 1024 * 1024, "5.0 MiB"},
{1024 * 1024 * 1024, "1.0 GiB"},
{3 * 1024 * 1024 * 1024, "3.0 GiB"},
}
for _, tc := range cases {
if got := humanSize(tc.bytes); got != tc.want {
t.Errorf("humanSize(%d) = %q, want %q", tc.bytes, got, tc.want)
}
}
}
func TestDashIfEmpty(t *testing.T) {
cases := map[string]string{
"": "-",
" ": "-",
"\t\n": "-",
"value": "value",
" hello ": " hello ",
}
for in, want := range cases {
if got := dashIfEmpty(in); got != want {
t.Errorf("dashIfEmpty(%q) = %q, want %q", in, got, want)
}
}
}
func TestExitCodeErrorError(t *testing.T) {
e := ExitCodeError{Code: 42}
got := e.Error()
if !strings.Contains(got, "42") {
t.Fatalf("error %q missing code", got)
}
var target ExitCodeError
if !errors.As(error(e), &target) {
t.Fatal("errors.As failed to match ExitCodeError")
}
if target.Code != 42 {
t.Fatalf("target.Code = %d, want 42", target.Code)
}
}
func TestShortID(t *testing.T) {
cases := map[string]string{
"": "",
"abc": "abc",
"0123456789ab": "0123456789ab",
"0123456789abcd": "0123456789ab",
"0123456789abcdefghij": "0123456789ab",
}
for in, want := range cases {
if got := shortID(in); got != want {
t.Errorf("shortID(%q) = %q, want %q", in, got, want)
}
}
}
func TestImageNameIndex(t *testing.T) {
images := []model.Image{
{ID: "id-a", Name: "alpha"},
{ID: "id-b", Name: "beta"},
}
idx := imageNameIndex(images)
if len(idx) != 2 {
t.Fatalf("len = %d, want 2", len(idx))
}
if idx["id-a"] != "alpha" || idx["id-b"] != "beta" {
t.Fatalf("unexpected index %v", idx)
}
empty := imageNameIndex(nil)
if empty == nil || len(empty) != 0 {
t.Fatalf("expected empty non-nil map, got %v", empty)
}
}
func TestHelpNoArgs(t *testing.T) {
called := false
cmd := &cobra.Command{
Use: "x",
RunE: func(cmd *cobra.Command, args []string) error {
called = true
return nil
},
}
cmd.SetOut(&bytes.Buffer{})
cmd.SetErr(&bytes.Buffer{})
if err := helpNoArgs(cmd, nil); err != nil {
t.Fatalf("helpNoArgs(nil): %v", err)
}
if called {
t.Fatal("helpNoArgs should not invoke Run")
}
if err := helpNoArgs(cmd, []string{"bogus"}); err == nil {
t.Fatal("expected error for unexpected args")
}
}
func TestArgsValidators(t *testing.T) {
cmd := &cobra.Command{Use: "x"}
exact := exactArgsUsage(2, "need exactly two")
if err := exact(cmd, []string{"a", "b"}); err != nil {
t.Fatalf("exact(2 args): %v", err)
}
if err := exact(cmd, []string{"a"}); err == nil {
t.Fatal("expected error for 1 arg with exactArgsUsage(2)")
}
minArgs := minArgsUsage(1, "need at least one")
if err := minArgs(cmd, []string{"a"}); err != nil {
t.Fatalf("min(1 arg): %v", err)
}
if err := minArgs(cmd, nil); err == nil {
t.Fatal("expected error for 0 args with minArgsUsage(1)")
}
maxArgs := maxArgsUsage(1, "at most one")
if err := maxArgs(cmd, []string{"a"}); err != nil {
t.Fatalf("max(1 arg): %v", err)
}
if err := maxArgs(cmd, []string{"a", "b"}); err == nil {
t.Fatal("expected error for 2 args with maxArgsUsage(1)")
}
noArgs := noArgsUsage("none allowed")
if err := noArgs(cmd, nil); err != nil {
t.Fatalf("no args: %v", err)
}
if err := noArgs(cmd, []string{"a"}); err == nil {
t.Fatal("expected error for args with noArgsUsage")
}
}
func TestPrintKernelListTable(t *testing.T) {
var buf bytes.Buffer
entries := []api.KernelEntry{
{Name: "generic-6.12", Distro: "debian", Arch: "x86_64", KernelVersion: "6.12", ImportedAt: "2026-01-01"},
{Name: "bare"},
}
if err := printKernelListTable(&buf, entries); err != nil {
t.Fatalf("printKernelListTable: %v", err)
}
got := buf.String()
for _, want := range []string{"NAME", "DISTRO", "generic-6.12", "bare"} {
if !strings.Contains(got, want) {
t.Errorf("output missing %q:\n%s", want, got)
}
}
// Empty fields render as "-".
if !strings.Contains(got, "-") {
t.Errorf("expected dash for empty fields, got:\n%s", got)
}
}
func TestPrintKernelCatalogTable(t *testing.T) {
var buf bytes.Buffer
entries := []api.KernelCatalogEntry{
{Name: "generic-6.12", Arch: "x86_64", KernelVersion: "6.12", SizeBytes: 2 * 1024 * 1024, Pulled: true},
{Name: "new-kernel", SizeBytes: 0, Pulled: false},
}
if err := printKernelCatalogTable(&buf, entries); err != nil {
t.Fatalf("printKernelCatalogTable: %v", err)
}
got := buf.String()
for _, want := range []string{"generic-6.12", "pulled", "available", "new-kernel"} {
if !strings.Contains(got, want) {
t.Errorf("output missing %q:\n%s", want, got)
}
}
if !strings.Contains(got, "2.0 MiB") {
t.Errorf("expected humanSize(2 MiB), got:\n%s", got)
}
}
func TestPrintJSON(t *testing.T) {
var buf bytes.Buffer
if err := printJSON(&buf, map[string]int{"a": 1, "b": 2}); err != nil {
t.Fatalf("printJSON: %v", err)
}
got := buf.String()
if !strings.Contains(got, `"a": 1`) || !strings.Contains(got, `"b": 2`) {
t.Errorf("unexpected JSON output:\n%s", got)
}
if !strings.HasSuffix(got, "\n") {
t.Error("printJSON should terminate with newline")
}
}
func TestPrintJSONUnmarshalableValue(t *testing.T) {
var buf bytes.Buffer
// Channels are not JSON-marshalable.
err := printJSON(&buf, make(chan int))
if err == nil {
t.Fatal("expected error for unmarshalable value")
}
}
func TestPrintVMSummary(t *testing.T) {
var buf bytes.Buffer
vm := model.VMRecord{
ID: "0123456789abcdef",
Name: "demo",
State: model.VMStateRunning,
}
vm.Runtime.GuestIP = "172.16.0.5"
vm.Runtime.DNSName = "demo.vm"
vm.Spec.WorkDiskSizeBytes = 0
if err := printVMSummary(&buf, vm); err != nil {
t.Fatalf("printVMSummary: %v", err)
}
got := buf.String()
for _, want := range []string{"0123456789ab", "demo", "172.16.0.5", "demo.vm"} {
if !strings.Contains(got, want) {
t.Errorf("summary missing %q:\n%s", want, got)
}
}
}
func TestPrintImageSummary(t *testing.T) {
var buf bytes.Buffer
img := model.Image{ID: "img-id", Name: "debian-bookworm", Managed: true, RootfsPath: "/var/rootfs.ext4"}
if err := printImageSummary(&buf, img); err != nil {
t.Fatalf("printImageSummary: %v", err)
}
got := buf.String()
for _, want := range []string{"debian-bookworm", "true", "/var/rootfs.ext4"} {
if !strings.Contains(got, want) {
t.Errorf("summary missing %q:\n%s", want, got)
}
}
}
func TestVMImageLabel(t *testing.T) {
names := map[string]string{"img-1": "debian"}
if got := vmImageLabel("img-1", names); got != "debian" {
t.Errorf("got %q, want debian", got)
}
if got := vmImageLabel("img-2", names); got != "img-2" {
t.Errorf("fallback: got %q, want img-2", got)
}
}
// failWriter lets us exercise io-error branches of the printers.
type failWriter struct{}
func (failWriter) Write([]byte) (int, error) { return 0, fmt.Errorf("boom") }
func TestPrintersPropagateWriteErrors(t *testing.T) {
kernels := []api.KernelEntry{{Name: "k"}}
if err := printKernelListTable(failWriter{}, kernels); err == nil {
t.Error("expected write error from printKernelListTable")
}
catalog := []api.KernelCatalogEntry{{Name: "k"}}
if err := printKernelCatalogTable(failWriter{}, catalog); err == nil {
t.Error("expected write error from printKernelCatalogTable")
}
}

View file

@ -0,0 +1,26 @@
package cli
import (
"strings"
"banger/internal/guest"
"banger/internal/model"
)
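// removeUserKnownHosts drops the VM's pinned host keys (its guest IP
// and DNS name, when recorded) from banger's own known_hosts file, so
// stale pins from a deleted VM don't collide with a later VM that
// reuses the same address. It is a no-op when the VM has neither.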
func removeUserKnownHosts(vm model.VMRecord) error {
knownHostsPath, err := bangerKnownHostsPath()
if err != nil {
return err
}
var hosts []string
if ip := strings.TrimSpace(vm.Runtime.GuestIP); ip != "" {
hosts = append(hosts, ip)
}
if dns := strings.TrimSpace(vm.Runtime.DNSName); dns != "" {
hosts = append(hosts, dns)
}
if len(hosts) == 0 {
return nil
}
return guest.RemoveKnownHosts(knownHostsPath, hosts...)
}

View file

@ -0,0 +1,320 @@
package cli
import (
"archive/tar"
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"banger/internal/imagecat"
"github.com/klauspost/compress/zstd"
)
func TestInternalMakeBundleFlagsExist(t *testing.T) {
root := NewBangerCommand()
internal, _, err := root.Find([]string{"internal"})
if err != nil {
t.Fatalf("find internal: %v", err)
}
mk, _, err := internal.Find([]string{"make-bundle"})
if err != nil {
t.Fatalf("find make-bundle: %v", err)
}
for _, name := range []string{"rootfs-tar", "name", "distro", "arch", "kernel-ref", "description", "size", "out"} {
if mk.Flags().Lookup(name) == nil {
t.Errorf("missing flag %q", name)
}
}
}
func TestMakeBundleRequiresName(t *testing.T) {
cmd := NewBangerCommand()
cmd.SetArgs([]string{"internal", "make-bundle", "--rootfs-tar", "some.tar", "--out", "out.tar.zst"})
cmd.SetOut(&bytes.Buffer{})
cmd.SetErr(&bytes.Buffer{})
err := cmd.Execute()
if err == nil || !strings.Contains(err.Error(), "image name is required") {
t.Fatalf("execute error = %v, want image-name-required", err)
}
}
func TestMakeBundleRequiresRootfsTar(t *testing.T) {
cmd := NewBangerCommand()
cmd.SetArgs([]string{"internal", "make-bundle", "--name", "x", "--out", "out.tar.zst"})
cmd.SetOut(&bytes.Buffer{})
cmd.SetErr(&bytes.Buffer{})
err := cmd.Execute()
if err == nil || !strings.Contains(err.Error(), "--rootfs-tar is required") {
t.Fatalf("execute error = %v, want --rootfs-tar required", err)
}
}
func TestMakeBundleRequiresOut(t *testing.T) {
cmd := NewBangerCommand()
cmd.SetArgs([]string{"internal", "make-bundle", "--name", "x", "--rootfs-tar", "-"})
cmd.SetOut(&bytes.Buffer{})
cmd.SetErr(&bytes.Buffer{})
err := cmd.Execute()
if err == nil || !strings.Contains(err.Error(), "--out is required") {
t.Fatalf("execute error = %v, want --out required", err)
}
}
func TestWriteBundleTarZstRoundTrip(t *testing.T) {
stage := t.TempDir()
rootfsContent := []byte("fake-rootfs-bytes")
rootfsPath := filepath.Join(stage, "rootfs.ext4")
if err := os.WriteFile(rootfsPath, rootfsContent, 0o644); err != nil {
t.Fatal(err)
}
manifest := imagecat.Manifest{Name: "debian-bookworm", Distro: "debian"}
manifestJSON, _ := json.Marshal(manifest)
manifestPath := filepath.Join(stage, "manifest.json")
if err := os.WriteFile(manifestPath, manifestJSON, 0o644); err != nil {
t.Fatal(err)
}
bundlePath := filepath.Join(stage, "bundle.tar.zst")
if err := writeBundleTarZst(bundlePath, rootfsPath, manifestPath); err != nil {
t.Fatalf("writeBundleTarZst: %v", err)
}
// Decode and verify.
raw, err := os.Open(bundlePath)
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() { raw.Close() })
zr, err := zstd.NewReader(raw)
if err != nil {
t.Fatal(err)
}
tr := tar.NewReader(zr)
got := map[string][]byte{}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
t.Fatal(err)
}
b, _ := io.ReadAll(tr)
got[hdr.Name] = b
}
if !bytes.Equal(got[imagecat.RootfsFilename], rootfsContent) {
t.Errorf("rootfs mismatch: got %q want %q", got[imagecat.RootfsFilename], rootfsContent)
}
if !bytes.Equal(got[imagecat.ManifestFilename], manifestJSON) {
t.Errorf("manifest mismatch: got %q want %q", got[imagecat.ManifestFilename], manifestJSON)
}
}
func TestSha256HexFile(t *testing.T) {
dir := t.TempDir()
content := []byte("hello world")
p := filepath.Join(dir, "f")
if err := os.WriteFile(p, content, 0o644); err != nil {
t.Fatal(err)
}
got, err := sha256HexFile(p)
if err != nil {
t.Fatal(err)
}
expected := sha256.Sum256(content)
if got != hex.EncodeToString(expected[:]) {
t.Fatalf("sha256 = %q, want %q", got, hex.EncodeToString(expected[:]))
}
}
func TestDirSize(t *testing.T) {
dir := t.TempDir()
_ = os.MkdirAll(filepath.Join(dir, "sub"), 0o755)
_ = os.WriteFile(filepath.Join(dir, "a"), []byte("abc"), 0o644) // 3
_ = os.WriteFile(filepath.Join(dir, "sub", "b"), []byte("defgh"), 0o644) // 5
// Symlink must not be counted.
_ = os.Symlink(filepath.Join(dir, "a"), filepath.Join(dir, "link"))
n, err := dirSize(dir)
if err != nil {
t.Fatal(err)
}
if n != 8 {
t.Fatalf("dirSize = %d, want 8", n)
}
}
// TestMakeBundleEndToEnd exercises the full pipeline against a tiny
// synthesized rootfs tar. Skips if any external tool (mkfs.ext4 /
// debugfs) or the companion banger-vsock-agent binary is unavailable.
func TestMakeBundleEndToEnd(t *testing.T) {
if _, err := exec.LookPath("mkfs.ext4"); err != nil {
t.Skip("mkfs.ext4 not installed")
}
if _, err := exec.LookPath("debugfs"); err != nil {
t.Skip("debugfs not installed")
}
// Locate the companion binary in the build tree; skip if it isn't there.
buildDir := findBuildBinDir(t)
if buildDir == "" {
t.Skip("build/bin not found; run `make build` to enable this test")
}
if _, err := os.Stat(filepath.Join(buildDir, "banger-vsock-agent")); err != nil {
t.Skip("banger-vsock-agent not in build/bin; run `make build`")
}
// Ensure the banger binary also exists so CompanionBinaryPath
// resolves (it looks alongside the banger binary).
if _, err := os.Stat(filepath.Join(buildDir, "banger")); err != nil {
t.Skip("banger not in build/bin; run `make build`")
}
// Build a minimal rootfs tar: just /etc/os-release and /tmp (a dir).
dir := t.TempDir()
tarPath := filepath.Join(dir, "rootfs.tar")
if err := writeMinimalTar(tarPath); err != nil {
t.Fatal(err)
}
outPath := filepath.Join(dir, "bundle.tar.zst")
// Invoke via the cobra command to cover arg handling too.
cmd := NewBangerCommand()
cmd.SetArgs([]string{
"internal", "make-bundle",
"--rootfs-tar", tarPath,
"--name", "test-bundle",
"--distro", "debian",
"--arch", "x86_64",
"--kernel-ref", "generic-6.12",
"--size", "64M",
"--out", outPath,
})
var stderr bytes.Buffer
cmd.SetOut(&bytes.Buffer{})
cmd.SetErr(&stderr)
// paths.CompanionBinaryPath looks alongside the banger binary, but
// the test binary lives elsewhere. Use the env override instead.
t.Setenv("BANGER_VSOCK_AGENT_BIN", filepath.Join(buildDir, "banger-vsock-agent"))
cmd.SetContext(context.Background())
if err := cmd.Execute(); err != nil {
t.Fatalf("execute: %v\nstderr:\n%s", err, stderr.String())
}
if stat, err := os.Stat(outPath); err != nil {
t.Fatalf("output not written: %v", err)
} else if stat.Size() < 1024 {
t.Fatalf("output suspiciously small: %d bytes", stat.Size())
}
// Verify we can fetch and re-parse it (mirrors imagecat.Fetch logic,
// but reading straight from disk instead of over HTTP).
extractDir := t.TempDir()
verifyBundle(t, outPath, extractDir)
}
// findBuildBinDir returns the absolute path to the project's build/bin,
// or "" if it can't be located. Walks up from CWD to find go.mod.
func findBuildBinDir(t *testing.T) string {
t.Helper()
cwd, err := os.Getwd()
if err != nil {
return ""
}
for d := cwd; d != "/" && d != "."; d = filepath.Dir(d) {
if _, err := os.Stat(filepath.Join(d, "go.mod")); err == nil {
return filepath.Join(d, "build", "bin")
}
}
return ""
}
func writeMinimalTar(path string) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
tw := tar.NewWriter(f)
defer tw.Close()
// /etc dir
if err := tw.WriteHeader(&tar.Header{
Name: "etc/", Typeflag: tar.TypeDir, Mode: 0o755, Uid: 0, Gid: 0,
}); err != nil {
return err
}
// /etc/os-release
body := []byte(`ID=debian` + "\n" + `PRETTY_NAME="banger test"` + "\n")
if err := tw.WriteHeader(&tar.Header{
Name: "etc/os-release", Typeflag: tar.TypeReg, Mode: 0o644,
Size: int64(len(body)), Uid: 0, Gid: 0,
}); err != nil {
return err
}
if _, err := tw.Write(body); err != nil {
return err
}
// /tmp dir
return tw.WriteHeader(&tar.Header{
Name: "tmp/", Typeflag: tar.TypeDir, Mode: 0o1777, Uid: 0, Gid: 0,
})
}
func verifyBundle(t *testing.T, bundlePath, extractDir string) {
t.Helper()
f, err := os.Open(bundlePath)
if err != nil {
t.Fatal(err)
}
defer f.Close()
zr, err := zstd.NewReader(f)
if err != nil {
t.Fatal(err)
}
defer zr.Close()
tr := tar.NewReader(zr)
seen := map[string]bool{}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
t.Fatal(err)
}
dst := filepath.Join(extractDir, hdr.Name)
if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
t.Fatal(err)
}
out, err := os.Create(dst)
if err != nil {
t.Fatal(err)
}
if _, err := io.Copy(out, tr); err != nil {
t.Fatal(err)
}
out.Close()
seen[hdr.Name] = true
}
if !seen[imagecat.RootfsFilename] || !seen[imagecat.ManifestFilename] {
t.Fatalf("bundle missing expected files: seen=%v", seen)
}
manifestData, err := os.ReadFile(filepath.Join(extractDir, imagecat.ManifestFilename))
if err != nil {
t.Fatal(err)
}
var m imagecat.Manifest
if err := json.Unmarshal(manifestData, &m); err != nil {
t.Fatal(err)
}
if m.Name != "test-bundle" || m.KernelRef != "generic-6.12" || m.Distro != "debian" {
t.Fatalf("manifest = %+v", m)
}
}

338
internal/cli/printers.go Normal file
View file

@ -0,0 +1,338 @@
package cli
import (
"encoding/json"
"fmt"
"io"
"os"
"sort"
"strings"
"text/tabwriter"
"banger/internal/api"
"banger/internal/cli/style"
"banger/internal/model"
"banger/internal/system"
)
// anyWriter is the minimal writer surface every printer needs. Split
// out from io.Writer because some of our callers already hold a
// tabwriter/bytes.Buffer by value.
type anyWriter interface {
Write(p []byte) (n int, err error)
}
// -- small helpers --------------------------------------------------
func humanSize(bytes int64) string {
if bytes <= 0 {
return "-"
}
const (
kib = 1024
mib = 1024 * kib
gib = 1024 * mib
)
switch {
case bytes >= gib:
return fmt.Sprintf("%.1f GiB", float64(bytes)/float64(gib))
case bytes >= mib:
return fmt.Sprintf("%.1f MiB", float64(bytes)/float64(mib))
case bytes >= kib:
return fmt.Sprintf("%.1f KiB", float64(bytes)/float64(kib))
default:
return fmt.Sprintf("%d B", bytes)
}
}
func dashIfEmpty(s string) string {
if strings.TrimSpace(s) == "" {
return "-"
}
return s
}
// -- generic printers -----------------------------------------------
func printJSON(out anyWriter, v any) error {
data, err := json.MarshalIndent(v, "", " ")
if err != nil {
return err
}
_, err = fmt.Fprintln(out, string(data))
return err
}
// -- VM printers ----------------------------------------------------
func printVMSummary(out anyWriter, vm model.VMRecord) error {
_, err := fmt.Fprintf(
out,
"%s\t%s\t%s\t%s\t%s\t%s\n",
shortID(vm.ID),
vm.Name,
vm.State,
vm.Runtime.GuestIP,
model.FormatSizeBytes(vm.Spec.WorkDiskSizeBytes),
vm.Runtime.DNSName,
)
return err
}
func printVMIDList(out anyWriter, vms []model.VMRecord) error {
for _, vm := range vms {
if _, err := fmt.Fprintln(out, vm.ID); err != nil {
return err
}
}
return nil
}
func printVMListTable(out anyWriter, vms []model.VMRecord, imageNames map[string]string) error {
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
if _, err := fmt.Fprintln(w, "ID\tNAME\tSTATE\tIMAGE\tIP\tVCPU\tMEM\tDISK\tWORKSPACE\tCREATED"); err != nil {
return err
}
for _, vm := range vms {
if _, err := fmt.Fprintf(
w,
"%s\t%s\t%s\t%s\t%s\t%d\t%d MiB\t%s\t%s\t%s\n",
shortID(vm.ID),
vm.Name,
vm.State,
vmImageLabel(vm.ImageID, imageNames),
vm.Runtime.GuestIP,
vm.Spec.VCPUCount,
vm.Spec.MemoryMiB,
model.FormatSizeBytes(vm.Spec.WorkDiskSizeBytes),
dashIfEmpty(vm.Workspace.GuestPath),
relativeTime(vm.CreatedAt),
); err != nil {
return err
}
}
return w.Flush()
}
func printVMPortsTable(out anyWriter, result api.VMPortsResult) error {
type portRow struct {
Proto string
Endpoint string
Process string
Command string
Port int
}
rows := make([]portRow, 0, len(result.Ports))
for _, port := range result.Ports {
rows = append(rows, portRow{
Proto: port.Proto,
Endpoint: port.Endpoint,
Process: port.Process,
Command: port.Command,
Port: port.Port,
})
}
sort.Slice(rows, func(i, j int) bool {
if rows[i].Proto != rows[j].Proto {
return rows[i].Proto < rows[j].Proto
}
if rows[i].Port != rows[j].Port {
return rows[i].Port < rows[j].Port
}
if rows[i].Process != rows[j].Process {
return rows[i].Process < rows[j].Process
}
return rows[i].Command < rows[j].Command
})
if len(rows) == 0 {
return nil
}
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
if _, err := fmt.Fprintln(w, "PROTO\tENDPOINT\tPROCESS\tCOMMAND"); err != nil {
return err
}
for _, row := range rows {
if _, err := fmt.Fprintf(
w,
"%s\t%s\t%s\t%s\n",
row.Proto,
dashIfEmpty(row.Endpoint),
dashIfEmpty(row.Process),
dashIfEmpty(row.Command),
); err != nil {
return err
}
}
return w.Flush()
}
// -- image printers -------------------------------------------------
func printImageSummary(out anyWriter, image model.Image) error {
_, err := fmt.Fprintf(out, "%s\t%s\t%t\t%s\n", shortID(image.ID), image.Name, image.Managed, image.RootfsPath)
return err
}
func imageNameIndex(images []model.Image) map[string]string {
index := make(map[string]string, len(images))
for _, image := range images {
index[image.ID] = image.Name
}
return index
}
func vmImageLabel(imageID string, imageNames map[string]string) string {
if name := strings.TrimSpace(imageNames[imageID]); name != "" {
return name
}
return shortID(imageID)
}
func printImageListTable(out anyWriter, images []model.Image) error {
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
if _, err := fmt.Fprintln(w, "ID\tNAME\tMANAGED\tROOTFS SIZE\tCREATED"); err != nil {
return err
}
for _, image := range images {
if _, err := fmt.Fprintf(
w,
"%s\t%s\t%t\t%s\t%s\n",
shortID(image.ID),
image.Name,
image.Managed,
rootfsSizeLabel(image.RootfsPath),
relativeTime(image.CreatedAt),
); err != nil {
return err
}
}
return w.Flush()
}
func rootfsSizeLabel(path string) string {
info, err := os.Stat(path)
if err != nil {
return "-"
}
if info.Size() <= 0 {
return "0"
}
return model.FormatSizeBytes(info.Size())
}
// -- kernel printers ------------------------------------------------
func printKernelListTable(out anyWriter, entries []api.KernelEntry) error {
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
if _, err := fmt.Fprintln(w, "NAME\tDISTRO\tARCH\tKERNEL\tIMPORTED"); err != nil {
return err
}
for _, entry := range entries {
if _, err := fmt.Fprintf(
w,
"%s\t%s\t%s\t%s\t%s\n",
entry.Name,
dashIfEmpty(entry.Distro),
dashIfEmpty(entry.Arch),
dashIfEmpty(entry.KernelVersion),
dashIfEmpty(entry.ImportedAt),
); err != nil {
return err
}
}
return w.Flush()
}
func printKernelCatalogTable(out anyWriter, entries []api.KernelCatalogEntry) error {
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
if _, err := fmt.Fprintln(w, "NAME\tDISTRO\tARCH\tKERNEL\tSIZE\tSTATE"); err != nil {
return err
}
for _, entry := range entries {
state := "available"
if entry.Pulled {
state = "pulled"
}
if _, err := fmt.Fprintf(
w,
"%s\t%s\t%s\t%s\t%s\t%s\n",
entry.Name,
dashIfEmpty(entry.Distro),
dashIfEmpty(entry.Arch),
dashIfEmpty(entry.KernelVersion),
humanSize(entry.SizeBytes),
state,
); err != nil {
return err
}
}
return w.Flush()
}
// -- doctor printer -------------------------------------------------
func printDoctorReport(out anyWriter, report system.Report, verbose bool) error {
colorWriter, _ := out.(io.Writer)
var passes, warns, fails int
for _, c := range report.Checks {
switch c.Status {
case system.CheckStatusPass:
passes++
case system.CheckStatusWarn:
warns++
case system.CheckStatusFail:
fails++
}
}
if !verbose && warns == 0 && fails == 0 {
msg := fmt.Sprintf("all %d checks passed", passes)
if colorWriter != nil {
msg = style.Pass(colorWriter, msg)
}
_, err := fmt.Fprintln(out, msg)
return err
}
for _, check := range report.Checks {
if !verbose && check.Status == system.CheckStatusPass {
continue
}
status := strings.ToUpper(string(check.Status))
if colorWriter != nil {
switch check.Status {
case system.CheckStatusPass:
status = style.Pass(colorWriter, status)
case system.CheckStatusFail:
status = style.Fail(colorWriter, status)
case system.CheckStatusWarn:
status = style.Warn(colorWriter, status)
}
}
if _, err := fmt.Fprintf(out, "%s\t%s\n", status, check.Name); err != nil {
return err
}
for _, detail := range check.Details {
if _, err := fmt.Fprintf(out, " - %s\n", detail); err != nil {
return err
}
}
}
if !verbose {
if _, err := fmt.Fprintf(out, "\n%d passed, %s, %s\n", passes, pluralCount(warns, "warning"), pluralCount(fails, "failure")); err != nil {
return err
}
}
return nil
}
func pluralCount(n int, word string) string {
if n == 1 {
return fmt.Sprintf("%d %s", n, word)
}
return fmt.Sprintf("%d %ss", n, word)
}

View file

@ -0,0 +1,88 @@
package cli
import (
"bytes"
"strings"
"testing"
"banger/internal/system"
)
func TestPrintDoctorReport_BriefAllPass(t *testing.T) {
report := system.Report{}
report.AddPass("first", "detail one")
report.AddPass("second", "detail two")
report.AddPass("third")
var buf bytes.Buffer
if err := printDoctorReport(&buf, report, false); err != nil {
t.Fatalf("printDoctorReport: %v", err)
}
got := buf.String()
want := "all 3 checks passed\n"
if got != want {
t.Fatalf("brief all-pass output\n got: %q\nwant: %q", got, want)
}
}
func TestPrintDoctorReport_BriefHidesPassDetails(t *testing.T) {
report := system.Report{}
report.AddPass("first", "detail one")
report.AddWarn("second", "warn detail")
report.AddPass("third", "detail three")
report.AddFail("fourth", "fail detail")
var buf bytes.Buffer
if err := printDoctorReport(&buf, report, false); err != nil {
t.Fatalf("printDoctorReport: %v", err)
}
got := buf.String()
if strings.Contains(got, "PASS") || strings.Contains(got, "first") || strings.Contains(got, "third") {
t.Fatalf("brief mode leaked PASS rows: %q", got)
}
for _, want := range []string{"WARN\tsecond", "warn detail", "FAIL\tfourth", "fail detail"} {
if !strings.Contains(got, want) {
t.Fatalf("missing %q in brief output: %q", want, got)
}
}
if !strings.Contains(got, "2 passed, 1 warning, 1 failure") {
t.Fatalf("missing summary footer in: %q", got)
}
}
func TestPrintDoctorReport_BriefSummaryPlurals(t *testing.T) {
report := system.Report{}
report.AddPass("a")
report.AddWarn("b")
report.AddWarn("c")
var buf bytes.Buffer
if err := printDoctorReport(&buf, report, false); err != nil {
t.Fatalf("printDoctorReport: %v", err)
}
if !strings.Contains(buf.String(), "1 passed, 2 warnings, 0 failures") {
t.Fatalf("plural counts wrong: %q", buf.String())
}
}
func TestPrintDoctorReport_VerboseShowsEverything(t *testing.T) {
report := system.Report{}
report.AddPass("first", "detail one")
report.AddWarn("second", "warn detail")
var buf bytes.Buffer
if err := printDoctorReport(&buf, report, true); err != nil {
t.Fatalf("printDoctorReport: %v", err)
}
got := buf.String()
for _, want := range []string{"PASS\tfirst", "detail one", "WARN\tsecond", "warn detail"} {
if !strings.Contains(got, want) {
t.Fatalf("verbose mode missing %q: %q", want, got)
}
}
if strings.Contains(got, "passed,") {
t.Fatalf("verbose mode should not print summary footer: %q", got)
}
}

205
internal/cli/prune_test.go Normal file
View file

@ -0,0 +1,205 @@
package cli
import (
"bytes"
"context"
"errors"
"fmt"
"strings"
"testing"
"banger/internal/api"
"banger/internal/model"
"github.com/spf13/cobra"
)
// stubPruneSeams installs list + delete fakes onto the caller's *deps
// and returns a pointer to a slice that records every ID passed to the
// delete fake.
func stubPruneSeams(t *testing.T, d *deps, vms []model.VMRecord, listErr error, deleteErr map[string]error) *[]string {
t.Helper()
var deleted []string
d.vmList = func(ctx context.Context, socketPath string) (api.VMListResult, error) {
return api.VMListResult{VMs: vms}, listErr
}
d.vmDelete = func(ctx context.Context, socketPath, idOrName string) error {
if err, ok := deleteErr[idOrName]; ok {
return err
}
deleted = append(deleted, idOrName)
return nil
}
return &deleted
}
func newPruneTestCmd(stdin string) (*cobra.Command, *bytes.Buffer, *bytes.Buffer) {
cmd := &cobra.Command{Use: "prune"}
cmd.SetContext(context.Background())
stdout := &bytes.Buffer{}
stderr := &bytes.Buffer{}
cmd.SetIn(strings.NewReader(stdin))
cmd.SetOut(stdout)
cmd.SetErr(stderr)
return cmd, stdout, stderr
}
func TestPromptYesNo(t *testing.T) {
cases := map[string]bool{
"y\n": true,
"Y\n": true,
"yes\n": true,
"YES\n": true,
" y \n": true,
"n\n": false,
"no\n": false,
"\n": false,
"anything\n": false,
}
for input, want := range cases {
out := &bytes.Buffer{}
got, err := promptYesNo(strings.NewReader(input), out, "go? ")
if err != nil {
t.Errorf("input %q: error %v", input, err)
continue
}
if got != want {
t.Errorf("input %q: got %v, want %v", input, got, want)
}
if !strings.Contains(out.String(), "go?") {
t.Errorf("input %q: prompt not written; got %q", input, out.String())
}
}
}
func TestPromptYesNoEOF(t *testing.T) {
got, err := promptYesNo(strings.NewReader(""), &bytes.Buffer{}, "? ")
if err != nil {
t.Fatalf("EOF should not error: %v", err)
}
if got {
t.Fatal("EOF should be treated as no")
}
}
func TestRunVMPruneNoVictims(t *testing.T) {
d := defaultDeps()
stubPruneSeams(t, d, []model.VMRecord{
{ID: "id-1", Name: "running-vm", State: model.VMStateRunning},
}, nil, nil)
cmd, stdout, _ := newPruneTestCmd("")
if err := d.runVMPrune(cmd, "sock", false); err != nil {
t.Fatalf("d.runVMPrune: %v", err)
}
if !strings.Contains(stdout.String(), "no non-running VMs") {
t.Errorf("expected no-op message, got %q", stdout.String())
}
}
func TestRunVMPruneAbortedByUser(t *testing.T) {
d := defaultDeps()
deleted := stubPruneSeams(t, d, []model.VMRecord{
{ID: "id-1", Name: "stale", State: model.VMStateStopped},
}, nil, nil)
cmd, stdout, _ := newPruneTestCmd("n\n")
if err := d.runVMPrune(cmd, "sock", false); err != nil {
t.Fatalf("d.runVMPrune: %v", err)
}
if !strings.Contains(stdout.String(), "aborted") {
t.Errorf("expected 'aborted' output, got %q", stdout.String())
}
if len(*deleted) != 0 {
t.Errorf("should not have deleted anything, got %v", *deleted)
}
}
func TestRunVMPruneConfirmedDeletesNonRunning(t *testing.T) {
d := defaultDeps()
deleted := stubPruneSeams(t, d, []model.VMRecord{
{ID: "id-run", Name: "keeper", State: model.VMStateRunning},
{ID: "id-stop", Name: "stale", State: model.VMStateStopped},
{ID: "id-err", Name: "broken", State: model.VMStateError},
{ID: "id-created", Name: "fresh", State: model.VMStateCreated},
}, nil, nil)
cmd, stdout, _ := newPruneTestCmd("y\n")
if err := d.runVMPrune(cmd, "sock", false); err != nil {
t.Fatalf("d.runVMPrune: %v", err)
}
// Deleted must be exactly the three non-running IDs, in list order.
want := []string{"id-stop", "id-err", "id-created"}
if len(*deleted) != len(want) {
t.Fatalf("deleted = %v, want %v", *deleted, want)
}
for i, id := range want {
if (*deleted)[i] != id {
t.Errorf("deleted[%d] = %q, want %q", i, (*deleted)[i], id)
}
}
for _, want := range []string{"stale", "broken", "fresh"} {
if !strings.Contains(stdout.String(), "deleted "+want) {
t.Errorf("output missing 'deleted %s':\n%s", want, stdout.String())
}
}
if strings.Contains(stdout.String(), "deleted keeper") {
t.Errorf("running VM should not be deleted:\n%s", stdout.String())
}
}
func TestRunVMPruneForceSkipsPrompt(t *testing.T) {
d := defaultDeps()
deleted := stubPruneSeams(t, d, []model.VMRecord{
{ID: "id-1", Name: "stale", State: model.VMStateStopped},
}, nil, nil)
// Empty stdin + force=true: must not block on prompt.
cmd, stdout, _ := newPruneTestCmd("")
if err := d.runVMPrune(cmd, "sock", true); err != nil {
t.Fatalf("d.runVMPrune: %v", err)
}
if len(*deleted) != 1 || (*deleted)[0] != "id-1" {
t.Errorf("deleted = %v, want [id-1]", *deleted)
}
// Prompt should not appear in output.
if strings.Contains(stdout.String(), "Delete these VMs?") {
t.Errorf("force=true should skip prompt:\n%s", stdout.String())
}
}
func TestRunVMPruneReportsPartialFailure(t *testing.T) {
d := defaultDeps()
stubPruneSeams(t, d,
[]model.VMRecord{
{ID: "id-a", Name: "a", State: model.VMStateStopped},
{ID: "id-b", Name: "b", State: model.VMStateStopped},
},
nil,
map[string]error{"id-a": errors.New("simulated")},
)
cmd, _, stderr := newPruneTestCmd("")
err := d.runVMPrune(cmd, "sock", true)
if err == nil {
t.Fatal("expected non-zero exit when any delete fails")
}
if !strings.Contains(err.Error(), "1 VM(s) failed") {
t.Errorf("unexpected error: %v", err)
}
if !strings.Contains(stderr.String(), "delete a:") {
t.Errorf("stderr missing failure log: %q", stderr.String())
}
}
func TestRunVMPruneListErrorPropagates(t *testing.T) {
d := defaultDeps()
stubPruneSeams(t, d, nil, fmt.Errorf("rpc failed"), nil)
cmd, _, _ := newPruneTestCmd("")
err := d.runVMPrune(cmd, "sock", true)
if err == nil || !strings.Contains(err.Error(), "rpc failed") {
t.Fatalf("expected rpc error to propagate, got %v", err)
}
}

138
internal/cli/ssh.go Normal file
View file

@ -0,0 +1,138 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"os/exec"
"strings"
"time"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
"banger/internal/vsockagent"
)
// runSSHSession executes ssh with the given args. On exit it decides
// whether to print the "vm is still running" reminder. The reminder is
// skipped when the caller asked for that (e.g. --rm is about to delete
// the VM), when ctx is already done, or when ssh exited with status
// 255, which means ssh itself failed rather than the user
// disconnecting cleanly.
func (d *deps) runSSHSession(ctx context.Context, socketPath, vmRef string, stdin io.Reader, stdout, stderr io.Writer, sshArgs []string, skipReminder bool) error {
sshErr := d.sshExec(ctx, stdin, stdout, stderr, sshArgs)
if skipReminder || !shouldCheckSSHReminder(sshErr) || ctx.Err() != nil {
return sshErr
}
pingCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
health, err := d.vmHealth(pingCtx, socketPath, vmRef)
if err != nil {
_, _ = fmt.Fprintln(stderr, vsockagent.WarningMessage(vmRef, err))
return sshErr
}
if health.Healthy {
name := health.Name
if strings.TrimSpace(name) == "" {
name = vmRef
}
_, _ = fmt.Fprintln(stderr, vsockagent.ReminderMessage(name))
}
return sshErr
}
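// shouldCheckSSHReminder reports whether the post-session health check
// is worth doing for the ssh error we got back. nil means the session
// ended normally; a non-ExitError (ssh binary missing, spawn failure)
// and ssh's own exit status 255 (the code ssh reserves for "an error
// occurred") both mean the connection itself failed, so pinging the
// VM would only add noise.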
func shouldCheckSSHReminder(err error) bool {
if err == nil {
return true
}
var exitErr *exec.ExitError
if !errors.As(err, &exitErr) {
return false
}
return exitErr.ExitCode() != 255
}
// sshCommandArgs builds the argv for `ssh` invocations against a VM.
// Host-key verification uses a banger-owned known_hosts file
// populated by the daemon's first successful Go-SSH dial to each VM
// (trust-on-first-use). `accept-new` means: accept-and-pin on first
// contact; strict-verify afterwards. The user's own
// ~/.ssh/known_hosts is never touched.
func sshCommandArgs(cfg model.DaemonConfig, guestIP string, extra []string) ([]string, error) {
if guestIP == "" {
return nil, errors.New("vm has no guest IP")
}
args := []string{}
args = append(args, "-F", "/dev/null")
if cfg.SSHKeyPath != "" {
args = append(args, "-i", cfg.SSHKeyPath)
}
knownHosts, khErr := bangerKnownHostsPath()
args = append(
args,
"-o", "IdentitiesOnly=yes",
"-o", "BatchMode=yes",
"-o", "PreferredAuthentications=publickey",
"-o", "PasswordAuthentication=no",
"-o", "KbdInteractiveAuthentication=no",
)
if khErr == nil {
args = append(args,
"-o", "UserKnownHostsFile="+knownHosts,
"-o", "StrictHostKeyChecking=accept-new",
)
} else {
// If we can't resolve the banger path (unusual — paths.Resolve
// basically can't fail), fall through to a hard-fail posture
// rather than silently disabling verification.
args = append(args,
"-o", "StrictHostKeyChecking=yes",
)
}
args = append(args, "root@"+guestIP)
// ssh(1) concatenates every argument after the host with spaces
// before sending to the remote shell. That means passing extra
// args raw — `ssh host sh -c 'exit 42'` — re-tokenises on the
// remote side to `sh -c exit 42`, where `42` is $0 for the
// already-completed `exit`, and the rc the user asked for is
// lost. Shell-quote each element and join them ourselves so the
// remote shell sees exactly the argv the user typed locally.
if len(extra) > 0 {
quoted := make([]string, len(extra))
for i, a := range extra {
quoted[i] = shellQuote(a)
}
args = append(args, strings.Join(quoted, " "))
}
return args, nil
}
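// Illustrative sketch of the invocation this corresponds to for a VM
// at 172.16.0.2 with a key configured (the known_hosts path comes
// from paths.Resolve; shown as a placeholder):
//
//	ssh -F /dev/null -i <ssh_key_path>
//	    -o IdentitiesOnly=yes -o BatchMode=yes
//	    -o PreferredAuthentications=publickey
//	    -o PasswordAuthentication=no
//	    -o KbdInteractiveAuthentication=no
//	    -o UserKnownHostsFile=<banger known_hosts>
//	    -o StrictHostKeyChecking=accept-new
//	    root@172.16.0.2 [shell-quoted extra args joined with spaces]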
// bangerKnownHostsPath resolves the TOFU file the daemon writes into
// and the CLI reads back. Both sides must agree on the path or the
// pin doesn't round-trip.
func bangerKnownHostsPath() (string, error) {
layout, err := paths.Resolve()
if err != nil {
return "", err
}
return layout.KnownHostsPath, nil
}
func validateSSHPrereqs(cfg model.DaemonConfig) error {
checks := system.NewPreflight()
checks.RequireCommand("ssh", "install openssh-client")
if strings.TrimSpace(cfg.SSHKeyPath) != "" {
checks.RequireFile(cfg.SSHKeyPath, "ssh private key", `set "ssh_key_path" or let banger create its default key`)
}
return checks.Err("ssh preflight failed")
}
func validateVMRunPrereqs(cfg model.DaemonConfig) error {
checks := system.NewPreflight()
checks.RequireCommand("git", "install git")
if strings.TrimSpace(cfg.SSHKeyPath) != "" {
checks.RequireFile(cfg.SSHKeyPath, "ssh private key", `set "ssh_key_path" or let banger create its default key`)
}
return checks.Err("vm run preflight failed")
}

View file

@ -0,0 +1,70 @@
// Package style provides a tiny, conservative ANSI-color helper for
// banger's CLI. The contract:
//
// - Each helper takes the writer the styled string is going to and
// returns either the wrapped string or the plain one.
// - "Wrapped" only happens when the writer is a TTY AND the
// NO_COLOR environment variable is unset.
// - No 256-color or truecolor; no theme system; no external dep.
//
// Banger's CLI uses these for status (pass/fail/warn), error
// prefixes, and dim secondary text. Anything richer belongs in a
// dedicated TUI layer, which this package is not trying to be.
package style
import (
"io"
"os"
"strings"
)
// ANSI escape sequences. Kept private — callers compose meaning via
// the named helpers (Pass/Fail/Warn/...), not raw codes.
const (
ansiReset = "\x1b[0m"
ansiBold = "\x1b[1m"
ansiDim = "\x1b[2m"
ansiRed = "\x1b[31m"
ansiGreen = "\x1b[32m"
ansiYel = "\x1b[33m"
)
// Pass wraps s in green when w is a TTY and NO_COLOR is unset.
func Pass(w io.Writer, s string) string { return wrap(w, ansiGreen, s) }
// Fail wraps s in red.
func Fail(w io.Writer, s string) string { return wrap(w, ansiRed, s) }
// Warn wraps s in yellow.
func Warn(w io.Writer, s string) string { return wrap(w, ansiYel, s) }
// Dim wraps s in dim.
func Dim(w io.Writer, s string) string { return wrap(w, ansiDim, s) }
// Bold wraps s in bold.
func Bold(w io.Writer, s string) string { return wrap(w, ansiBold, s) }
// SupportsColor reports whether colored output should be emitted to
// w. Exposed so callers that build multi-segment strings can avoid
// duplicating the gate per call.
func SupportsColor(w io.Writer) bool {
if strings.TrimSpace(os.Getenv("NO_COLOR")) != "" {
return false
}
file, ok := w.(*os.File)
if !ok {
return false
}
info, err := file.Stat()
if err != nil {
return false
}
return info.Mode()&os.ModeCharDevice != 0
}
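// Illustrative sketch of the composed-string case mentioned above:
// ask the gate once, then build the whole line either way.
//
//	line := "PASS " + check.Name
//	if style.SupportsColor(w) {
//	    line = style.Pass(w, "PASS") + " " + check.Name
//	}
//	fmt.Fprintln(w, line)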
func wrap(w io.Writer, code, s string) string {
if !SupportsColor(w) {
return s
}
return code + s + ansiReset
}

View file

@ -0,0 +1,64 @@
package style
import (
"bytes"
"os"
"strings"
"testing"
)
// TestStyleNoOpsForNonTTYWriter pins that styled helpers don't emit
// ANSI escapes when the destination isn't a terminal. Buffers stand
// in for any non-TTY writer (CI, redirected stdout, log files).
func TestStyleNoOpsForNonTTYWriter(t *testing.T) {
var buf bytes.Buffer
cases := map[string]string{
"pass": Pass(&buf, "ok"),
"fail": Fail(&buf, "boom"),
"warn": Warn(&buf, "huh"),
"dim": Dim(&buf, "sub"),
"bold": Bold(&buf, "bold"),
}
for label, got := range cases {
if strings.Contains(got, "\x1b[") {
t.Errorf("%s: contains ANSI escape on non-TTY writer: %q", label, got)
}
}
}
// TestStyleSuppressedByNoColor pins https://no-color.org compliance:
// even on a "real" TTY, NO_COLOR forces plain output.
func TestStyleSuppressedByNoColor(t *testing.T) {
t.Setenv("NO_COLOR", "1")
r, w, err := os.Pipe()
if err != nil {
t.Fatalf("Pipe: %v", err)
}
defer r.Close()
defer w.Close()
// w is a pipe end, not a char device, so NO_COLOR is the dominant
// gate here; verifying that the helpers still suppress color also
// guards against a future TTY-detection regression that would
// otherwise need a pty harness to surface.
if got := Pass(w, "ok"); strings.Contains(got, "\x1b[") {
t.Errorf("NO_COLOR set but Pass() emitted ANSI: %q", got)
}
if got := Fail(w, "boom"); strings.Contains(got, "\x1b[") {
t.Errorf("NO_COLOR set but Fail() emitted ANSI: %q", got)
}
}
// TestSupportsColorRespectsNoColor confirms the gate function used
// by the helpers. Required for callers that compose multi-segment
// strings and want to ask once.
func TestSupportsColorRespectsNoColor(t *testing.T) {
t.Setenv("NO_COLOR", "1")
tmp, err := os.CreateTemp(t.TempDir(), "style-*")
if err != nil {
t.Fatalf("CreateTemp: %v", err)
}
defer tmp.Close()
if SupportsColor(tmp) {
t.Fatal("SupportsColor returned true with NO_COLOR set")
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,396 +0,0 @@
package cli
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"time"
"banger/internal/api"
"banger/internal/model"
"banger/internal/paths"
tea "github.com/charmbracelet/bubbletea"
)
func TestCreateVMFormSubmit(t *testing.T) {
form := newCreateVMForm([]model.Image{{Name: "default"}}, model.DaemonConfig{DefaultImageName: "default"})
form.fields[0].input.SetValue("devbox")
form.fields[2].input.SetValue("4")
form.fields[3].input.SetValue("2048")
form.fields[4].input.SetValue("12G")
form.fields[5].input.SetValue("24G")
form.fields[6].index = 1
action, err := form.submit()
if err != nil {
t.Fatalf("submit: %v", err)
}
if action.kind != actionCreate {
t.Fatalf("kind = %s, want %s", action.kind, actionCreate)
}
if action.create.Name != "devbox" || action.create.ImageName != "default" {
t.Fatalf("unexpected create params: %+v", action.create)
}
if action.create.VCPUCount == nil || *action.create.VCPUCount != 4 || action.create.MemoryMiB == nil || *action.create.MemoryMiB != 2048 {
t.Fatalf("unexpected cpu/memory: %+v", action.create)
}
if action.create.SystemOverlaySize != "12G" || action.create.WorkDiskSize != "24G" {
t.Fatalf("unexpected disk sizes: %+v", action.create)
}
if !action.create.NATEnabled {
t.Fatalf("expected NAT enabled: %+v", action.create)
}
}
func TestEditVMFormSubmit(t *testing.T) {
form := newEditVMForm(model.VMRecord{
ID: "vm-1",
Spec: model.VMSpec{
VCPUCount: 2,
MemoryMiB: 1024,
WorkDiskSizeBytes: 16 * 1024 * 1024 * 1024,
NATEnabled: false,
},
})
form.fields[0].input.SetValue("6")
form.fields[1].input.SetValue("4096")
form.fields[2].input.SetValue("32G")
form.fields[3].index = 1
action, err := form.submit()
if err != nil {
t.Fatalf("submit: %v", err)
}
if action.kind != actionEdit {
t.Fatalf("kind = %s, want %s", action.kind, actionEdit)
}
if action.set.IDOrName != "vm-1" {
t.Fatalf("unexpected vm id: %+v", action.set)
}
if action.set.VCPUCount == nil || *action.set.VCPUCount != 6 {
t.Fatalf("unexpected vcpu: %+v", action.set)
}
if action.set.MemoryMiB == nil || *action.set.MemoryMiB != 4096 {
t.Fatalf("unexpected memory: %+v", action.set)
}
if action.set.WorkDiskSize != "32G" {
t.Fatalf("unexpected disk size: %+v", action.set)
}
if action.set.NATEnabled == nil || !*action.set.NATEnabled {
t.Fatalf("expected nat enabled: %+v", action.set)
}
}
func TestResolveSelectedID(t *testing.T) {
vms := []model.VMRecord{{ID: "one"}, {ID: "two"}}
if got := resolveSelectedID("two", vms); got != "two" {
t.Fatalf("resolveSelectedID existing = %q, want %q", got, "two")
}
if got := resolveSelectedID("missing", vms); got != "one" {
t.Fatalf("resolveSelectedID fallback = %q, want %q", got, "one")
}
if got := resolveSelectedID("anything", nil); got != "" {
t.Fatalf("resolveSelectedID empty = %q, want empty", got)
}
}
func TestNewTUICommandStartsProgramWithoutEnsuringDaemon(t *testing.T) {
origEnsure := tuiEnsureDaemonFunc
origRunner := tuiProgramRunner
origTerminal := tuiIsTerminal
t.Cleanup(func() {
tuiEnsureDaemonFunc = origEnsure
tuiProgramRunner = origRunner
tuiIsTerminal = origTerminal
})
ensureCalled := false
tuiEnsureDaemonFunc = func(ctx context.Context) (paths.Layout, model.DaemonConfig, error) {
ensureCalled = true
return paths.Layout{}, model.DaemonConfig{}, nil
}
tuiProgramRunner = func(model tuiModel) error {
if ensureCalled {
t.Fatal("ensureDaemon should not run before the TUI starts")
}
if !model.daemonPending || !model.loading {
t.Fatalf("startup model = %+v, want pending daemon startup", model)
}
return nil
}
tuiIsTerminal = func(fd uintptr) bool { return true }
cmd := NewBangerCommand()
cmd.SetArgs([]string{"tui"})
if err := cmd.Execute(); err != nil {
t.Fatalf("Execute: %v", err)
}
if ensureCalled {
t.Fatal("ensureDaemon should not have been called")
}
}
func TestTUIViewRendersLayoutImmediately(t *testing.T) {
m := newTUIModel(paths.Layout{}, model.DaemonConfig{})
view := m.View()
if strings.Contains(view, "Loading...") {
t.Fatalf("view = %q, want full layout instead of one-line loading", view)
}
if !strings.Contains(view, "Starting daemon") {
t.Fatalf("view = %q, want startup placeholder", view)
}
}
func TestTUIVMLoadCanCompleteBeforeImages(t *testing.T) {
now := time.Date(2026, time.March, 18, 12, 0, 0, 0, time.UTC)
initial := newTUIModel(paths.Layout{}, model.DaemonConfig{})
updated, _ := initial.Update(daemonReadyMsg{
generation: initial.loadGeneration,
layout: paths.Layout{SocketPath: "/tmp/bangerd.sock"},
cfg: model.DaemonConfig{DefaultImageName: "default"},
duration: 2400 * time.Millisecond,
})
m := updated.(tuiModel)
if !m.daemonReady || !m.vmListPending || !m.imagePending {
t.Fatalf("model after daemonReady = %+v, want pending vm/image loads", m)
}
vm := model.VMRecord{
ID: "vm-1",
Name: "devbox",
State: model.VMStateRunning,
CreatedAt: now,
UpdatedAt: now,
LastTouchedAt: now,
Spec: model.VMSpec{
VCPUCount: 2,
MemoryMiB: 1024,
WorkDiskSizeBytes: 16 * 1024 * 1024 * 1024,
},
Runtime: model.VMRuntime{
GuestIP: "172.16.0.2",
DNSName: "devbox.vm",
},
}
updated, _ = m.Update(vmListLoadedMsg{
generation: m.loadGeneration,
vms: []model.VMRecord{vm},
duration: 20 * time.Millisecond,
})
m = updated.(tuiModel)
if len(m.vms) != 1 || m.selectedID != vm.ID {
t.Fatalf("model after vmListLoaded = %+v, want selected vm", m)
}
if !m.imagePending {
t.Fatalf("image load should still be pending: %+v", m)
}
if strings.Contains(m.View(), "No VMs") {
t.Fatalf("view should render the loaded VM while images are pending: %q", m.View())
}
if !strings.Contains(m.View(), "devbox") {
t.Fatalf("view = %q, want loaded VM name", m.View())
}
}
func TestTUICreateBlockedWhileImagesLoad(t *testing.T) {
m := newTUIModel(paths.Layout{}, model.DaemonConfig{})
m.daemonPending = false
m.daemonReady = true
m.imagePending = true
m.loading = true
updated, _ := m.updateBrowse(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{'c'}})
if updated.mode != tuiModeBrowse {
t.Fatalf("mode = %v, want browse", updated.mode)
}
if updated.statusText != "Images are still loading" {
t.Fatalf("status = %q, want image loading warning", updated.statusText)
}
}
func TestTUIStatusIncludesStageDurationsAfterInitialLoad(t *testing.T) {
initial := newTUIModel(paths.Layout{}, model.DaemonConfig{})
updated, _ := initial.Update(daemonReadyMsg{
generation: initial.loadGeneration,
layout: paths.Layout{SocketPath: "/tmp/bangerd.sock"},
duration: 2400 * time.Millisecond,
})
m := updated.(tuiModel)
updated, _ = m.Update(vmListLoadedMsg{
generation: m.loadGeneration,
vms: []model.VMRecord{},
duration: 20 * time.Millisecond,
})
m = updated.(tuiModel)
updated, _ = m.Update(imageListLoadedMsg{
generation: m.loadGeneration,
images: []model.Image{{Name: "default"}},
duration: 15 * time.Millisecond,
})
m = updated.(tuiModel)
if !strings.Contains(m.statusText, "daemon 2.4s") || !strings.Contains(m.statusText, "vm list 20ms") || !strings.Contains(m.statusText, "image list 15ms") {
t.Fatalf("statusText = %q, want stage timings", m.statusText)
}
}
func TestSSHDoneMsgShowsReminderWhenHealthCheckPasses(t *testing.T) {
origHealth := vmHealthFunc
t.Cleanup(func() {
vmHealthFunc = origHealth
})
vmHealthFunc = func(ctx context.Context, socketPath, idOrName string) (api.VMHealthResult, error) {
return api.VMHealthResult{Name: "devbox", Healthy: true}, nil
}
msg := sshDoneMsg(paths.Layout{SocketPath: "/tmp/bangerd.sock"}, actionRequest{id: "devbox", name: "devbox"}, "devbox", nil)
result, ok := msg.(actionResultMsg)
if !ok {
t.Fatalf("msg = %T, want actionResultMsg", msg)
}
if !strings.Contains(result.status, "devbox is still running") {
t.Fatalf("status = %q, want reminder", result.status)
}
}
func TestSSHDoneMsgShowsWarningWhenHealthCheckFails(t *testing.T) {
origHealth := vmHealthFunc
t.Cleanup(func() {
vmHealthFunc = origHealth
})
vmHealthFunc = func(ctx context.Context, socketPath, idOrName string) (api.VMHealthResult, error) {
return api.VMHealthResult{}, errors.New("dial failed")
}
msg := sshDoneMsg(paths.Layout{SocketPath: "/tmp/bangerd.sock"}, actionRequest{id: "devbox", name: "devbox"}, "devbox", nil)
result := msg.(actionResultMsg)
if !strings.Contains(result.status, "failed to check whether devbox is still running") {
t.Fatalf("status = %q, want warning", result.status)
}
}
func TestAggregateRunningVMResources(t *testing.T) {
t.Parallel()
running, vcpus, memoryBytes := aggregateRunningVMResources([]model.VMRecord{
{
State: model.VMStateRunning,
Spec: model.VMSpec{
VCPUCount: 2,
MemoryMiB: 1024,
},
},
{
State: model.VMStateStopped,
Spec: model.VMSpec{
VCPUCount: 8,
MemoryMiB: 8192,
},
},
{
State: model.VMStateRunning,
Spec: model.VMSpec{
VCPUCount: 4,
MemoryMiB: 2048,
},
},
})
if running != 2 || vcpus != 6 || memoryBytes != 3*1024*1024*1024 {
t.Fatalf("aggregateRunningVMResources = (%d, %d, %d), want (2, 6, %d)", running, vcpus, memoryBytes, int64(3*1024*1024*1024))
}
}
func TestTUIViewShowsResourceBar(t *testing.T) {
t.Parallel()
m := newTUIModel(paths.Layout{}, model.DaemonConfig{})
m.hostCPUCount = 32
m.hostMemoryBytes = 125 * 1024 * 1024 * 1024
m.hostDiskBytes = 200 * 1024 * 1024 * 1024
m.daemonPending = false
m.loading = false
stateDir := t.TempDir()
overlayPath := filepath.Join(stateDir, "system.cow")
workDiskPath := filepath.Join(stateDir, "root.ext4")
if err := os.WriteFile(overlayPath, make([]byte, 1024), 0o644); err != nil {
t.Fatalf("WriteFile overlay: %v", err)
}
if err := os.WriteFile(workDiskPath, make([]byte, 2048), 0o644); err != nil {
t.Fatalf("WriteFile work disk: %v", err)
}
m.vms = []model.VMRecord{
{
ID: "vm-1",
Name: "devbox",
State: model.VMStateRunning,
Spec: model.VMSpec{
VCPUCount: 2,
MemoryMiB: 1024,
WorkDiskSizeBytes: 16 * 1024 * 1024 * 1024,
},
Runtime: model.VMRuntime{
SystemOverlay: overlayPath,
WorkDiskPath: workDiskPath,
},
},
{
ID: "vm-2",
Name: "db",
State: model.VMStateStopped,
Spec: model.VMSpec{
VCPUCount: 4,
MemoryMiB: 4096,
WorkDiskSizeBytes: 32 * 1024 * 1024 * 1024,
},
},
}
m.selectedID = "vm-1"
m.rebuildTable()
m.refreshDetail()
view := m.View()
if !strings.Contains(view, "VMs") || !strings.Contains(view, "1/2") {
t.Fatalf("view = %q, want running VM count", view)
}
if !strings.Contains(view, "CPU") || !strings.Contains(view, "2/32") {
t.Fatalf("view = %q, want vcpu aggregate", view)
}
if !strings.Contains(view, "RAM") || !strings.Contains(view, "1.0G/125.0G") {
t.Fatalf("view = %q, want memory aggregate", view)
}
if !strings.Contains(view, "Disk") {
t.Fatalf("view = %q, want disk aggregate", view)
}
if !strings.Contains(view, "█") || !strings.Contains(view, "░") {
t.Fatalf("view = %q, want visual progress bars", view)
}
}
func TestAggregateVMDiskUsage(t *testing.T) {
t.Parallel()
dir := t.TempDir()
overlayPath := filepath.Join(dir, "system.cow")
workDiskPath := filepath.Join(dir, "root.ext4")
if err := os.WriteFile(overlayPath, make([]byte, 4096), 0o644); err != nil {
t.Fatalf("WriteFile overlay: %v", err)
}
if err := os.WriteFile(workDiskPath, make([]byte, 8192), 0o644); err != nil {
t.Fatalf("WriteFile work disk: %v", err)
}
total := aggregateVMDiskUsage([]model.VMRecord{{
Runtime: model.VMRuntime{
SystemOverlay: overlayPath,
WorkDiskPath: workDiskPath,
},
}})
if total <= 0 {
t.Fatalf("aggregateVMDiskUsage = %d, want positive allocated bytes", total)
}
}

330
internal/cli/vm_create.go Normal file
View file

@ -0,0 +1,330 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"os"
"strings"
"time"
"banger/internal/api"
"banger/internal/cli/style"
"banger/internal/config"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
)
// effectiveVMDefaults resolves the default VM sizing applied when
// --vcpu/--memory/--disk-size aren't given: config overrides win
// over host-derived heuristics, both fall back to baked-in
// constants. Called at command-build time so the cobra flag defaults
// reflect the resolved values.
func effectiveVMDefaults() model.VMDefaults {
var override model.VMDefaultsOverride
if layout, err := paths.Resolve(); err == nil {
if cfg, err := config.Load(layout); err == nil {
override = cfg.VMDefaults
}
}
host, err := system.ReadHostResources()
if err != nil {
return model.ResolveVMDefaults(override, 0, 0)
}
return model.ResolveVMDefaults(override, host.CPUCount, host.TotalMemoryBytes)
}
// printVMSpecLine writes a one-line sizing summary to out. Always
// emitted (even non-TTY) so logs and CI output carry the numbers.
func printVMSpecLine(out io.Writer, params api.VMCreateParams) {
vcpu := model.DefaultVCPUCount
if params.VCPUCount != nil {
vcpu = *params.VCPUCount
}
memory := model.DefaultMemoryMiB
if params.MemoryMiB != nil {
memory = *params.MemoryMiB
}
diskBytes := int64(model.DefaultWorkDiskSize)
if strings.TrimSpace(params.WorkDiskSize) != "" {
if parsed, err := model.ParseSize(params.WorkDiskSize); err == nil {
diskBytes = parsed
}
}
_, _ = fmt.Fprintf(out, "spec: %d vcpu | %d MiB | %s disk\n",
vcpu, memory, model.FormatSizeBytes(diskBytes))
}
// runVMCreate drives the create RPC + polls for progress. stderr
// gets the spec line up front and the progress renderer thereafter.
// On context cancel we cooperate with the daemon to cancel the
// in-flight op so it doesn't leak partially-created VM state.
func (d *deps) runVMCreate(ctx context.Context, socketPath string, stderr io.Writer, params api.VMCreateParams, verbose bool) (model.VMRecord, error) {
start := time.Now()
printVMSpecLine(stderr, params)
begin, err := d.vmCreateBegin(ctx, socketPath, params)
if err != nil {
return model.VMRecord{}, err
}
renderer := newVMCreateProgressRenderer(stderr, verbose)
renderer.render(begin.Operation)
op := begin.Operation
for {
if op.Done {
renderer.render(op)
if op.Success && op.VM != nil {
renderer.clear()
elapsed := formatVMCreateElapsed(time.Since(start))
_, _ = fmt.Fprintf(stderr, "[vm create] ready in %s\n", style.Dim(stderr, elapsed))
return *op.VM, nil
}
if strings.TrimSpace(op.Error) == "" {
return model.VMRecord{}, errors.New("vm create failed")
}
return model.VMRecord{}, errors.New(op.Error)
}
select {
case <-ctx.Done():
cancelCtx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
_ = d.vmCreateCancel(cancelCtx, socketPath, op.ID)
return model.VMRecord{}, ctx.Err()
case <-time.After(200 * time.Millisecond):
}
status, err := d.vmCreateStatus(ctx, socketPath, op.ID)
if err != nil {
if ctx.Err() != nil {
cancelCtx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
_ = d.vmCreateCancel(cancelCtx, socketPath, op.ID)
return model.VMRecord{}, ctx.Err()
}
return model.VMRecord{}, err
}
op = status.Operation
renderer.render(op)
}
}
type vmCreateProgressRenderer struct {
out io.Writer
enabled bool
inline bool
active bool
lastLine string
}
// newVMCreateProgressRenderer wires up progress for `vm create`. On
// non-TTY writers it stays disabled (CI/test logs already capture the
// spec + ready lines); on TTY it rewrites a single line via \r unless
// verbose is set or BANGER_NO_PROGRESS is exported, in which case it
// falls back to one line per stage.
func newVMCreateProgressRenderer(out io.Writer, verbose bool) *vmCreateProgressRenderer {
tty := writerSupportsProgress(out)
return &vmCreateProgressRenderer{
out: out,
enabled: tty,
inline: tty && !verbose && !progressDisabledByEnv(),
}
}
func (r *vmCreateProgressRenderer) render(op api.VMCreateOperation) {
if r == nil || !r.enabled {
return
}
line := formatVMCreateProgress(op)
if line == "" || line == r.lastLine {
return
}
r.lastLine = line
if r.inline {
_, _ = fmt.Fprint(r.out, "\r\x1b[K", line)
r.active = true
return
}
_, _ = fmt.Fprintln(r.out, line)
}
// clear resets the live inline line so the caller can write a clean
// terminating message. No-op outside inline mode.
func (r *vmCreateProgressRenderer) clear() {
if r == nil || !r.enabled || !r.inline || !r.active {
return
}
_, _ = fmt.Fprint(r.out, "\r\x1b[K")
r.active = false
r.lastLine = ""
}
// progressDisabledByEnv is the BANGER_NO_PROGRESS escape hatch — a
// non-empty value forces line-per-stage output even on a TTY, so users
// can pipe `script(1)` / tmux capture without \r artifacts.
func progressDisabledByEnv() bool {
return strings.TrimSpace(os.Getenv("BANGER_NO_PROGRESS")) != ""
}
// writerSupportsProgress returns true only when out is a terminal.
// Keeps stage lines + heartbeat dots out of piped / logged output
// where they'd just be noise.
func writerSupportsProgress(out io.Writer) bool {
file, ok := out.(*os.File)
if !ok {
return false
}
info, err := file.Stat()
if err != nil {
return false
}
return info.Mode()&os.ModeCharDevice != 0
}
// withHeartbeat runs fn while emitting a dot to stderr every 2
// seconds so the user can tell that long-running RPCs (bundle
// downloads, etc.) aren't wedged. No-op when stderr isn't a terminal,
// so piped or logged output stays clean.

func withHeartbeat(stderr io.Writer, label string, fn func() error) error {
if !writerSupportsProgress(stderr) {
return fn()
}
fmt.Fprintf(stderr, "[%s] ", label)
stop := make(chan struct{})
done := make(chan struct{})
go func() {
defer close(done)
ticker := time.NewTicker(2 * time.Second)
defer ticker.Stop()
for {
select {
case <-stop:
return
case <-ticker.C:
fmt.Fprint(stderr, ".")
}
}
}()
err := fn()
close(stop)
<-done
fmt.Fprintln(stderr)
return err
}
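// formatVMCreateProgress turns an operation's stage and detail into the
// single "[vm create] label: detail" line the renderer prints. Returns
// "" when there is nothing worth showing so callers can skip the write.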
func formatVMCreateProgress(op api.VMCreateOperation) string {
stage := strings.TrimSpace(op.Stage)
detail := strings.TrimSpace(op.Detail)
label := vmCreateStageLabel(stage)
if label == "" && detail == "" {
return ""
}
if label == "" {
return "[vm create] " + detail
}
if detail == "" {
return "[vm create] " + label
}
return "[vm create] " + label + ": " + detail
}
// vmCreateStageLabel humanises the daemon-side stage IDs. Anything
// unknown falls through to `strings.ReplaceAll(_, "_", " ")` so new
// stages still render meaningfully without a code change.
func vmCreateStageLabel(stage string) string {
switch strings.TrimSpace(stage) {
case "queued":
return "queued"
case "resolve_image":
return "resolving image"
case "reserve_vm":
return "allocating vm"
case "preflight":
return "checking host prerequisites"
case "prepare_rootfs":
return "preparing root filesystem"
case "prepare_host_features":
return "preparing host features"
case "prepare_work_disk":
return "preparing work disk"
case "boot_firecracker":
return "starting firecracker"
case "wait_vsock_agent":
return "waiting for vsock agent"
case "wait_guest_ready":
return "waiting for guest services"
case "apply_dns":
return "publishing dns"
case "apply_nat":
return "configuring nat"
case "finalize":
return "finalizing"
case "ready":
return "ready"
default:
return strings.ReplaceAll(stage, "_", " ")
}
}
// formatVMCreateElapsed renders a wall-clock duration as a friendly
// "ready in 4.7s" / "ready in 1m02s" string. Sub-second durations render
// as milliseconds and sub-minute ones keep one decimal, so quick smoke
// runs don't print "0s".
func formatVMCreateElapsed(d time.Duration) string {
if d < time.Second {
return fmt.Sprintf("%dms", d.Milliseconds())
}
if d < time.Minute {
return fmt.Sprintf("%.1fs", d.Seconds())
}
d = d.Round(time.Second)
minutes := int(d / time.Minute)
seconds := int((d % time.Minute) / time.Second)
return fmt.Sprintf("%dm%02ds", minutes, seconds)
}
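// validatePositiveSetting rejects zero or negative values for numeric
// settings with a uniform "<label> must be a positive integer" error.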
func validatePositiveSetting(label string, value int) error {
if value <= 0 {
return fmt.Errorf("%s must be a positive integer", label)
}
return nil
}
// shortID and relativeTime are small display helpers used across
// every printer; kept here alongside the other render-time helpers.
func shortID(id string) string {
if len(id) <= 12 {
return id
}
return id[:12]
}
func relativeTime(t time.Time) string {
if t.IsZero() {
return "-"
}
delta := time.Since(t)
switch {
case delta < 30*time.Second:
return "moments ago"
case delta < time.Minute:
return fmt.Sprintf("%d seconds ago", int(delta.Seconds()))
case delta < 2*time.Minute:
return "1 minute ago"
case delta < time.Hour:
return fmt.Sprintf("%d minutes ago", int(delta.Minutes()))
case delta < 2*time.Hour:
return "1 hour ago"
case delta < 24*time.Hour:
return fmt.Sprintf("%d hours ago", int(delta.Hours()))
case delta < 48*time.Hour:
return "1 day ago"
case delta < 7*24*time.Hour:
return fmt.Sprintf("%d days ago", int(delta.Hours()/24))
case delta < 14*24*time.Hour:
return "1 week ago"
default:
return fmt.Sprintf("%d weeks ago", int(delta.Hours()/(24*7)))
}
}

internal/cli/vm_exec.go (new file, 192 lines)
@@ -0,0 +1,192 @@
package cli
import (
"context"
"errors"
"fmt"
"os/exec"
"strings"
"banger/internal/api"
"banger/internal/model"
"banger/internal/rpc"
"github.com/spf13/cobra"
)
func (d *deps) newVMExecCommand() *cobra.Command {
var guestPath string
var autoPrepare bool
cmd := &cobra.Command{
Use: "exec <id-or-name> -- <command> [args...]",
Short: "Run a command in the VM workspace with the repo toolchain",
Long: strings.TrimSpace(`
Run a command inside a persistent VM, wrapping it with 'mise exec' so
all mise-managed tools (Go, Node, Python, etc.) are on PATH.
If the VM has a prepared workspace (from 'vm workspace prepare' or
'vm run ./repo'), the command runs from that directory and a stale-
workspace warning is printed when the host repo has advanced since the
last prepare; pass --auto-prepare to re-sync first. Otherwise the
command runs from root's home directory. --guest-path overrides both.
Exit code of the guest command is propagated verbatim.
`),
Example: strings.TrimSpace(`
banger vm exec dev -- make test
banger vm exec dev -- go build ./...
banger vm exec dev --auto-prepare -- npm ci && npm test
banger vm exec dev --guest-path /root/other -- make lint
`),
Args: cobra.ArbitraryArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Split on -- : everything before is [vm-name], everything after is the command.
dash := cmd.ArgsLenAtDash()
var vmRef string
var command []string
switch {
case dash < 0:
// No -- separator: first arg is VM, rest is command.
if len(args) < 2 {
return errors.New("usage: banger vm exec <id-or-name> -- <command> [args...]")
}
vmRef = args[0]
command = args[1:]
case dash == 0 || len(args[dash:]) == 0:
return errors.New("usage: banger vm exec <id-or-name> -- <command> [args...]")
default:
vmRef = args[:dash][0]
command = args[dash:]
}
layout, cfg, err := d.ensureDaemon(cmd.Context())
if err != nil {
return err
}
if err := validateSSHPrereqs(cfg); err != nil {
return err
}
// Fetch the full VM record — we need Workspace and GuestIP.
result, err := rpc.Call[api.VMShowResult](cmd.Context(), layout.SocketPath, "vm.show", api.VMRefParams{IDOrName: vmRef})
if err != nil {
return err
}
vm := result.VM
if vm.State != model.VMStateRunning {
return fmt.Errorf("vm %q is not running (state: %s)", vm.Name, vm.State)
}
// Resolve effective guest workspace path. Empty means "no
// cd": run from the SSH session's default cwd ($HOME). We
// only auto-cd when the user explicitly passed --guest-path
// or the VM actually has a recorded workspace — otherwise
// arbitrary VMs (no repo) would fail with cd errors.
execGuestPath := strings.TrimSpace(guestPath)
if execGuestPath == "" {
execGuestPath = strings.TrimSpace(vm.Workspace.GuestPath)
}
// Dirty-workspace check: compare stored HEAD with current host HEAD.
isDirty, currentHead, _ := d.vmExecDirtyCheck(cmd.Context(), vm.Workspace)
if isDirty {
storedShort := shortRef(vm.Workspace.HeadCommit)
currentShort := shortRef(currentHead)
preparedLabel := relativeTime(vm.Workspace.PreparedAt)
if autoPrepare && vm.Workspace.SourcePath != "" {
_, _ = fmt.Fprintf(cmd.ErrOrStderr(),
"[vm exec] workspace stale (prepared %s from %s, host HEAD now %s) — re-preparing\n",
preparedLabel, storedShort, currentShort)
if err := validateVMRunPrereqs(cfg); err != nil {
return err
}
if _, err := d.vmWorkspacePrepare(cmd.Context(), layout.SocketPath, api.VMWorkspacePrepareParams{
IDOrName: vmRef,
SourcePath: vm.Workspace.SourcePath,
GuestPath: execGuestPath,
Mode: string(model.WorkspacePrepareModeShallowOverlay),
}); err != nil {
return fmt.Errorf("auto-prepare workspace: %w", err)
}
} else {
_, _ = fmt.Fprintf(cmd.ErrOrStderr(),
"[vm exec] warning: workspace stale (prepared %s from %s, host HEAD now %s) — use --auto-prepare to re-sync\n",
preparedLabel, storedShort, currentShort)
}
}
// Build and run the exec script.
script := buildVMExecScript(execGuestPath, command)
sshArgs, err := sshCommandArgs(cfg, vm.Runtime.GuestIP, []string{"bash", "-lc", script})
if err != nil {
return fmt.Errorf("vm %q: build ssh args: %w", vm.Name, err)
}
if err := d.sshExec(cmd.Context(), cmd.InOrStdin(), cmd.OutOrStdout(), cmd.ErrOrStderr(), sshArgs); err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return ExitCodeError{Code: exitErr.ExitCode()}
}
return err
}
return nil
},
}
cmd.Flags().StringVar(&guestPath, "guest-path", "", "workspace directory in the guest (default: from last workspace prepare; otherwise root's home)")
cmd.Flags().BoolVar(&autoPrepare, "auto-prepare", false, "re-sync the workspace from the host repo before running if it's stale")
_ = cmd.RegisterFlagCompletionFunc("guest-path", cobra.NoFileCompletions)
return cmd
}
// buildVMExecScript returns the bash -lc argument that runs the
// command through mise exec when mise is available, falling back to a
// plain exec if it's not. When guestPath is non-empty, the script
// cd's into it first (workspace mode); when empty, the command runs
// from the SSH session's default cwd so VMs without a prepared
// workspace don't blow up on a non-existent /root/repo. Each command
// argument is shell-quoted so spaces and special characters survive
// the bash re-parse inside the -lc string.
func buildVMExecScript(guestPath string, command []string) string {
parts := make([]string, len(command))
for i, a := range command {
parts[i] = shellQuote(a)
}
quotedCmd := strings.Join(parts, " ")
body := fmt.Sprintf(
"if command -v mise >/dev/null 2>&1; then mise exec -- %s; else %s; fi",
quotedCmd,
quotedCmd,
)
if guestPath == "" {
return body
}
return fmt.Sprintf("cd %s && %s", shellQuote(guestPath), body)
}
// vmExecDirtyCheck compares the HEAD commit stored in the VM's
// workspace record against the current HEAD of the host repo. Returns
// (false, "", nil) when the check can't be performed (no workspace
// recorded, path gone, not a repo, git not installed) so callers
// treat unknown as "not dirty" rather than blocking the exec.
func (d *deps) vmExecDirtyCheck(ctx context.Context, ws model.VMWorkspace) (isDirty bool, currentHead string, err error) {
if ws.SourcePath == "" || ws.HeadCommit == "" {
return false, "", nil
}
out, err := d.hostCommandOutput(ctx, "git", "-C", ws.SourcePath, "rev-parse", "HEAD")
if err != nil {
// Source path gone, not a git repo, or git not installed —
// treat as unknown rather than blocking.
return false, "", nil
}
currentHead = strings.TrimSpace(string(out))
return currentHead != ws.HeadCommit, currentHead, nil
}
// shortRef returns the first 8 characters of a git ref / commit SHA
// for display. Returns the full string if it's already short.
func shortRef(ref string) string {
if len(ref) > 8 {
return ref[:8]
}
return ref
}

@@ -0,0 +1,35 @@
package cli
import (
"strings"
"testing"
)
func TestBuildVMExecScriptWithGuestPath(t *testing.T) {
got := buildVMExecScript("/root/repo", []string{"make", "test"})
want := "cd '/root/repo' && if command -v mise >/dev/null 2>&1; then mise exec -- 'make' 'test'; else 'make' 'test'; fi"
if got != want {
t.Fatalf("buildVMExecScript with path:\n got: %q\n want: %q", got, want)
}
}
func TestBuildVMExecScriptWithoutGuestPath(t *testing.T) {
got := buildVMExecScript("", []string{"whoami"})
want := "if command -v mise >/dev/null 2>&1; then mise exec -- 'whoami'; else 'whoami'; fi"
if got != want {
t.Fatalf("buildVMExecScript without path:\n got: %q\n want: %q", got, want)
}
if strings.Contains(got, "cd ") {
t.Fatalf("expected no cd when guestPath is empty, got: %q", got)
}
}
func TestBuildVMExecScriptShellQuotesPathWithSpaces(t *testing.T) {
got := buildVMExecScript("/tmp/with space", []string{"echo", "a b"})
if !strings.Contains(got, "cd '/tmp/with space'") {
t.Fatalf("expected guest path to be shell-quoted, got: %q", got)
}
if !strings.Contains(got, "mise exec -- 'echo' 'a b'") {
t.Fatalf("expected command args to be shell-quoted, got: %q", got)
}
}

internal/cli/vm_run.go (new file, 540 lines)
@@ -0,0 +1,540 @@
package cli
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"banger/internal/api"
"banger/internal/daemon/workspace"
"banger/internal/model"
"banger/internal/toolingplan"
"github.com/spf13/cobra"
)
// vmRunGuestClient is the narrow guest-SSH surface vm run needs. The
// daemon's guest-SSH package returns a value that satisfies this
// interface directly; we restate it here so tests can plug in fakes
// without pulling the full daemon in.
type vmRunGuestClient interface {
Close() error
UploadFile(ctx context.Context, remotePath string, mode os.FileMode, data []byte, logWriter io.Writer) error
RunScript(ctx context.Context, script string, logWriter io.Writer) error
StreamTar(ctx context.Context, sourceDir, remoteCommand string, logWriter io.Writer) error
StreamTarEntries(ctx context.Context, sourceDir string, entries []string, remoteCommand string, logWriter io.Writer) error
}
// vmRunRepo is the CLI-local view of the workspace argument to
// `vm run`: an absolute source path that passed preflight, plus the
// two branch flags. Everything else the flow needs (RepoRoot,
// RepoName, HEAD commit, etc.) comes back from the workspace.prepare
// RPC, which does the full git inspection daemon-side.
type vmRunRepo struct {
sourcePath string
branchName string
fromRef string
includeUntracked bool
}
const vmRunToolingInstallTimeoutSeconds = 120
// vmRunSSHTimeout bounds how long `vm run` waits for guest ssh after
// the vsock agent is ready. vsock readiness already means systemd
// should be up within seconds; a minute plus change is generous
// headroom for a slow first boot while still short enough that a
// wedged sshd surfaces promptly instead of hanging forever. Var, not
// const, so tests can shrink it.
var vmRunSSHTimeout = 90 * time.Second
// ExitCodeError wraps a remote command's exit status so the CLI's main()
// can propagate it verbatim. Only errors explicitly wrapped in this
// type get forwarded as process exit codes — plain *exec.ExitError
// values (from unrelated subprocesses like mkfs.ext4) must still
// surface as regular errors so the user sees a message.
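//
// A top-level caller might unwrap it roughly like this (illustrative
// sketch, not the actual main):
//
//	var codeErr ExitCodeError
//	if errors.As(err, &codeErr) {
//		os.Exit(codeErr.Code)
//	}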
type ExitCodeError struct {
Code int
}
func (e ExitCodeError) Error() string {
return fmt.Sprintf("exit status %d", e.Code)
}
// vmRunPreflightRepo validates a vm run workspace path BEFORE the VM
// is created, so bad paths fail fast instead of leaving the user
// with an orphaned VM. The check is intentionally minimal: the
// daemon's PrepareVMWorkspace does a full git inspection (branch,
// HEAD, identity, overlay) and returns everything the tooling
// harness needs, so duplicating the heavy lifting here just doubles
// the I/O. We only enforce what the user can fix locally before
// banger commits to creating a VM:
//
// - the path exists and is a directory,
// - it sits inside a non-bare git repository,
// - the repository has no submodules (unsupported in the shallow
// overlay mode vm run uses).
func (d *deps) vmRunPreflightRepo(ctx context.Context, rawPath string) (string, error) {
if strings.TrimSpace(rawPath) == "" {
wd, err := d.cwd()
if err != nil {
return "", err
}
rawPath = wd
}
sourcePath, err := workspace.ResolveSourcePath(rawPath)
if err != nil {
return "", err
}
repoRoot, err := d.repoInspector.GitTrimmedOutput(ctx, sourcePath, "rev-parse", "--show-toplevel")
if err != nil {
return "", fmt.Errorf("%s is not inside a git repository", sourcePath)
}
isBare, err := d.repoInspector.GitTrimmedOutput(ctx, repoRoot, "rev-parse", "--is-bare-repository")
if err != nil {
return "", fmt.Errorf("inspect git repository %s: %w", repoRoot, err)
}
if isBare == "true" {
return "", fmt.Errorf("vm run requires a non-bare git repository: %s", repoRoot)
}
submodules, err := d.repoInspector.ListSubmodules(ctx, repoRoot)
if err != nil {
return "", err
}
if len(submodules) > 0 {
return "", fmt.Errorf("vm run does not support git submodules in %s (%s); use `vm create` + `vm workspace prepare --mode full_copy`", repoRoot, strings.Join(submodules, ", "))
}
return sourcePath, nil
}
// repoHasMiseFiles reports whether the repo at sourcePath contains a
// mise tooling manifest. Used as a host-side preflight: when --nat is
// off and a manifest is present, vm run refuses early instead of
// committing to a VM that will silently fail to install tools.
func repoHasMiseFiles(sourcePath string) (bool, error) {
for _, name := range []string{".mise.toml", ".tool-versions"} {
info, err := os.Stat(filepath.Join(sourcePath, name))
if err == nil && !info.IsDir() {
return true, nil
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return false, fmt.Errorf("inspect %s: %w", name, err)
}
}
return false, nil
}
// splitVMRunArgs partitions cobra positional args into the optional path
// argument and the trailing command (everything after a `--` separator).
// The path slice may contain 0..1 entries; the command slice may be empty.
func splitVMRunArgs(cmd *cobra.Command, args []string) (pathArgs, commandArgs []string) {
dash := cmd.ArgsLenAtDash()
if dash < 0 {
return args, nil
}
if dash > len(args) {
dash = len(args)
}
return args[:dash], args[dash:]
}
// runVMRun orchestrates the full `vm run` flow: create the VM, wait
// for guest ssh, optionally materialise a workspace and kick off the
// tooling bootstrap, then either attach interactively or run the
// user's command and propagate its exit status.
func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, stdin io.Reader, stdout, stderr io.Writer, params api.VMCreateParams, repo *vmRunRepo, command []string, removeOnExit, detach, skipBootstrap, verbose bool) error {
if repo != nil && !skipBootstrap && !params.NATEnabled {
hasMise, err := repoHasMiseFiles(repo.sourcePath)
if err != nil {
return err
}
if hasMise {
return errors.New("tooling bootstrap requires --nat (or pass --no-bootstrap to skip)")
}
}
progress := newVMRunProgressRenderer(stderr, verbose)
defer progress.clear()
vm, err := d.runVMCreate(ctx, socketPath, stderr, params, verbose)
if err != nil {
return err
}
vmRef := strings.TrimSpace(vm.Name)
if vmRef == "" {
vmRef = shortID(vm.ID)
}
// --rm cleanup is wired AFTER ssh is confirmed. An ssh-wait
// timeout leaves the VM alive for `vm logs` inspection (our
// error message tells the user that); the cleanup only fires
// once the session phase runs.
shouldRemove := false
if removeOnExit {
defer func() {
if !shouldRemove {
return
}
// Use a fresh context so Ctrl-C during the session
// doesn't abort the delete RPC.
cleanupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := d.vmDelete(cleanupCtx, socketPath, vmRef); err != nil {
progress.clear()
printVMRunWarning(stderr, fmt.Sprintf("--rm cleanup failed: %v (leaked vm %q; delete manually)", err, vmRef))
} else if err := removeUserKnownHosts(vm); err != nil {
progress.clear()
printVMRunWarning(stderr, fmt.Sprintf("known_hosts cleanup failed: %v", err))
}
}()
}
sshAddress := net.JoinHostPort(vm.Runtime.GuestIP, "22")
progress.render("waiting for guest ssh")
sshCtx, cancelSSH := context.WithTimeout(ctx, vmRunSSHTimeout)
if err := d.guestWaitForSSH(sshCtx, sshAddress, cfg.SSHKeyPath, 250*time.Millisecond); err != nil {
cancelSSH()
// Surface parent-context cancellation (Ctrl-C, caller
// timeout) as-is. Only the guest-side timeout needs the
// actionable hint.
if errors.Is(ctx.Err(), context.Canceled) || errors.Is(ctx.Err(), context.DeadlineExceeded) {
return fmt.Errorf("vm %q: %w", vmRef, ctx.Err())
}
return fmt.Errorf(
"vm %q is running but guest ssh did not come up within %s. "+
"sshd is the likely suspect — inspect the guest console with "+
"`banger vm logs %s` (look for `Failed to start ssh.service`). "+
"The VM is still alive; leave it for inspection or remove with `banger vm delete %s`. "+
"underlying error: %w",
vmRef, vmRunSSHTimeout, vmRef, vmRef, err,
)
}
cancelSSH()
shouldRemove = removeOnExit
if repo != nil {
progress.render("preparing guest workspace")
// --from is only meaningful paired with --branch; the daemon
// rejects "from without branch" outright. Our flag default is
// "HEAD" (useful only when --branch is set), so scrub it when
// branch is empty to avoid a false "workspace from requires
// branch" error.
fromRef := ""
if strings.TrimSpace(repo.branchName) != "" {
fromRef = repo.fromRef
}
if !repo.includeUntracked {
progress.clear()
d.noteUntrackedSkipped(ctx, stderr, repo.sourcePath)
}
prepared, err := d.vmWorkspacePrepare(ctx, socketPath, api.VMWorkspacePrepareParams{
IDOrName: vmRef,
SourcePath: repo.sourcePath,
GuestPath: vmRunGuestDir(),
Branch: repo.branchName,
From: fromRef,
Mode: string(model.WorkspacePrepareModeShallowOverlay),
IncludeUntracked: repo.includeUntracked,
})
if err != nil {
return fmt.Errorf("vm %q is running but workspace prepare failed: %w", vmRef, err)
}
// The prepare RPC already did the full git inspection on the
// daemon side; grab what the tooling harness needs from its
// result instead of re-inspecting here.
if len(command) == 0 && !skipBootstrap {
client, err := d.guestDial(ctx, sshAddress, cfg.SSHKeyPath)
if err != nil {
return fmt.Errorf("vm %q is running but guest ssh is unavailable: %w", vmRef, err)
}
if err := d.startVMRunToolingHarness(ctx, client, prepared.Workspace.RepoRoot, prepared.Workspace.RepoName, progress, detach, stderr); err != nil {
progress.clear()
printVMRunWarning(stderr, fmt.Sprintf("guest tooling bootstrap start failed: %v", err))
}
_ = client.Close()
}
}
if detach {
progress.commitLine(fmt.Sprintf("vm %s running; reconnect with: banger vm ssh %s", vmRef, vmRef))
return nil
}
sshArgs, err := sshCommandArgs(cfg, vm.Runtime.GuestIP, command)
if err != nil {
return fmt.Errorf("vm %q is running but ssh args could not be built: %w", vmRef, err)
}
if len(command) > 0 {
progress.render("running command in guest")
progress.clear()
if err := d.sshExec(ctx, stdin, stdout, stderr, sshArgs); err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return ExitCodeError{Code: exitErr.ExitCode()}
}
return err
}
return nil
}
progress.render("attaching to guest")
progress.clear()
return d.runSSHSession(ctx, socketPath, vmRef, stdin, stdout, stderr, sshArgs, removeOnExit)
}
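// vmRunGuestDir is the fixed guest-side directory a vm run workspace is
// materialised into.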
func vmRunGuestDir() string {
return "/root/repo"
}
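// vmRunToolingHarnessPath is the guest-side upload location of the
// bootstrap helper script, namespaced by repo name.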
func vmRunToolingHarnessPath(repoName string) string {
return filepath.ToSlash(filepath.Join("/tmp", "banger-vm-run-tooling-"+repoName+".sh"))
}
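// vmRunToolingHarnessLogPath is the guest-side log file the harness
// writes to; progress messages point the user at it.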
func vmRunToolingHarnessLogPath(repoName string) string {
return filepath.ToSlash(filepath.Join("/root/.cache/banger", "vm-run-tooling-"+repoName+".log"))
}
// startVMRunToolingHarness uploads + launches the mise bootstrap
// script inside the guest. repoRoot / repoName both come from the
// daemon's workspace.prepare RPC response so the CLI doesn't have
// to re-inspect the git tree.
//
// When wait is true (used by --detach), the harness runs in the
// foreground so the CLI can return only after bootstrap finishes;
// the harness's stdout is streamed to syncOut for live visibility.
// When wait is false (interactive mode), the harness is nohup'd so
// the user's ssh session can start while bootstrap continues.
func (d *deps) startVMRunToolingHarness(ctx context.Context, client vmRunGuestClient, repoRoot, repoName string, progress *vmRunProgressRenderer, wait bool, syncOut io.Writer) error {
if progress != nil {
progress.render("starting guest tooling bootstrap")
}
plan := d.buildVMRunToolingPlan(ctx, repoRoot)
var uploadLog bytes.Buffer
if err := client.UploadFile(ctx, vmRunToolingHarnessPath(repoName), 0o755, []byte(vmRunToolingHarnessScript(plan)), &uploadLog); err != nil {
return formatVMRunStepError("upload guest tooling bootstrap", err, uploadLog.String())
}
if wait {
var launchLog bytes.Buffer
out := io.Writer(&launchLog)
if syncOut != nil {
out = io.MultiWriter(syncOut, &launchLog)
}
if err := client.RunScript(ctx, vmRunToolingHarnessSyncScript(repoName), out); err != nil {
return formatVMRunStepError("run guest tooling bootstrap", err, launchLog.String())
}
if progress != nil {
progress.render("guest tooling bootstrap done (log: " + vmRunToolingHarnessLogPath(repoName) + ")")
}
return nil
}
var launchLog bytes.Buffer
if err := client.RunScript(ctx, vmRunToolingHarnessLaunchScript(repoName), &launchLog); err != nil {
return formatVMRunStepError("launch guest tooling bootstrap", err, launchLog.String())
}
if progress != nil {
progress.render("guest tooling log: " + vmRunToolingHarnessLogPath(repoName))
}
return nil
}
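// vmRunToolingHarnessScript renders the bash bootstrap that runs inside
// the guest: cd into the repo, run `mise install` when a manifest is
// present, then apply each deterministic plan step under a per-step
// timeout. Every step is best-effort: failures are logged, never fatal,
// so a broken tool install can't wedge vm run.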
func vmRunToolingHarnessScript(plan toolingplan.Plan) string {
var script strings.Builder
script.WriteString("set -uo pipefail\n")
fmt.Fprintf(&script, "DIR=%s\n", shellQuote(vmRunGuestDir()))
script.WriteString("export PATH=/usr/local/bin:/root/.local/share/mise/shims:$PATH\n")
script.WriteString("if [ -f /etc/profile.d/mise.sh ]; then . /etc/profile.d/mise.sh || true; fi\n")
script.WriteString("log() { printf '%s\\n' \"$*\"; }\n")
script.WriteString("run_best_effort() {\n")
script.WriteString(" \"$@\"\n")
script.WriteString(" rc=$?\n")
script.WriteString(" if [ \"$rc\" -ne 0 ]; then\n")
script.WriteString(" log \"command failed ($rc): $*\"\n")
script.WriteString(" fi\n")
script.WriteString(" return 0\n")
script.WriteString("}\n")
script.WriteString("run_bounded_best_effort() {\n")
script.WriteString(" timeout_secs=\"$1\"\n")
script.WriteString(" shift\n")
script.WriteString(" timeout_marker=\"$(mktemp)\"\n")
script.WriteString(" rm -f \"$timeout_marker\"\n")
script.WriteString(" \"$@\" &\n")
script.WriteString(" cmd_pid=$!\n")
script.WriteString(" (\n")
script.WriteString(" sleep \"$timeout_secs\"\n")
script.WriteString(" if kill -0 \"$cmd_pid\" 2>/dev/null; then\n")
script.WriteString(" : >\"$timeout_marker\"\n")
script.WriteString(" log \"command timed out after ${timeout_secs}s: $*\"\n")
script.WriteString(" kill -TERM \"$cmd_pid\" 2>/dev/null || true\n")
script.WriteString(" if command -v pkill >/dev/null 2>&1; then pkill -TERM -P \"$cmd_pid\" 2>/dev/null || true; fi\n")
script.WriteString(" sleep 2\n")
script.WriteString(" kill -KILL \"$cmd_pid\" 2>/dev/null || true\n")
script.WriteString(" if command -v pkill >/dev/null 2>&1; then pkill -KILL -P \"$cmd_pid\" 2>/dev/null || true; fi\n")
script.WriteString(" fi\n")
script.WriteString(" ) &\n")
script.WriteString(" watchdog_pid=$!\n")
script.WriteString(" wait \"$cmd_pid\"\n")
script.WriteString(" rc=$?\n")
script.WriteString(" kill \"$watchdog_pid\" 2>/dev/null || true\n")
script.WriteString(" wait \"$watchdog_pid\" 2>/dev/null || true\n")
script.WriteString(" if [ -f \"$timeout_marker\" ]; then\n")
script.WriteString(" rm -f \"$timeout_marker\"\n")
script.WriteString(" return 0\n")
script.WriteString(" fi\n")
script.WriteString(" rm -f \"$timeout_marker\"\n")
script.WriteString(" if [ \"$rc\" -ne 0 ]; then\n")
script.WriteString(" log \"command failed ($rc): $*\"\n")
script.WriteString(" fi\n")
script.WriteString(" return 0\n")
script.WriteString("}\n")
script.WriteString("cd \"$DIR\" || { log \"missing repo directory: $DIR\"; exit 0; }\n")
script.WriteString("MISE_BIN=\"$(command -v mise || true)\"\n")
script.WriteString("if [ -z \"$MISE_BIN\" ]; then log \"mise not found; skipping guest tooling bootstrap\"; exit 0; fi\n")
script.WriteString("log \"starting guest tooling bootstrap in $DIR\"\n")
if len(plan.RepoManagedTools) > 0 {
fmt.Fprintf(&script, "log %s\n", shellQuote("repo-managed mise tools: "+strings.Join(plan.RepoManagedTools, ", ")))
}
script.WriteString("if [ -f .mise.toml ] || [ -f .tool-versions ]; then\n")
script.WriteString(" log \"running mise install from repo declarations\"\n")
script.WriteString(" run_best_effort \"$MISE_BIN\" install\n")
script.WriteString("fi\n")
fmt.Fprintf(&script, "INSTALL_TIMEOUT_SECS=%d\n", vmRunToolingInstallTimeoutSeconds)
for _, step := range plan.Steps {
stepLabel := fmt.Sprintf("deterministic install: %s@%s (%s)", step.Tool, step.Version, step.Source)
fmt.Fprintf(&script, "log %s\n", shellQuote(stepLabel))
fmt.Fprintf(&script, "run_bounded_best_effort \"$INSTALL_TIMEOUT_SECS\" \"$MISE_BIN\" use -g --pin %s\n", shellQuote(step.Tool+"@"+step.Version))
}
for _, skip := range plan.Skips {
skipLabel := fmt.Sprintf("deterministic skip: %s (%s)", skip.Target, skip.Reason)
fmt.Fprintf(&script, "log %s\n", shellQuote(skipLabel))
}
if len(plan.Steps) > 0 {
script.WriteString("run_best_effort \"$MISE_BIN\" reshim\n")
}
script.WriteString("log \"guest tooling bootstrap finished\"\n")
return script.String()
}
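// vmRunToolingHarnessLaunchScript starts the uploaded harness in the
// background (nohup + disown) with output redirected to the log file so
// the interactive ssh session can begin immediately.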
func vmRunToolingHarnessLaunchScript(repoName string) string {
var script strings.Builder
script.WriteString("set -euo pipefail\n")
fmt.Fprintf(&script, "HELPER=%s\n", shellQuote(vmRunToolingHarnessPath(repoName)))
fmt.Fprintf(&script, "LOG=%s\n", shellQuote(vmRunToolingHarnessLogPath(repoName)))
script.WriteString("mkdir -p \"$(dirname \"$LOG\")\"\n")
script.WriteString("nohup bash \"$HELPER\" >\"$LOG\" 2>&1 </dev/null &\n")
script.WriteString("disown || true\n")
return script.String()
}
// vmRunToolingHarnessSyncScript is the foreground variant used by
// --detach: it tees the harness output to both the log file and the
// caller's stdout so the host-side CLI can stream live progress while
// still preserving the log for later inspection.
func vmRunToolingHarnessSyncScript(repoName string) string {
var script strings.Builder
script.WriteString("set -uo pipefail\n")
fmt.Fprintf(&script, "HELPER=%s\n", shellQuote(vmRunToolingHarnessPath(repoName)))
fmt.Fprintf(&script, "LOG=%s\n", shellQuote(vmRunToolingHarnessLogPath(repoName)))
script.WriteString("mkdir -p \"$(dirname \"$LOG\")\"\n")
script.WriteString("bash \"$HELPER\" 2>&1 | tee \"$LOG\"\n")
return script.String()
}
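// formatVMRunStepError wraps a step failure with the captured guest log
// (when non-empty) so the error says why the step failed, not just that
// it did.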
func formatVMRunStepError(action string, err error, log string) error {
log = strings.TrimSpace(log)
if log == "" {
return fmt.Errorf("%s: %w", action, err)
}
return fmt.Errorf("%s: %w: %s", action, err, log)
}
type vmRunProgressRenderer struct {
out io.Writer
enabled bool
inline bool
active bool
lastLine string
}
// newVMRunProgressRenderer wires up progress for `vm run`. Unlike the
// vm_create renderer, this one emits in line mode even on non-TTY
// writers (covers tests and piped output that the existing tooling
// already parses); inline mode kicks in only when stderr is a TTY,
// verbose is unset, and BANGER_NO_PROGRESS is unset.
func newVMRunProgressRenderer(out io.Writer, verbose bool) *vmRunProgressRenderer {
if out == nil {
return &vmRunProgressRenderer{}
}
return &vmRunProgressRenderer{
out: out,
enabled: true,
inline: writerSupportsProgress(out) && !verbose && !progressDisabledByEnv(),
}
}
func (r *vmRunProgressRenderer) render(detail string) {
if r == nil || !r.enabled {
return
}
line := formatVMRunProgress(detail)
if line == "" || line == r.lastLine {
return
}
r.lastLine = line
if r.inline {
_, _ = fmt.Fprint(r.out, "\r\x1b[K", line)
r.active = true
return
}
_, _ = fmt.Fprintln(r.out, line)
}
// clear erases the live inline line so the caller can write a clean
// terminating message (warning, ssh attach, command output). No-op
// outside inline mode.
func (r *vmRunProgressRenderer) clear() {
if r == nil || !r.enabled || !r.inline || !r.active {
return
}
_, _ = fmt.Fprint(r.out, "\r\x1b[K")
r.active = false
r.lastLine = ""
}
// commitLine prints detail as a final, persistent line. In inline
// mode it overwrites the live status; in line mode it just appends.
// Used for terminal messages like the --detach hand-off summary.
func (r *vmRunProgressRenderer) commitLine(detail string) {
if r == nil || !r.enabled {
return
}
line := formatVMRunProgress(detail)
if line == "" {
return
}
if r.inline {
_, _ = fmt.Fprint(r.out, "\r\x1b[K", line, "\n")
r.active = false
r.lastLine = ""
return
}
if line == r.lastLine {
return
}
r.lastLine = line
_, _ = fmt.Fprintln(r.out, line)
}
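// formatVMRunProgress prefixes a non-empty detail with the "[vm run]"
// tag; empty input yields "" so the renderer can skip the write.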
func formatVMRunProgress(detail string) string {
detail = strings.TrimSpace(detail)
if detail == "" {
return ""
}
return "[vm run] " + detail
}
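// printVMRunWarning writes a persistent "[vm run] warning: ..." line to
// out, skipping empty details and nil writers.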
func printVMRunWarning(out io.Writer, detail string) {
detail = strings.TrimSpace(detail)
if out == nil || detail == "" {
return
}
_, _ = fmt.Fprintln(out, "[vm run] warning: "+detail)
}

internal/cli/vm_run_test.go (new file, 278 lines)
@@ -0,0 +1,278 @@
package cli
import (
"bytes"
"context"
"io"
"os"
"path/filepath"
"strings"
"testing"
"time"
"banger/internal/api"
"banger/internal/model"
"banger/internal/toolingplan"
)
func TestVMRunRejectsDetachWithRm(t *testing.T) {
cmd := NewBangerCommand()
cmd.SetArgs([]string{"vm", "run", "-d", "--rm"})
err := cmd.Execute()
if err == nil || !strings.Contains(err.Error(), "cannot combine --detach with --rm") {
t.Fatalf("Execute() error = %v, want --detach + --rm rejection", err)
}
}
func TestVMRunRejectsDetachWithCommand(t *testing.T) {
cmd := NewBangerCommand()
cmd.SetArgs([]string{"vm", "run", "-d", "--", "whoami"})
err := cmd.Execute()
if err == nil || !strings.Contains(err.Error(), "cannot combine --detach with a guest command") {
t.Fatalf("Execute() error = %v, want --detach + command rejection", err)
}
}
func TestRepoHasMiseFiles(t *testing.T) {
dir := t.TempDir()
got, err := repoHasMiseFiles(dir)
if err != nil {
t.Fatalf("repoHasMiseFiles(empty): %v", err)
}
if got {
t.Fatalf("repoHasMiseFiles(empty) = true, want false")
}
if err := os.WriteFile(filepath.Join(dir, ".mise.toml"), []byte(""), 0o600); err != nil {
t.Fatalf("write .mise.toml: %v", err)
}
got, err = repoHasMiseFiles(dir)
if err != nil {
t.Fatalf("repoHasMiseFiles(.mise.toml): %v", err)
}
if !got {
t.Fatalf("repoHasMiseFiles(.mise.toml) = false, want true")
}
dir2 := t.TempDir()
if err := os.WriteFile(filepath.Join(dir2, ".tool-versions"), []byte(""), 0o600); err != nil {
t.Fatalf("write .tool-versions: %v", err)
}
got, err = repoHasMiseFiles(dir2)
if err != nil {
t.Fatalf("repoHasMiseFiles(.tool-versions): %v", err)
}
if !got {
t.Fatalf("repoHasMiseFiles(.tool-versions) = false, want true")
}
}
// runVMRunDepsRunningVM returns a deps wired with fakes for the whole
// happy path (create, ssh wait, workspace prepare, tooling plan, health,
// ssh exec) so flow tests can drive runVMRun end to end without a
// real daemon.
func runVMRunDepsRunningVM(t *testing.T) (*deps, *model.VMRecord) {
t.Helper()
d := defaultDeps()
vm := &model.VMRecord{
ID: "vm-id",
Name: "devbox",
Runtime: model.VMRuntime{
State: model.VMStateRunning,
GuestIP: "172.16.0.2",
DNSName: "devbox.vm",
},
}
d.vmCreateBegin = func(context.Context, string, api.VMCreateParams) (api.VMCreateBeginResult, error) {
return api.VMCreateBeginResult{Operation: api.VMCreateOperation{ID: "op-1", Stage: "ready", Done: true, Success: true, VM: vm}}, nil
}
d.guestWaitForSSH = func(context.Context, string, string, time.Duration) error { return nil }
d.vmWorkspacePrepare = func(context.Context, string, api.VMWorkspacePrepareParams) (api.VMWorkspacePrepareResult, error) {
return api.VMWorkspacePrepareResult{Workspace: model.WorkspacePrepareResult{VMID: vm.ID, GuestPath: "/root/repo", RepoName: "repo", RepoRoot: "/tmp/repo"}}, nil
}
d.buildVMRunToolingPlan = func(context.Context, string) toolingplan.Plan {
return toolingplan.Plan{}
}
d.vmHealth = func(context.Context, string, string) (api.VMHealthResult, error) {
return api.VMHealthResult{Healthy: true}, nil
}
d.sshExec = func(context.Context, io.Reader, io.Writer, io.Writer, []string) error { return nil }
return d, vm
}
func TestRunVMRunRefusesBootstrapWithoutNAT(t *testing.T) {
repoRoot := t.TempDir()
if err := os.WriteFile(filepath.Join(repoRoot, ".mise.toml"), []byte(""), 0o600); err != nil {
t.Fatalf("write .mise.toml: %v", err)
}
d := defaultDeps()
d.vmCreateBegin = func(context.Context, string, api.VMCreateParams) (api.VMCreateBeginResult, error) {
t.Fatal("vmCreateBegin should not be called when NAT precondition refuses")
return api.VMCreateBeginResult{}, nil
}
repo := vmRunRepo{sourcePath: repoRoot}
var stdout, stderr bytes.Buffer
err := d.runVMRun(
context.Background(),
"/tmp/bangerd.sock",
model.DaemonConfig{SSHKeyPath: "/tmp/id_ed25519"},
strings.NewReader(""),
&stdout, &stderr,
api.VMCreateParams{Name: "devbox", NATEnabled: false},
&repo,
nil,
false, false, false, false,
)
if err == nil || !strings.Contains(err.Error(), "tooling bootstrap requires --nat") {
t.Fatalf("runVMRun = %v, want NAT precondition refusal", err)
}
}
func TestRunVMRunBootstrapPreconditionRespectsNoBootstrap(t *testing.T) {
repoRoot := t.TempDir()
if err := os.WriteFile(filepath.Join(repoRoot, ".mise.toml"), []byte(""), 0o600); err != nil {
t.Fatalf("write .mise.toml: %v", err)
}
d, _ := runVMRunDepsRunningVM(t)
dialed := false
d.guestDial = func(context.Context, string, string) (vmRunGuestClient, error) {
dialed = true
return &testVMRunGuestClient{}, nil
}
repo := vmRunRepo{sourcePath: repoRoot}
var stdout, stderr bytes.Buffer
err := d.runVMRun(
context.Background(),
"/tmp/bangerd.sock",
model.DaemonConfig{SSHKeyPath: "/tmp/id_ed25519"},
strings.NewReader(""),
&stdout, &stderr,
api.VMCreateParams{Name: "devbox", NATEnabled: false},
&repo,
nil,
false, false, true, false, // skipBootstrap = true
)
if err != nil {
t.Fatalf("runVMRun: %v", err)
}
if dialed {
t.Fatal("guestDial should not be called when --no-bootstrap is set")
}
}
func TestRunVMRunBootstrapPreconditionPassesWithoutMiseFiles(t *testing.T) {
repoRoot := t.TempDir() // empty repo, no mise files
d, _ := runVMRunDepsRunningVM(t)
dialed := false
d.guestDial = func(context.Context, string, string) (vmRunGuestClient, error) {
dialed = true
return &testVMRunGuestClient{}, nil
}
repo := vmRunRepo{sourcePath: repoRoot}
var stdout, stderr bytes.Buffer
err := d.runVMRun(
context.Background(),
"/tmp/bangerd.sock",
model.DaemonConfig{SSHKeyPath: "/tmp/id_ed25519"},
strings.NewReader(""),
&stdout, &stderr,
api.VMCreateParams{Name: "devbox", NATEnabled: false},
&repo,
nil,
false, false, false, false,
)
if err != nil {
t.Fatalf("runVMRun: %v", err)
}
// With no mise manifest the NAT precondition doesn't trip; bootstrap
// dispatch still runs, so guestDial must be called.
if !dialed {
t.Fatal("guestDial should be called for bootstrap dispatch")
}
}
func TestRunVMRunDetachSkipsSshAttach(t *testing.T) {
d, _ := runVMRunDepsRunningVM(t)
d.guestDial = func(context.Context, string, string) (vmRunGuestClient, error) {
return &testVMRunGuestClient{}, nil
}
sshExecCalls := 0
d.sshExec = func(context.Context, io.Reader, io.Writer, io.Writer, []string) error {
sshExecCalls++
return nil
}
var stdout, stderr bytes.Buffer
err := d.runVMRun(
context.Background(),
"/tmp/bangerd.sock",
model.DaemonConfig{SSHKeyPath: "/tmp/id_ed25519"},
strings.NewReader(""),
&stdout, &stderr,
api.VMCreateParams{Name: "devbox"},
nil, // bare mode
nil, // no command
false, true, false, false, // detach = true
)
if err != nil {
t.Fatalf("runVMRun: %v", err)
}
if sshExecCalls != 0 {
t.Fatalf("sshExec called %d times, want 0 in detach mode", sshExecCalls)
}
if !strings.Contains(stderr.String(), "reconnect with: banger vm ssh devbox") {
t.Fatalf("stderr = %q, want reconnect hint", stderr.String())
}
}
func TestRunVMRunDetachUsesSyncBootstrapPath(t *testing.T) {
repoRoot := t.TempDir()
d, _ := runVMRunDepsRunningVM(t)
fakeClient := &testVMRunGuestClient{}
d.guestDial = func(context.Context, string, string) (vmRunGuestClient, error) {
return fakeClient, nil
}
sshExecCalls := 0
d.sshExec = func(context.Context, io.Reader, io.Writer, io.Writer, []string) error {
sshExecCalls++
return nil
}
repo := vmRunRepo{sourcePath: repoRoot}
var stdout, stderr bytes.Buffer
err := d.runVMRun(
context.Background(),
"/tmp/bangerd.sock",
model.DaemonConfig{SSHKeyPath: "/tmp/id_ed25519"},
strings.NewReader(""),
&stdout, &stderr,
api.VMCreateParams{Name: "devbox", NATEnabled: true},
&repo,
nil,
false, true, false, false, // detach = true
)
if err != nil {
t.Fatalf("runVMRun: %v", err)
}
if sshExecCalls != 0 {
t.Fatalf("sshExec called %d times, want 0 in detach mode", sshExecCalls)
}
if len(fakeClient.uploads) != 1 {
t.Fatalf("uploads = %d, want 1 (harness upload)", len(fakeClient.uploads))
}
// Sync mode should invoke the tee'd wrapper, not the nohup launcher.
if strings.Contains(fakeClient.launchScript, "nohup") {
t.Fatalf("detach mode should not use nohup launcher; got: %q", fakeClient.launchScript)
}
if !strings.Contains(fakeClient.launchScript, "tee") {
t.Fatalf("detach mode should tee output to log; got: %q", fakeClient.launchScript)
}
}

@@ -0,0 +1,53 @@
package cli
import (
"bytes"
"strings"
"testing"
"banger/internal/api"
)
func TestPrintVMSpecLineWithAllFields(t *testing.T) {
vcpu, mem := 2, 2048
params := api.VMCreateParams{
VCPUCount: &vcpu,
MemoryMiB: &mem,
WorkDiskSize: "8G",
}
var buf bytes.Buffer
printVMSpecLine(&buf, params)
got := buf.String()
for _, want := range []string{"spec:", "2 vcpu", "2048 MiB", "8G"} {
if !strings.Contains(got, want) {
t.Errorf("output missing %q:\n%s", want, got)
}
}
if !strings.HasSuffix(got, "\n") {
t.Error("spec line should terminate with newline")
}
}
func TestPrintVMSpecLineFallsBackToBuiltinsOnNilFields(t *testing.T) {
// Empty params — the printer reaches for DefaultVCPUCount /
// DefaultMemoryMiB / DefaultWorkDiskSize so output is still sane.
var buf bytes.Buffer
printVMSpecLine(&buf, api.VMCreateParams{})
got := buf.String()
// Not asserting exact values — just that it produced a plausible
// line with the three labels.
for _, want := range []string{"spec:", "vcpu", "MiB", "disk"} {
if !strings.Contains(got, want) {
t.Errorf("output missing %q:\n%s", want, got)
}
}
}
func TestPrintVMSpecLineIgnoresUnparseableDiskSize(t *testing.T) {
// Falls back to builtin default; must not panic or print garbage.
var buf bytes.Buffer
printVMSpecLine(&buf, api.VMCreateParams{WorkDiskSize: "not-a-size"})
if !strings.Contains(buf.String(), "spec:") {
t.Errorf("expected spec line even with bad input, got %q", buf.String())
}
}

@@ -0,0 +1,61 @@
package cli
import (
"context"
"fmt"
"io"
)
// runWorkspaceDryRun inspects the local repo at resolvedPath and
// prints the file list that `vm run` / `workspace prepare` would ship
// into the guest. Runs on the CLI side (no daemon RPC needed) since
// the daemon is always local and the workspace inspection is a pure
// git read. Git calls go through d.repoInspector so tests inject a
// stub Runner via the deps struct instead of touching package globals.
func (d *deps) runWorkspaceDryRun(ctx context.Context, out io.Writer, resolvedPath, branchName, fromRef string, includeUntracked bool) error {
spec, err := d.repoInspector.InspectRepo(ctx, resolvedPath, branchName, fromRef, includeUntracked)
if err != nil {
return err
}
fmt.Fprintf(out, "dry-run: %d file(s) would be copied to guest\n", len(spec.OverlayPaths))
fmt.Fprintf(out, "repo: %s\n", spec.RepoRoot)
if includeUntracked {
fmt.Fprintln(out, "mode: tracked + untracked non-ignored (--include-untracked)")
} else {
fmt.Fprintln(out, "mode: tracked only (re-run with --include-untracked to also copy untracked non-ignored files)")
}
fmt.Fprintln(out, "---")
for _, path := range spec.OverlayPaths {
fmt.Fprintln(out, path)
}
if !includeUntracked {
d.noteUntrackedSkipped(ctx, out, spec.RepoRoot)
}
return nil
}
// noteUntrackedSkipped prints a one-line notice when the repo holds
// untracked non-ignored files that will NOT be copied because
// --include-untracked was not passed.
//
// Best-effort: if sourcePath isn't inside a git repo, or git errors,
// or there are no untracked files, the helper stays silent. The
// notice is a courtesy — failing the whole operation over a courtesy
// would be worse than the notice being missing.
//
// Resolves sourcePath to the repo root internally via `git rev-parse
// --show-toplevel` so callers can pass whatever path the user typed.
// Before this helper normalised, subdir inputs ran `ls-files
// --others` scoped to the subdir, which silently underreported the
// skipped files the user needed to know about.
func (d *deps) noteUntrackedSkipped(ctx context.Context, out io.Writer, sourcePath string) {
repoRoot, err := d.repoInspector.GitTrimmedOutput(ctx, sourcePath, "rev-parse", "--show-toplevel")
if err != nil || repoRoot == "" {
return
}
count, err := d.repoInspector.CountUntrackedPaths(ctx, repoRoot)
if err != nil || count == 0 {
return
}
fmt.Fprintf(out, "---\nnote: %d untracked non-ignored file(s) were NOT copied (git-tracked files only by default — pass --include-untracked to include them)\n", count)
}

@@ -0,0 +1,120 @@
package cli
import (
"bytes"
"context"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"banger/internal/daemon/workspace"
)
// seedRepoWithSubdir creates a git repo with one tracked file, and an
// untracked non-ignored file at the repo root (not under the subdir).
// Returns the repo root and the subdir path.
func seedRepoWithSubdir(t *testing.T) (repoRoot, subDir string) {
t.Helper()
if _, err := exec.LookPath("git"); err != nil {
t.Skipf("git not on PATH: %v", err)
}
repoRoot = t.TempDir()
run := func(args ...string) {
t.Helper()
cmd := exec.Command(args[0], args[1:]...)
cmd.Dir = repoRoot
cmd.Env = append(os.Environ(),
"GIT_AUTHOR_NAME=t", "GIT_AUTHOR_EMAIL=t@t",
"GIT_COMMITTER_NAME=t", "GIT_COMMITTER_EMAIL=t@t",
"GIT_CONFIG_GLOBAL=/dev/null",
)
if out, err := cmd.CombinedOutput(); err != nil {
t.Fatalf("%v: %v\n%s", args, err, out)
}
}
writeFile := func(relPath, content string) {
t.Helper()
full := filepath.Join(repoRoot, relPath)
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
t.Fatal(err)
}
}
run("git", "init", "-q", "-b", "main")
run("git", "config", "commit.gpgsign", "false")
writeFile("tracked.md", "hello\n")
writeFile("sub/kept.txt", "kept\n")
run("git", "add", ".")
run("git", "commit", "-q", "-m", "init")
// Untracked non-ignored file at the ROOT — not under sub/. This is
// what the pre-fix noteUntrackedSkipped would miss when the user
// passed sub/ as the workspace source.
writeFile("ROOT-SECRET.env", "TOKEN=abc\n")
subDir = filepath.Join(repoRoot, "sub")
return repoRoot, subDir
}
// TestNoteUntrackedSkippedCountsRepoWideEvenFromSubdir pins the bug
// fix: when the user passes a subdirectory of a repo as the workspace
// source, the untracked-files notice must still reflect what will
// actually be skipped at the guest-shipping layer — which is a
// repo-wide concern. Before the fix the helper ran `git -C <subdir>
// ls-files --others --exclude-standard`, which only sees files under
// the subdir, silently underreporting the real skip count.
func TestNoteUntrackedSkippedCountsRepoWideEvenFromSubdir(t *testing.T) {
repoRoot, subDir := seedRepoWithSubdir(t)
d := defaultDeps()
d.repoInspector = workspace.NewInspector()
var out bytes.Buffer
d.noteUntrackedSkipped(context.Background(), &out, subDir)
got := out.String()
if !strings.Contains(got, "1 untracked") {
t.Fatalf("note = %q, want mention of 1 untracked file (the root-level SECRET.env)", got)
}
_ = repoRoot
}
// TestNoteUntrackedSkippedSilentOutsideRepo verifies the best-effort
// contract: when sourcePath is not inside any git repo, the helper
// prints nothing and does not error. Callers rely on this so a user
// who points vm run at an ad-hoc directory (or an export tarball
// that's been unpacked) doesn't get the whole operation aborted
// over a courtesy notice.
func TestNoteUntrackedSkippedSilentOutsideRepo(t *testing.T) {
d := defaultDeps()
d.repoInspector = workspace.NewInspector()
nonRepo := t.TempDir()
var out bytes.Buffer
d.noteUntrackedSkipped(context.Background(), &out, nonRepo)
if got := out.String(); got != "" {
t.Fatalf("note = %q, want no output outside a git repo", got)
}
}
// TestNoteUntrackedSkippedSwallowsInspectorErrors verifies that a
// runner that errors on every call produces no output and no panic.
// This is the other half of best-effort: even if git-the-binary is
// somehow broken or missing, the live flow keeps running.
func TestNoteUntrackedSkippedSwallowsInspectorErrors(t *testing.T) {
d := defaultDeps()
d.repoInspector = &workspace.Inspector{
Runner: func(context.Context, string, ...string) ([]byte, error) {
return nil, &exec.Error{Name: "git", Err: exec.ErrNotFound}
},
}
var out bytes.Buffer
d.noteUntrackedSkipped(context.Background(), &out, t.TempDir())
if got := out.String(); got != "" {
t.Fatalf("note = %q, want silence when inspector runner errors", got)
}
}

@@ -1,254 +1,458 @@
package config
import (
"errors"
"crypto/ed25519"
"crypto/rand"
"crypto/x509"
"encoding/pem"
"fmt"
"os"
"path/filepath"
"strings"
"time"
toml "github.com/pelletier/go-toml"
"golang.org/x/crypto/ssh"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/runtimebundle"
"banger/internal/system"
)
type fileConfig struct {
RuntimeDir string `toml:"runtime_dir"`
RepoRoot string `toml:"repo_root"`
LogLevel string `toml:"log_level"`
FirecrackerBin string `toml:"firecracker_bin"`
SSHKeyPath string `toml:"ssh_key_path"`
NamegenPath string `toml:"namegen_path"`
CustomizeScript string `toml:"customize_script"`
VSockAgent string `toml:"vsock_agent_path"`
VSockPingHelper string `toml:"vsock_ping_helper_path"`
DefaultWorkSeed string `toml:"default_work_seed"`
DefaultImageName string `toml:"default_image_name"`
DefaultRootfs string `toml:"default_rootfs"`
DefaultBaseRootfs string `toml:"default_base_rootfs"`
DefaultKernel string `toml:"default_kernel"`
DefaultInitrd string `toml:"default_initrd"`
DefaultModulesDir string `toml:"default_modules_dir"`
DefaultPackages string `toml:"default_packages_file"`
AutoStopStaleAfter string `toml:"auto_stop_stale_after"`
StatsPollInterval string `toml:"stats_poll_interval"`
MetricsPoll string `toml:"metrics_poll_interval"`
BridgeName string `toml:"bridge_name"`
BridgeIP string `toml:"bridge_ip"`
CIDR string `toml:"cidr"`
TapPoolSize int `toml:"tap_pool_size"`
DefaultDNS string `toml:"default_dns"`
LogLevel string `toml:"log_level"`
FirecrackerBin string `toml:"firecracker_bin"`
JailerBin string `toml:"jailer_bin"`
JailerEnabled *bool `toml:"jailer_enabled"`
JailerChrootBase string `toml:"jailer_chroot_base"`
SSHKeyPath string `toml:"ssh_key_path"`
DefaultImageName string `toml:"default_image_name"`
AutoStopStaleAfter string `toml:"auto_stop_stale_after"`
StatsPollInterval string `toml:"stats_poll_interval"`
BridgeName string `toml:"bridge_name"`
BridgeIP string `toml:"bridge_ip"`
CIDR string `toml:"cidr"`
TapPoolSize int `toml:"tap_pool_size"`
DefaultDNS string `toml:"default_dns"`
FileSync []fileSyncEntryFile `toml:"file_sync"`
VMDefaults *vmDefaultsFile `toml:"vm_defaults"`
}
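// fileSyncEntryFile mirrors one [[file_sync]] table in config.toml.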
type fileSyncEntryFile struct {
Host string `toml:"host"`
Guest string `toml:"guest"`
Mode string `toml:"mode"`
}
// vmDefaultsFile mirrors the optional `[vm_defaults]` block. All
// fields are zero-valued when omitted; the resolver treats zero as
// "not set, compute from host or fall back to builtin constants."
type vmDefaultsFile struct {
VCPUCount int `toml:"vcpu"`
MemoryMiB int `toml:"memory_mib"`
DiskSize string `toml:"disk_size"`
SystemOverlaySize string `toml:"system_overlay_size"`
}
func Load(layout paths.Layout) (model.DaemonConfig, error) {
home, err := os.UserHomeDir()
if err != nil {
return model.DaemonConfig{}, err
}
return load(layout, home, true)
}
func LoadDaemon(layout paths.Layout, ownerHome string) (model.DaemonConfig, error) {
return load(layout, ownerHome, false)
}
func load(layout paths.Layout, home string, ensureDefaultSSHKey bool) (model.DaemonConfig, error) {
cfg := model.DaemonConfig{
LogLevel: "info",
AutoStopStaleAfter: 0,
StatsPollInterval: model.DefaultStatsPollInterval,
MetricsPollInterval: model.DefaultMetricsPollInterval,
BridgeName: model.DefaultBridgeName,
BridgeIP: model.DefaultBridgeIP,
CIDR: model.DefaultCIDR,
TapPoolSize: 4,
DefaultDNS: model.DefaultDNS,
DefaultImageName: "default",
LogLevel: "info",
AutoStopStaleAfter: 0,
StatsPollInterval: model.DefaultStatsPollInterval,
BridgeName: model.DefaultBridgeName,
BridgeIP: model.DefaultBridgeIP,
CIDR: model.DefaultCIDR,
TapPoolSize: 4,
DefaultDNS: model.DefaultDNS,
DefaultImageName: "debian-bookworm",
HostHomeDir: home,
JailerBin: model.DefaultJailerBinary,
JailerEnabled: true,
// Chroot lives under StateDir (ext4) — not RuntimeDir (tmpfs).
// Hard-linking the kernel and any file-backed drives into the
// chroot requires same-filesystem; images already live under
// StateDir, so colocating the chroot avoids EXDEV.
JailerChrootBase: filepath.Join(layout.StateDir, "jail"),
}
path := filepath.Join(layout.ConfigDir, "config.toml")
info, err := os.Stat(path)
var file fileConfig
if err != nil {
if !os.IsNotExist(err) {
return cfg, err
}
} else if !info.IsDir() {
data, err := os.ReadFile(path)
configPath := filepath.Join(layout.ConfigDir, "config.toml")
if info, err := os.Stat(configPath); err == nil && !info.IsDir() {
data, err := os.ReadFile(configPath)
if err != nil {
return cfg, err
}
if err := toml.Unmarshal(data, &file); err != nil {
return cfg, err
}
}
cfg.RuntimeDir = paths.ResolveRuntimeDir(file.RuntimeDir, file.RepoRoot)
if err := applyRuntimeDefaults(&cfg); err != nil {
} else if err != nil && !os.IsNotExist(err) {
return cfg, err
}
if file.FirecrackerBin != "" {
cfg.FirecrackerBin = file.FirecrackerBin
if value := strings.TrimSpace(file.LogLevel); value != "" {
cfg.LogLevel = value
}
if file.LogLevel != "" {
cfg.LogLevel = file.LogLevel
if value := strings.TrimSpace(file.FirecrackerBin); value != "" {
cfg.FirecrackerBin = value
} else if path, err := system.LookupExecutable("firecracker"); err == nil {
cfg.FirecrackerBin = path
}
if file.NamegenPath != "" {
cfg.NamegenPath = file.NamegenPath
if value := strings.TrimSpace(file.JailerBin); value != "" {
cfg.JailerBin = value
}
if file.CustomizeScript != "" {
cfg.CustomizeScript = file.CustomizeScript
if file.JailerEnabled != nil {
cfg.JailerEnabled = *file.JailerEnabled
}
if file.VSockAgent != "" {
cfg.VSockAgentPath = file.VSockAgent
} else if file.VSockPingHelper != "" {
cfg.VSockAgentPath = file.VSockPingHelper
if value := strings.TrimSpace(file.JailerChrootBase); value != "" {
cfg.JailerChrootBase = value
}
if file.DefaultWorkSeed != "" {
cfg.DefaultWorkSeed = file.DefaultWorkSeed
if value := strings.TrimSpace(file.DefaultImageName); value != "" {
cfg.DefaultImageName = value
}
if file.DefaultImageName != "" {
cfg.DefaultImageName = file.DefaultImageName
if value := strings.TrimSpace(file.BridgeName); value != "" {
cfg.BridgeName = value
}
if file.DefaultRootfs != "" {
cfg.DefaultRootfs = file.DefaultRootfs
if value := strings.TrimSpace(file.BridgeIP); value != "" {
cfg.BridgeIP = value
}
if file.DefaultBaseRootfs != "" {
cfg.DefaultBaseRootfs = file.DefaultBaseRootfs
}
if file.DefaultKernel != "" {
cfg.DefaultKernel = file.DefaultKernel
}
if file.DefaultInitrd != "" {
cfg.DefaultInitrd = file.DefaultInitrd
}
if file.DefaultModulesDir != "" {
cfg.DefaultModulesDir = file.DefaultModulesDir
}
if file.DefaultPackages != "" {
cfg.DefaultPackagesFile = file.DefaultPackages
}
if file.BridgeName != "" {
cfg.BridgeName = file.BridgeName
}
if file.BridgeIP != "" {
cfg.BridgeIP = file.BridgeIP
}
if file.CIDR != "" {
cfg.CIDR = file.CIDR
if value := strings.TrimSpace(file.CIDR); value != "" {
cfg.CIDR = value
}
if file.TapPoolSize > 0 {
cfg.TapPoolSize = file.TapPoolSize
}
if file.DefaultDNS != "" {
cfg.DefaultDNS = file.DefaultDNS
if value := strings.TrimSpace(file.DefaultDNS); value != "" {
cfg.DefaultDNS = value
}
if file.AutoStopStaleAfter != "" {
duration, err := time.ParseDuration(file.AutoStopStaleAfter)
if value := strings.TrimSpace(file.AutoStopStaleAfter); value != "" {
duration, err := time.ParseDuration(value)
if err != nil {
return cfg, err
}
cfg.AutoStopStaleAfter = duration
}
if file.StatsPollInterval != "" {
duration, err := time.ParseDuration(file.StatsPollInterval)
if value := strings.TrimSpace(file.StatsPollInterval); value != "" {
duration, err := time.ParseDuration(value)
if err != nil {
return cfg, err
}
cfg.StatsPollInterval = duration
}
if file.MetricsPoll != "" {
duration, err := time.ParseDuration(file.MetricsPoll)
if err != nil {
return cfg, err
}
cfg.MetricsPollInterval = duration
}
if value := os.Getenv("BANGER_LOG_LEVEL"); value != "" {
if value := strings.TrimSpace(os.Getenv("BANGER_LOG_LEVEL")); value != "" {
cfg.LogLevel = value
}
sshKeyPath, err := resolveSSHKeyPath(layout, file.SSHKeyPath, home, ensureDefaultSSHKey)
if err != nil {
return cfg, err
}
cfg.SSHKeyPath = sshKeyPath
for i, entry := range file.FileSync {
validated, err := validateFileSyncEntry(entry, home)
if err != nil {
return cfg, fmt.Errorf("file_sync[%d]: %w", i, err)
}
cfg.FileSync = append(cfg.FileSync, validated)
}
if file.VMDefaults != nil {
override, err := parseVMDefaults(*file.VMDefaults)
if err != nil {
return cfg, fmt.Errorf("vm_defaults: %w", err)
}
cfg.VMDefaults = override
}
return cfg, nil
}
func applyRuntimeDefaults(cfg *model.DaemonConfig) error {
if cfg.RuntimeDir == "" {
return nil
// parseVMDefaults validates and translates the TOML block into the
// model-level override struct. Negative values are rejected outright;
// zero means "not set."
func parseVMDefaults(file vmDefaultsFile) (model.VMDefaultsOverride, error) {
override := model.VMDefaultsOverride{
VCPUCount: file.VCPUCount,
MemoryMiB: file.MemoryMiB,
}
meta, err := runtimebundle.LoadBundleMetadata(cfg.RuntimeDir)
switch {
case err == nil:
applyBundleMetadataDefaults(cfg, cfg.RuntimeDir, meta)
case errors.Is(err, os.ErrNotExist):
applyLegacyRuntimeDefaults(cfg)
default:
return err
if override.VCPUCount < 0 {
return model.VMDefaultsOverride{}, fmt.Errorf("vcpu must be >= 0 (got %d)", override.VCPUCount)
}
if cfg.DefaultRootfs == "" {
cfg.DefaultRootfs = firstExistingRuntimePath(
filepath.Join(cfg.RuntimeDir, "rootfs-docker.ext4"),
filepath.Join(cfg.RuntimeDir, "rootfs.ext4"),
)
if override.MemoryMiB < 0 {
return model.VMDefaultsOverride{}, fmt.Errorf("memory_mib must be >= 0 (got %d)", override.MemoryMiB)
}
if cfg.DefaultBaseRootfs == "" {
cfg.DefaultBaseRootfs = firstExistingRuntimePath(
filepath.Join(cfg.RuntimeDir, "rootfs.ext4"),
cfg.DefaultRootfs,
)
if value := strings.TrimSpace(file.DiskSize); value != "" {
bytes, err := model.ParseSize(value)
if err != nil {
return model.VMDefaultsOverride{}, fmt.Errorf("disk_size: %w", err)
}
override.WorkDiskSizeBytes = bytes
}
if cfg.DefaultWorkSeed == "" && cfg.DefaultRootfs != "" {
cfg.DefaultWorkSeed = firstExistingRuntimePath(associatedWorkSeedPath(cfg.DefaultRootfs))
if value := strings.TrimSpace(file.SystemOverlaySize); value != "" {
bytes, err := model.ParseSize(value)
if err != nil {
return model.VMDefaultsOverride{}, fmt.Errorf("system_overlay_size: %w", err)
}
override.SystemOverlaySizeByte = bytes
}
return override, nil
}
// validateFileSyncEntry normalises a single `[[file_sync]]` entry
// and rejects anything the operator would regret later: empty
// paths, unsupported leading characters, path traversal, host paths
// outside the owner home, or non-absolute guest targets.
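// A typical entry (matching what the config tests exercise):
//
//	[[file_sync]]
//	host = "~/.config/gh/hosts.yml"
//	guest = "/root/.config/gh/hosts.yml"
//	mode = "0644"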
func validateFileSyncEntry(entry fileSyncEntryFile, home string) (model.FileSyncEntry, error) {
host := strings.TrimSpace(entry.Host)
guest := strings.TrimSpace(entry.Guest)
if host == "" {
return model.FileSyncEntry{}, fmt.Errorf("host path is required")
}
if guest == "" {
return model.FileSyncEntry{}, fmt.Errorf("guest path is required")
}
if _, err := ResolveFileSyncHostPath(host, home); err != nil {
return model.FileSyncEntry{}, err
}
if err := validateFileSyncPath("guest", guest, true); err != nil {
return model.FileSyncEntry{}, err
}
// Guest paths must resolve under /root — that's where banger mounts
// the work disk. Syncing to /etc, /var, etc. would require writing
// to the rootfs snapshot, which file_sync deliberately doesn't do.
if !strings.HasPrefix(guest, "~/") && !strings.HasPrefix(guest, "/root/") && guest != "~" && guest != "/root" {
return model.FileSyncEntry{}, fmt.Errorf("guest path %q: must be under /root or ~/ (the work disk is mounted at /root)", guest)
}
mode := strings.TrimSpace(entry.Mode)
if mode != "" {
if err := validateFileSyncMode(mode); err != nil {
return model.FileSyncEntry{}, err
}
}
return model.FileSyncEntry{Host: host, Guest: guest, Mode: mode}, nil
}
// ResolveFileSyncHostPath expands a configured [[file_sync]].host path
// against the owner home and rejects anything that lands outside that
// home. Both config.Load and the root daemon use this so policy cannot
// drift between startup-time validation and runtime file reads.
func ResolveFileSyncHostPath(raw, home string) (string, error) {
raw = strings.TrimSpace(raw)
if err := validateFileSyncPath("host", raw, true); err != nil {
return "", err
}
home = strings.TrimSpace(home)
if home == "" {
return "", fmt.Errorf("host path %q: owner home is required", raw)
}
if !filepath.IsAbs(home) {
return "", fmt.Errorf("host path %q: owner home %q must be absolute", raw, home)
}
candidate := raw
if strings.HasPrefix(raw, "~/") {
candidate = filepath.Join(home, strings.TrimPrefix(raw, "~/"))
}
candidate = filepath.Clean(candidate)
if !filepath.IsAbs(candidate) {
return "", fmt.Errorf("host path %q: resolved path %q must be absolute", raw, candidate)
}
if err := ensurePathWithinRoot(candidate, home); err != nil {
return "", fmt.Errorf("host path %q: %w", raw, err)
}
return candidate, nil
}
// ResolveExistingFileSyncHostPath resolves a configured
// [[file_sync]].host path to its real on-disk target. This is the
// runtime companion to ResolveFileSyncHostPath: once os.Stat succeeds,
// the daemon uses this to ensure a top-level symlink still points
// inside the owner home before it reads from the path as root.
func ResolveExistingFileSyncHostPath(raw, home string) (string, error) {
candidate, err := ResolveFileSyncHostPath(raw, home)
if err != nil {
return "", err
}
resolved, err := filepath.EvalSymlinks(candidate)
if err != nil {
return "", fmt.Errorf("host path %q: resolve symlinks: %w", raw, err)
}
resolved = filepath.Clean(resolved)
if err := ensurePathWithinRoot(resolved, home); err != nil {
return "", fmt.Errorf("host path %q: resolved symlink target %q: %w", raw, resolved, err)
}
return resolved, nil
}
// validateFileSyncPath rejects relative paths (other than a leading
// "~/"), "..", empty segments, and "~user/..." forms banger doesn't
// expand. Absolute paths and home-anchored paths pass through — the
// actual expansion happens at sync time.
func validateFileSyncPath(label, raw string, allowHome bool) error {
if raw == "~" {
return fmt.Errorf("%s path %q: bare '~' is not supported, point at a file or directory under it", label, raw)
}
// "~user/..." must be rejected specifically — catch it before the
// generic "must be absolute" message so the error names the real
// problem.
if strings.HasPrefix(raw, "~") && !strings.HasPrefix(raw, "~/") {
return fmt.Errorf("%s path %q: only '~/' is expanded, not '~user/'", label, raw)
}
if strings.HasPrefix(raw, "~/") {
if !allowHome {
return fmt.Errorf("%s path %q: home-relative paths are not supported here", label, raw)
}
} else if !strings.HasPrefix(raw, "/") {
return fmt.Errorf("%s path %q: must be absolute (start with '/') or home-anchored (start with '~/')", label, raw)
}
for _, segment := range strings.Split(raw, "/") {
if segment == ".." {
return fmt.Errorf("%s path %q: '..' segments are not allowed", label, raw)
}
}
return nil
}
func applyBundleMetadataDefaults(cfg *model.DaemonConfig, runtimeDir string, meta runtimebundle.BundleMetadata) {
cfg.FirecrackerBin = defaultRuntimePath(cfg.FirecrackerBin, runtimeDir, meta.FirecrackerBin)
cfg.SSHKeyPath = defaultRuntimePath(cfg.SSHKeyPath, runtimeDir, meta.SSHKeyPath)
cfg.NamegenPath = defaultRuntimePath(cfg.NamegenPath, runtimeDir, meta.NamegenPath)
cfg.CustomizeScript = defaultRuntimePath(cfg.CustomizeScript, runtimeDir, meta.CustomizeScript)
cfg.VSockAgentPath = defaultRuntimePath(cfg.VSockAgentPath, runtimeDir, meta.VSockAgentPath)
cfg.DefaultWorkSeed = defaultRuntimePath(cfg.DefaultWorkSeed, runtimeDir, meta.DefaultWorkSeed)
cfg.DefaultKernel = defaultRuntimePath(cfg.DefaultKernel, runtimeDir, meta.DefaultKernel)
cfg.DefaultInitrd = defaultRuntimePath(cfg.DefaultInitrd, runtimeDir, meta.DefaultInitrd)
cfg.DefaultModulesDir = defaultRuntimePath(cfg.DefaultModulesDir, runtimeDir, meta.DefaultModulesDir)
cfg.DefaultPackagesFile = defaultRuntimePath(cfg.DefaultPackagesFile, runtimeDir, meta.DefaultPackages)
cfg.DefaultRootfs = defaultRuntimePath(cfg.DefaultRootfs, runtimeDir, meta.DefaultRootfs)
cfg.DefaultBaseRootfs = defaultRuntimePath(cfg.DefaultBaseRootfs, runtimeDir, meta.DefaultBaseRootfs)
}
func applyLegacyRuntimeDefaults(cfg *model.DaemonConfig) {
cfg.FirecrackerBin = defaultRuntimePath(cfg.FirecrackerBin, cfg.RuntimeDir, "firecracker")
cfg.SSHKeyPath = defaultRuntimePath(cfg.SSHKeyPath, cfg.RuntimeDir, "id_ed25519")
cfg.NamegenPath = defaultRuntimePath(cfg.NamegenPath, cfg.RuntimeDir, "namegen")
cfg.CustomizeScript = defaultRuntimePath(cfg.CustomizeScript, cfg.RuntimeDir, "customize.sh")
cfg.VSockAgentPath = firstExistingRuntimePath(
defaultRuntimePath(cfg.VSockAgentPath, cfg.RuntimeDir, "banger-vsock-agent"),
filepath.Join(cfg.RuntimeDir, "banger-vsock-pingd"),
)
cfg.DefaultWorkSeed = defaultRuntimePath(cfg.DefaultWorkSeed, cfg.RuntimeDir, "rootfs-docker.work-seed.ext4")
cfg.DefaultKernel = defaultRuntimePath(cfg.DefaultKernel, cfg.RuntimeDir, "wtf/root/boot/vmlinux-6.8.0-94-generic")
cfg.DefaultInitrd = defaultRuntimePath(cfg.DefaultInitrd, cfg.RuntimeDir, "wtf/root/boot/initrd.img-6.8.0-94-generic")
cfg.DefaultModulesDir = defaultRuntimePath(cfg.DefaultModulesDir, cfg.RuntimeDir, "wtf/root/lib/modules/6.8.0-94-generic")
cfg.DefaultPackagesFile = defaultRuntimePath(cfg.DefaultPackagesFile, cfg.RuntimeDir, "packages.apt")
}
func defaultRuntimePath(current, runtimeDir, relative string) string {
if current != "" || relative == "" {
return current
func ensurePathWithinRoot(candidate, root string) error {
root = filepath.Clean(strings.TrimSpace(root))
candidate = filepath.Clean(strings.TrimSpace(candidate))
rel, err := filepath.Rel(root, candidate)
if err != nil {
return fmt.Errorf("compare against owner home %q: %w", root, err)
}
return filepath.Join(runtimeDir, relative)
if rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
return fmt.Errorf("must stay under owner home %q", root)
}
return nil
}
func firstExistingRuntimePath(paths ...string) string {
for _, candidate := range paths {
if candidate == "" {
continue
}
if _, err := os.Stat(candidate); err == nil {
return candidate
// validateFileSyncMode accepts three- or four-digit octal strings.
// Three-digit modes like "600" are auto-prefixed with a leading 0
// when parsed by the consumer.
func validateFileSyncMode(mode string) error {
if len(mode) < 3 || len(mode) > 4 {
return fmt.Errorf("mode %q: must be a 3- or 4-digit octal string", mode)
}
for _, r := range mode {
if r < '0' || r > '7' {
return fmt.Errorf("mode %q: must be octal (digits 0-7)", mode)
}
}
return ""
return nil
}
func associatedWorkSeedPath(rootfsPath string) string {
rootfsPath = strings.TrimSpace(rootfsPath)
if rootfsPath == "" {
return ""
func resolveSSHKeyPath(layout paths.Layout, configured, home string, ensureDefault bool) (string, error) {
configured = strings.TrimSpace(configured)
if configured != "" {
return normalizeSSHKeyPath(configured, home)
}
if strings.HasSuffix(rootfsPath, ".ext4") {
return strings.TrimSuffix(rootfsPath, ".ext4") + ".work-seed.ext4"
// Key lives under the state dir, not the config dir. The daemon's
// ensureVMSSHClientConfig scrubs ConfigDir/ssh on every Open as
// part of migrating off the pre-state-dir layout — putting the
// default key there would race with that cleanup (create → delete
// → next VM create fails to read the key).
sshDir := strings.TrimSpace(layout.SSHDir)
if sshDir == "" {
sshDir = filepath.Join(layout.StateDir, "ssh")
}
return rootfsPath + ".work-seed"
if !filepath.IsAbs(sshDir) {
return "", fmt.Errorf("ssh key dir must be absolute; got %q (check paths.Resolve populated SSHDir / StateDir)", sshDir)
}
defaultPath := filepath.Join(sshDir, "id_ed25519")
if ensureDefault {
return ensureDefaultSSHKey(defaultPath)
}
return defaultPath, nil
}
// normalizeSSHKeyPath validates and canonicalises a user-configured
// ssh_key_path. Accepts:
//
// - absolute paths ("/home/me/keys/id_ed25519")
// - home-anchored paths ("~/keys/id_ed25519") — expanded against $HOME
//
// Rejects:
//
// - bare "~" (ambiguous — expand to what?)
// - "~other/foo" (we only expand the current user's home)
// - relative paths ("id_ed25519", "./keys/id_ed25519") — these are
// ambiguous because the daemon's cwd isn't the user's shell cwd,
// and readers in internal/guest + internal/cli do raw os.ReadFile
// on the path without re-resolving against a known anchor
func normalizeSSHKeyPath(raw, home string) (string, error) {
raw = strings.TrimSpace(raw)
if raw == "" {
return "", nil
}
if raw == "~" {
return "", fmt.Errorf("ssh_key_path %q: bare '~' is not supported, point at a specific key file", raw)
}
if strings.HasPrefix(raw, "~") && !strings.HasPrefix(raw, "~/") {
return "", fmt.Errorf("ssh_key_path %q: only '~/' is expanded, not '~user/'", raw)
}
if strings.HasPrefix(raw, "~/") {
home = strings.TrimSpace(home)
if home == "" {
return "", fmt.Errorf("ssh_key_path %q: no home directory available for ~ expansion", raw)
}
raw = filepath.Join(home, strings.TrimPrefix(raw, "~/"))
}
if !filepath.IsAbs(raw) {
return "", fmt.Errorf("ssh_key_path %q: must be absolute (start with '/') or home-anchored (start with '~/')", raw)
}
return filepath.Clean(raw), nil
}
func ensureDefaultSSHKey(path string) (string, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
return "", err
}
if _, err := os.Stat(path); err == nil {
if err := ensurePublicKeyFile(path); err != nil {
return "", err
}
return path, nil
} else if !os.IsNotExist(err) {
return "", err
}
_, privateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return "", err
}
pkcs8, err := x509.MarshalPKCS8PrivateKey(privateKey)
if err != nil {
return "", err
}
privatePEM := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: pkcs8})
if err := os.WriteFile(path, privatePEM, 0o600); err != nil {
return "", err
}
if err := ensurePublicKeyFile(path); err != nil {
return "", err
}
return path, nil
}
func ensurePublicKeyFile(privateKeyPath string) error {
data, err := os.ReadFile(privateKeyPath)
if err != nil {
return err
}
signer, err := ssh.ParsePrivateKey(data)
if err != nil {
return err
}
publicKey := ssh.MarshalAuthorizedKey(signer.PublicKey())
return os.WriteFile(privateKeyPath+".pub", publicKey, 0o644)
}

View file

@ -1,154 +1,223 @@
package config
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"time"
"banger/internal/paths"
"banger/internal/runtimebundle"
)
func TestLoadDerivesArtifactPathsFromRuntimeDir(t *testing.T) {
runtimeDir := t.TempDir()
meta := runtimebundle.BundleMetadata{
FirecrackerBin: "bin/firecracker",
SSHKeyPath: "keys/id_ed25519",
NamegenPath: "bin/namegen",
CustomizeScript: "scripts/customize.sh",
VSockAgentPath: "bin/banger-vsock-agent",
DefaultPackages: "config/packages.apt",
DefaultRootfs: "images/rootfs-docker.ext4",
DefaultWorkSeed: "images/rootfs-docker.work-seed.ext4",
DefaultKernel: "kernels/vmlinux",
DefaultInitrd: "kernels/initrd.img",
DefaultModulesDir: "modules/current",
}
for _, rel := range []string{
meta.FirecrackerBin,
meta.SSHKeyPath,
meta.NamegenPath,
meta.CustomizeScript,
meta.VSockAgentPath,
meta.DefaultPackages,
meta.DefaultRootfs,
meta.DefaultWorkSeed,
meta.DefaultKernel,
meta.DefaultInitrd,
filepath.Join(meta.DefaultModulesDir, "modules.dep"),
} {
path := filepath.Join(runtimeDir, rel)
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("test"), 0o644); err != nil {
t.Fatalf("write %s: %v", path, err)
}
}
data, err := json.Marshal(meta)
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if err := os.WriteFile(filepath.Join(runtimeDir, runtimebundle.BundleMetadataFile), data, 0o644); err != nil {
t.Fatalf("write bundle metadata: %v", err)
func TestLoadDefaultsResolveFirecrackerAndGenerateSSHKey(t *testing.T) {
configDir := t.TempDir()
sshDir := t.TempDir()
binDir := t.TempDir()
firecrackerPath := filepath.Join(binDir, "firecracker")
if err := os.WriteFile(firecrackerPath, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
t.Fatalf("write firecracker: %v", err)
}
t.Setenv("PATH", binDir)
t.Setenv("BANGER_RUNTIME_DIR", runtimeDir)
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
cfg, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: sshDir})
if err != nil {
t.Fatalf("Load: %v", err)
}
if cfg.RuntimeDir != runtimeDir {
t.Fatalf("RuntimeDir = %q, want %q", cfg.RuntimeDir, runtimeDir)
if cfg.FirecrackerBin != firecrackerPath {
t.Fatalf("FirecrackerBin = %q, want %q", cfg.FirecrackerBin, firecrackerPath)
}
if cfg.FirecrackerBin != filepath.Join(runtimeDir, meta.FirecrackerBin) {
t.Fatalf("FirecrackerBin = %q", cfg.FirecrackerBin)
// Default key lives under SSHDir (state dir), NOT ConfigDir/ssh.
// ConfigDir/ssh gets scrubbed by ensureVMSSHClientConfig on every
// daemon Open, so regression-guard that the generator never picks
// that path again.
wantKey := filepath.Join(sshDir, "id_ed25519")
if cfg.SSHKeyPath != wantKey {
t.Fatalf("SSHKeyPath = %q, want %q", cfg.SSHKeyPath, wantKey)
}
if cfg.SSHKeyPath != filepath.Join(runtimeDir, meta.SSHKeyPath) {
t.Fatalf("SSHKeyPath = %q", cfg.SSHKeyPath)
for _, path := range []string{wantKey, wantKey + ".pub"} {
if _, err := os.Stat(path); err != nil {
t.Fatalf("stat %s: %v", path, err)
}
}
if cfg.NamegenPath != filepath.Join(runtimeDir, meta.NamegenPath) {
t.Fatalf("NamegenPath = %q", cfg.NamegenPath)
forbiddenKey := filepath.Join(configDir, "ssh", "id_ed25519")
if _, err := os.Stat(forbiddenKey); err == nil {
t.Fatalf("key was also generated at %s; config.Load must not write under ConfigDir/ssh", forbiddenKey)
}
if cfg.CustomizeScript != filepath.Join(runtimeDir, meta.CustomizeScript) {
t.Fatalf("CustomizeScript = %q", cfg.CustomizeScript)
}
if cfg.VSockAgentPath != filepath.Join(runtimeDir, meta.VSockAgentPath) {
t.Fatalf("VSockAgentPath = %q", cfg.VSockAgentPath)
}
if cfg.DefaultRootfs != filepath.Join(runtimeDir, meta.DefaultRootfs) {
t.Fatalf("DefaultRootfs = %q", cfg.DefaultRootfs)
}
if cfg.DefaultWorkSeed != filepath.Join(runtimeDir, meta.DefaultWorkSeed) {
t.Fatalf("DefaultWorkSeed = %q", cfg.DefaultWorkSeed)
}
if cfg.DefaultBaseRootfs != filepath.Join(runtimeDir, meta.DefaultRootfs) {
t.Fatalf("DefaultBaseRootfs = %q", cfg.DefaultBaseRootfs)
}
if cfg.DefaultKernel != filepath.Join(runtimeDir, meta.DefaultKernel) {
t.Fatalf("DefaultKernel = %q", cfg.DefaultKernel)
}
if cfg.DefaultInitrd != filepath.Join(runtimeDir, meta.DefaultInitrd) {
t.Fatalf("DefaultInitrd = %q", cfg.DefaultInitrd)
}
if cfg.DefaultModulesDir != filepath.Join(runtimeDir, meta.DefaultModulesDir) {
t.Fatalf("DefaultModulesDir = %q", cfg.DefaultModulesDir)
}
if cfg.DefaultPackagesFile != filepath.Join(runtimeDir, meta.DefaultPackages) {
t.Fatalf("DefaultPackagesFile = %q", cfg.DefaultPackagesFile)
if cfg.DefaultImageName != "debian-bookworm" {
t.Fatalf("DefaultImageName = %q, want debian-bookworm", cfg.DefaultImageName)
}
}
func TestLoadFallsBackToLegacyRuntimeLayoutWithoutBundleMetadata(t *testing.T) {
runtimeDir := t.TempDir()
for _, rel := range []string{
"firecracker",
"id_ed25519",
"namegen",
"customize.sh",
"banger-vsock-agent",
"packages.apt",
"rootfs-docker.ext4",
"rootfs-docker.work-seed.ext4",
"wtf/root/boot/vmlinux-6.8.0-94-generic",
"wtf/root/boot/initrd.img-6.8.0-94-generic",
"wtf/root/lib/modules/6.8.0-94-generic/modules.dep",
} {
path := filepath.Join(runtimeDir, rel)
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("test"), 0o644); err != nil {
t.Fatalf("write %s: %v", path, err)
}
func TestLoadSSHKeyPathExpandsHomeAnchored(t *testing.T) {
homeDir := t.TempDir()
t.Setenv("HOME", homeDir)
configDir := t.TempDir()
data := []byte("ssh_key_path = \"~/mykeys/id_ed25519\"\n")
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatalf("write config.toml: %v", err)
}
t.Setenv("BANGER_RUNTIME_DIR", runtimeDir)
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
cfg, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
if cfg.FirecrackerBin != filepath.Join(runtimeDir, "firecracker") {
t.Fatalf("FirecrackerBin = %q", cfg.FirecrackerBin)
}
if cfg.VSockAgentPath != filepath.Join(runtimeDir, "banger-vsock-agent") {
t.Fatalf("VSockAgentPath = %q", cfg.VSockAgentPath)
}
if cfg.DefaultWorkSeed != filepath.Join(runtimeDir, "rootfs-docker.work-seed.ext4") {
t.Fatalf("DefaultWorkSeed = %q", cfg.DefaultWorkSeed)
}
if cfg.DefaultKernel != filepath.Join(runtimeDir, "wtf/root/boot/vmlinux-6.8.0-94-generic") {
t.Fatalf("DefaultKernel = %q", cfg.DefaultKernel)
want := filepath.Join(homeDir, "mykeys", "id_ed25519")
if cfg.SSHKeyPath != want {
t.Fatalf("SSHKeyPath = %q, want %q", cfg.SSHKeyPath, want)
}
}
func TestLoadAppliesLogLevelEnvOverride(t *testing.T) {
t.Setenv("BANGER_LOG_LEVEL", "debug")
func TestLoadDaemonDoesNotGenerateDefaultSSHKey(t *testing.T) {
ownerHome := t.TempDir()
sshDir := filepath.Join(t.TempDir(), "daemon-ssh")
cfg, err := LoadDaemon(paths.Layout{ConfigDir: t.TempDir(), SSHDir: sshDir}, ownerHome)
if err != nil {
t.Fatalf("LoadDaemon: %v", err)
}
wantKey := filepath.Join(sshDir, "id_ed25519")
if cfg.SSHKeyPath != wantKey {
t.Fatalf("SSHKeyPath = %q, want %q", cfg.SSHKeyPath, wantKey)
}
if cfg.HostHomeDir != ownerHome {
t.Fatalf("HostHomeDir = %q, want %q", cfg.HostHomeDir, ownerHome)
}
if _, err := os.Stat(wantKey); !os.IsNotExist(err) {
t.Fatalf("LoadDaemon created %s, want no key material on daemon config load", wantKey)
}
}
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
// TestLoadNormalizesAbsoluteSSHKeyPath pins filepath.Clean behaviour
// for configured paths: trailing slashes and duplicate slashes are
// flattened so downstream path comparisons don't see two spellings
// for the same path.
func TestLoadNormalizesAbsoluteSSHKeyPath(t *testing.T) {
cases := []struct {
name string
raw string
want string
}{
{"trailing slash collapsed", "/tmp/keys/id_ed25519/", "/tmp/keys/id_ed25519"},
{"duplicate slashes collapsed", "/tmp//keys///id_ed25519", "/tmp/keys/id_ed25519"},
{"dot segments resolved", "/tmp/keys/./id_ed25519", "/tmp/keys/id_ed25519"},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
configDir := t.TempDir()
data := []byte("ssh_key_path = \"" + tc.raw + "\"\n")
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatalf("write config.toml: %v", err)
}
cfg, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load %q: %v", tc.raw, err)
}
if cfg.SSHKeyPath != tc.want {
t.Fatalf("SSHKeyPath = %q, want %q", cfg.SSHKeyPath, tc.want)
}
})
}
}
// TestEnsureDefaultSSHKeyRejectsCorruptExistingFile pins the
// "don't silently overwrite" contract: if someone wrote garbage to
// the default key path (or the key was truncated mid-write by a
// previous crash), config.Load must surface the parse error instead
// of pretending the file is usable. The regression we care about is
// a future refactor that adds "regenerate if invalid" silently —
// that would nuke a real user key on every daemon Open.
func TestEnsureDefaultSSHKeyRejectsCorruptExistingFile(t *testing.T) {
sshDir := t.TempDir()
corruptKey := filepath.Join(sshDir, "id_ed25519")
if err := os.WriteFile(corruptKey, []byte("not a pem private key"), 0o600); err != nil {
t.Fatalf("write corrupt key: %v", err)
}
_, err := Load(paths.Layout{ConfigDir: t.TempDir(), SSHDir: sshDir})
if err == nil {
t.Fatal("Load: want error when existing key file is not a valid private key")
}
// The error should mention the parse failure, not "regenerated".
if strings.Contains(err.Error(), "regenerat") {
t.Fatalf("Load silently regenerated: %v", err)
}
// Original garbage must still be there — the invariant is "don't
// touch files you can't parse".
data, readErr := os.ReadFile(corruptKey)
if readErr != nil {
t.Fatalf("ReadFile: %v", readErr)
}
if string(data) != "not a pem private key" {
t.Fatalf("key content = %q, want the original garbage", string(data))
}
}
// TestResolveSSHKeyPathRejectsEmptySSHDirAndStateDir pins the
// guard in resolveSSHKeyPath: if a caller builds a layout without
// SSHDir and StateDir, they shouldn't get a key generated in cwd.
// The guard existed before (added after a test scribbled into
// internal/config/ssh/); this test prevents it from going away.
func TestResolveSSHKeyPathRejectsEmptySSHDirAndStateDir(t *testing.T) {
_, err := Load(paths.Layout{ConfigDir: t.TempDir()})
if err == nil {
t.Fatal("Load: want error when neither SSHDir nor StateDir is set")
}
if !strings.Contains(err.Error(), "must be absolute") {
t.Fatalf("Load error = %v, want 'must be absolute' diagnostic", err)
}
}
func TestLoadRejectsInvalidSSHKeyPath(t *testing.T) {
cases := []struct {
name string
raw string
want string
}{
{"relative bare", "id_ed25519", "must be absolute"},
{"relative with dot", "./keys/id_ed25519", "must be absolute"},
{"bare tilde", "~", "bare '~' is not supported"},
{"user-tilde", "~other/id_ed25519", "only '~/' is expanded"},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
configDir := t.TempDir()
data := []byte("ssh_key_path = \"" + tc.raw + "\"\n")
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatalf("write config.toml: %v", err)
}
_, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err == nil {
t.Fatalf("Load %q: want error containing %q", tc.raw, tc.want)
}
if !strings.Contains(err.Error(), tc.want) {
t.Fatalf("Load %q: error = %v, want contains %q", tc.raw, err, tc.want)
}
})
}
}
func TestLoadAppliesConfigOverrides(t *testing.T) {
configDir := t.TempDir()
data := []byte(`
log_level = "debug"
firecracker_bin = "/opt/firecracker"
ssh_key_path = "/tmp/custom-key"
default_image_name = "void"
auto_stop_stale_after = "1h"
stats_poll_interval = "15s"
bridge_name = "br-test"
bridge_ip = "10.0.0.1"
cidr = "25"
tap_pool_size = 8
default_dns = "9.9.9.9"
`)
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatalf("write config.toml: %v", err)
}
cfg, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
@ -156,136 +225,271 @@ func TestLoadAppliesLogLevelEnvOverride(t *testing.T) {
if cfg.LogLevel != "debug" {
t.Fatalf("LogLevel = %q", cfg.LogLevel)
}
if cfg.FirecrackerBin != "/opt/firecracker" {
t.Fatalf("FirecrackerBin = %q", cfg.FirecrackerBin)
}
if cfg.SSHKeyPath != "/tmp/custom-key" {
t.Fatalf("SSHKeyPath = %q", cfg.SSHKeyPath)
}
if cfg.DefaultImageName != "void" {
t.Fatalf("DefaultImageName = %q", cfg.DefaultImageName)
}
if cfg.AutoStopStaleAfter != time.Hour {
t.Fatalf("AutoStopStaleAfter = %s", cfg.AutoStopStaleAfter)
}
if cfg.StatsPollInterval != 15*time.Second {
t.Fatalf("StatsPollInterval = %s", cfg.StatsPollInterval)
}
if cfg.BridgeName != "br-test" || cfg.BridgeIP != "10.0.0.1" || cfg.CIDR != "25" {
t.Fatalf("bridge config = %+v", cfg)
}
if cfg.TapPoolSize != 8 {
t.Fatalf("TapPoolSize = %d", cfg.TapPoolSize)
}
if cfg.DefaultDNS != "9.9.9.9" {
t.Fatalf("DefaultDNS = %q", cfg.DefaultDNS)
}
}
func TestLoadDefaultsLogLevelToInfo(t *testing.T) {
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
func TestLoadAppliesLogLevelEnvOverride(t *testing.T) {
t.Setenv("BANGER_LOG_LEVEL", "warn")
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir(), SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
if cfg.LogLevel != "info" {
t.Fatalf("LogLevel = %q, want info", cfg.LogLevel)
if cfg.LogLevel != "warn" {
t.Fatalf("LogLevel = %q, want warn", cfg.LogLevel)
}
}
func TestLoadIgnoresConfigSSHKeyOverrideForGuestAccess(t *testing.T) {
runtimeDir := t.TempDir()
meta := runtimebundle.BundleMetadata{
FirecrackerBin: "bin/firecracker",
SSHKeyPath: "keys/id_ed25519",
NamegenPath: "bin/namegen",
CustomizeScript: "scripts/customize.sh",
VSockAgentPath: "bin/banger-vsock-agent",
DefaultPackages: "config/packages.apt",
DefaultRootfs: "images/rootfs.ext4",
DefaultWorkSeed: "images/rootfs.work-seed.ext4",
DefaultKernel: "kernels/vmlinux",
DefaultModulesDir: "modules/current",
}
for _, rel := range []string{
meta.FirecrackerBin,
meta.SSHKeyPath,
meta.NamegenPath,
meta.CustomizeScript,
meta.VSockAgentPath,
meta.DefaultPackages,
meta.DefaultRootfs,
meta.DefaultWorkSeed,
meta.DefaultKernel,
filepath.Join(meta.DefaultModulesDir, "modules.dep"),
} {
path := filepath.Join(runtimeDir, rel)
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("test"), 0o644); err != nil {
t.Fatalf("write %s: %v", path, err)
}
}
data, err := json.Marshal(meta)
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if err := os.WriteFile(filepath.Join(runtimeDir, runtimebundle.BundleMetadataFile), data, 0o644); err != nil {
t.Fatalf("write bundle metadata: %v", err)
}
func TestLoadAcceptsFileSyncEntries(t *testing.T) {
homeDir := t.TempDir()
t.Setenv("HOME", homeDir)
configDir := t.TempDir()
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), []byte("ssh_key_path = \"/tmp/override-key\"\n"), 0o644); err != nil {
t.Fatalf("write config.toml: %v", err)
}
hostsFile := filepath.Join(homeDir, ".config", "gh", "hosts.yml")
data := []byte(`
[[file_sync]]
host = "~/.aws"
guest = "~/.aws"
t.Setenv("BANGER_RUNTIME_DIR", runtimeDir)
cfg, err := Load(paths.Layout{ConfigDir: configDir})
[[file_sync]]
host = "` + hostsFile + `"
guest = "/root/.config/gh/hosts.yml"
mode = "0644"
`)
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatal(err)
}
cfg, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
want := filepath.Join(runtimeDir, meta.SSHKeyPath)
if cfg.SSHKeyPath != want {
t.Fatalf("SSHKeyPath = %q, want runtime key %q", cfg.SSHKeyPath, want)
if len(cfg.FileSync) != 2 {
t.Fatalf("FileSync = %+v", cfg.FileSync)
}
if cfg.FileSync[0].Host != "~/.aws" || cfg.FileSync[0].Guest != "~/.aws" {
t.Fatalf("entry[0] = %+v", cfg.FileSync[0])
}
if cfg.FileSync[1].Host != hostsFile || cfg.FileSync[1].Guest != "/root/.config/gh/hosts.yml" {
t.Fatalf("entry[1] = %+v", cfg.FileSync[1])
}
if cfg.FileSync[1].Mode != "0644" {
t.Fatalf("entry[1] mode = %q", cfg.FileSync[1].Mode)
}
}
func TestLoadAcceptsLegacyBundleVsockPingHelperPath(t *testing.T) {
runtimeDir := t.TempDir()
meta := runtimebundle.BundleMetadata{
FirecrackerBin: "bin/firecracker",
SSHKeyPath: "keys/id_ed25519",
NamegenPath: "bin/namegen",
CustomizeScript: "scripts/customize.sh",
VSockPingHelperPath: "bin/banger-vsock-pingd",
DefaultPackages: "config/packages.apt",
DefaultRootfs: "images/rootfs.ext4",
DefaultKernel: "kernels/vmlinux",
}
for _, rel := range []string{
meta.FirecrackerBin,
meta.SSHKeyPath,
meta.NamegenPath,
meta.CustomizeScript,
meta.VSockPingHelperPath,
meta.DefaultPackages,
meta.DefaultRootfs,
meta.DefaultKernel,
} {
path := filepath.Join(runtimeDir, rel)
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("test"), 0o644); err != nil {
t.Fatalf("write %s: %v", path, err)
}
}
data, err := json.Marshal(meta)
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if err := os.WriteFile(filepath.Join(runtimeDir, runtimebundle.BundleMetadataFile), data, 0o644); err != nil {
t.Fatalf("write bundle metadata: %v", err)
}
func TestLoadDaemonAcceptsFileSyncPathUnderOwnerHome(t *testing.T) {
ownerHome := t.TempDir()
t.Setenv("HOME", t.TempDir())
t.Setenv("BANGER_RUNTIME_DIR", runtimeDir)
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
if cfg.VSockAgentPath != filepath.Join(runtimeDir, meta.VSockPingHelperPath) {
t.Fatalf("VSockAgentPath = %q", cfg.VSockAgentPath)
}
}
func TestLoadAcceptsLegacyConfigVsockPingHelperPath(t *testing.T) {
configDir := t.TempDir()
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), []byte("vsock_ping_helper_path = \"/tmp/legacy-agent\"\n"), 0o644); err != nil {
t.Fatalf("write config.toml: %v", err)
allowed := filepath.Join(ownerHome, ".config", "gh", "hosts.yml")
data := []byte(`
[[file_sync]]
host = "` + allowed + `"
guest = "~/.config/gh/hosts.yml"
`)
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatal(err)
}
cfg, err := Load(paths.Layout{ConfigDir: configDir})
cfg, err := LoadDaemon(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()}, ownerHome)
if err != nil {
t.Fatalf("LoadDaemon: %v", err)
}
got, err := ResolveFileSyncHostPath(cfg.FileSync[0].Host, cfg.HostHomeDir)
if err != nil {
t.Fatalf("ResolveFileSyncHostPath: %v", err)
}
if got != allowed {
t.Fatalf("resolved host path = %q, want %q", got, allowed)
}
}
func TestLoadRejectsInvalidFileSyncEntries(t *testing.T) {
cases := []struct {
name string
toml string
want string
}{
{
"empty host",
`[[file_sync]]` + "\n" + `host = ""` + "\n" + `guest = "~/foo"`,
"host path is required",
},
{
"empty guest",
`[[file_sync]]` + "\n" + `host = "~/foo"` + "\n" + `guest = ""`,
"guest path is required",
},
{
"relative host",
`[[file_sync]]` + "\n" + `host = "foo/bar"` + "\n" + `guest = "~/foo"`,
"must be absolute",
},
{
"guest outside /root",
`[[file_sync]]` + "\n" + `host = "~/x"` + "\n" + `guest = "/etc/resolv.conf"`,
"must be under /root or ~/",
},
{
"path traversal",
`[[file_sync]]` + "\n" + `host = "~/../secrets"` + "\n" + `guest = "~/secrets"`,
"'..' segments",
},
{
"tilde user",
`[[file_sync]]` + "\n" + `host = "~other/foo"` + "\n" + `guest = "~/foo"`,
"only '~/' is expanded",
},
{
"invalid mode",
`[[file_sync]]` + "\n" + `host = "~/x"` + "\n" + `guest = "~/x"` + "\n" + `mode = "rwx"`,
"must be octal",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
configDir := t.TempDir()
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), []byte(tc.toml+"\n"), 0o644); err != nil {
t.Fatal(err)
}
_, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err == nil {
t.Fatalf("Load: want error containing %q", tc.want)
}
if !strings.Contains(err.Error(), tc.want) {
t.Fatalf("Load error = %v, want contains %q", err, tc.want)
}
})
}
}
func TestLoadRejectsFileSyncHostOutsideHome(t *testing.T) {
homeDir := t.TempDir()
t.Setenv("HOME", homeDir)
configDir := t.TempDir()
data := []byte(`
[[file_sync]]
host = "/etc/resolv.conf"
guest = "~/resolv.conf"
`)
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatal(err)
}
_, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err == nil {
t.Fatal("Load: want error for host path outside home")
}
if !strings.Contains(err.Error(), "owner home") {
t.Fatalf("Load error = %v, want owner-home diagnostic", err)
}
}
func TestLoadDaemonRejectsFileSyncHostOutsideOwnerHome(t *testing.T) {
ownerHome := t.TempDir()
t.Setenv("HOME", t.TempDir())
configDir := t.TempDir()
outside := filepath.Join(t.TempDir(), "secret.txt")
data := []byte(`
[[file_sync]]
host = "` + outside + `"
guest = "~/secret.txt"
`)
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatal(err)
}
_, err := LoadDaemon(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()}, ownerHome)
if err == nil {
t.Fatal("LoadDaemon: want error for host path outside owner home")
}
if !strings.Contains(err.Error(), "owner home") {
t.Fatalf("LoadDaemon error = %v, want owner-home diagnostic", err)
}
}
func TestLoadAcceptsVMDefaults(t *testing.T) {
configDir := t.TempDir()
data := []byte(`
[vm_defaults]
vcpu = 4
memory_mib = 4096
disk_size = "16G"
system_overlay_size = "12G"
`)
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), data, 0o644); err != nil {
t.Fatal(err)
}
cfg, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
if cfg.VSockAgentPath != "/tmp/legacy-agent" {
t.Fatalf("VSockAgentPath = %q", cfg.VSockAgentPath)
if cfg.VMDefaults.VCPUCount != 4 {
t.Errorf("VCPUCount = %d, want 4", cfg.VMDefaults.VCPUCount)
}
if cfg.VMDefaults.MemoryMiB != 4096 {
t.Errorf("MemoryMiB = %d, want 4096", cfg.VMDefaults.MemoryMiB)
}
if cfg.VMDefaults.WorkDiskSizeBytes != 16*1024*1024*1024 {
t.Errorf("WorkDiskSizeBytes = %d, want 16 GiB", cfg.VMDefaults.WorkDiskSizeBytes)
}
if cfg.VMDefaults.SystemOverlaySizeByte != 12*1024*1024*1024 {
t.Errorf("SystemOverlaySizeByte = %d, want 12 GiB", cfg.VMDefaults.SystemOverlaySizeByte)
}
}
func TestLoadEmptyVMDefaultsLeavesZeros(t *testing.T) {
// No [vm_defaults] block → cfg.VMDefaults is the zero value,
// which the resolver will map to auto or builtin.
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir(), SSHDir: t.TempDir()})
if err != nil {
t.Fatalf("Load: %v", err)
}
if cfg.VMDefaults.VCPUCount != 0 || cfg.VMDefaults.MemoryMiB != 0 {
t.Errorf("VMDefaults = %+v, want zeroed", cfg.VMDefaults)
}
}
func TestLoadRejectsNegativeVMDefaults(t *testing.T) {
cases := map[string]string{
"vcpu": `[vm_defaults]` + "\n" + `vcpu = -1`,
"memory": `[vm_defaults]` + "\n" + `memory_mib = -1`,
"disk_size": `[vm_defaults]` + "\n" + `disk_size = "banana"`,
"overlay": `[vm_defaults]` + "\n" + `system_overlay_size = "banana"`,
}
for name, body := range cases {
t.Run(name, func(t *testing.T) {
configDir := t.TempDir()
if err := os.WriteFile(filepath.Join(configDir, "config.toml"), []byte(body+"\n"), 0o644); err != nil {
t.Fatal(err)
}
if _, err := Load(paths.Layout{ConfigDir: configDir, SSHDir: t.TempDir()}); err == nil {
t.Fatal("expected error")
}
})
}
}

View file

@ -0,0 +1,217 @@
# `internal/daemon` architecture
This document describes the current daemon package layout: the `Daemon`
composition root, the four services it wires together, the subpackages
that own stateless helpers, the privileged-ops seam used by the
supported system install, and the lock ordering every caller must
respect.
## Supported service topology
On the supported host path (`banger system install` on a `systemd`
host), banger runs as two cooperating services:
- `bangerd.service` runs as the configured owner user. It owns the
public RPC socket, store, image state, workspace prep, and the
lifecycle state machine.
- `bangerd-root.service` runs as root. It owns only the privileged
host-kernel operations: bridge/tap, NAT/resolver routing, dm/loop
snapshot plumbing, privileged ext4 mutation on dm devices, and
firecracker process/socket ownership.
The owner daemon talks to the root helper through the `privilegedOps`
seam. Non-system/dev paths still use the same seam, but it is backed
by an in-process adapter instead of the helper RPC client.
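As a rough illustration of that seam, the sketch below shows its shape. The method names and signatures are invented for this document; the real `privilegedOps` interface is defined in this package and covers the responsibilities listed above.
```
// Illustrative only: hypothetical method set, not the real interface.
type privilegedOps interface {
	EnsureBridge(ctx context.Context, name, ip string) error
	CreateTap(ctx context.Context, bridge string) (tap string, err error)
	CreateSnapshot(ctx context.Context, baseImage, vmID string) (device string, err error)
	KillFirecracker(ctx context.Context, pid int) error
}
// On the system path the implementation is an RPC client talking to
// bangerd-root; on non-system/dev paths it is an in-process adapter
// exposing the same methods.
```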
## Composition
`Daemon` is a thin composition root. It holds shared infrastructure
(store, runner, logger, layout, config, listener, privileged-ops
adapter) plus pointers to four focused services. RPC dispatch is a
pure forwarder into those services; no lifecycle / image / workspace /
networking behaviour lives on `*Daemon` itself.
```
Daemon
├── *HostNetwork — bridge, tap pool, NAT, DNS, firecracker process,
│ DM snapshots, vsock readiness
├── *ImageService — register, promote, delete, pull (bundle + OCI),
│ kernel catalog, managed-seed refresh
├── *WorkspaceService — workspace.prepare / workspace.export, auth-key
│ + git-identity sync onto the work disk
└── *VMService — VM lifecycle (create/start/stop/restart/kill/
delete/set), stats polling, ports query,
handle cache, per-VM lock set, create-op
registry, preflight validation
```
Each service owns its own state. Cross-service calls go through narrow
consumer-defined seams:
- `WorkspaceService` does not hold a `*VMService` pointer. It takes
function-typed deps (`vmResolver`, `aliveChecker`, `withVMLockByRef`,
`imageResolver`, `imageWorkSeed`) so it sees exactly the operations
it needs and nothing more. Those deps are captured as closures so
construction-order cycles don't recur.
- `VMService` holds direct pointers to `*HostNetwork`, `*ImageService`,
and `*WorkspaceService`. Orchestrating a VM start really does compose
all three (bridge + tap + image resolution + work-disk sync), and
declaring a function-typed interface for every call would balloon
the surface for no win — services are unexported, so package-external
code can never reach them.
- Capability hooks do not take `*Daemon`. Each capability is a struct
with explicit service-pointer fields (`workDiskCapability{vm, ws,
store, defaultImageName}`, `dnsCapability{net}`, `natCapability{vm,
net, logger}`) populated at wiring time. `VMService` invokes them
through a `capabilityHooks` struct (function-typed bag) populated at
construction; neither the service nor any capability has a `*Daemon`
pointer.
Services + capabilities are built eagerly by `wireServices(d)`, called
once from `Daemon.Open` after the composition root's infrastructure is
populated, and once per test that constructs a `&Daemon{...}` literal.
Tests that want to stub a particular service or the capability list
assign the field before calling `wireServices` — the helper is
idempotent and skips anything already set.
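To make the wiring concrete, here is a minimal sketch of one capability under this scheme. The hook signatures follow the capability interfaces in this package; the `auditCapability` example, its fields, and its constructor are invented for illustration only.
```
// auditCapability is an invented example. It carries exactly the
// dependencies it needs (never *Daemon) and implements the base tag
// plus one optional hook; phases it does not care about are absent.
type auditCapability struct {
	logger *slog.Logger
}

func newAuditCapability(logger *slog.Logger) auditCapability {
	return auditCapability{logger: logger}
}

func (auditCapability) Name() string { return "audit" }

// Cleanup satisfies cleanupCapability.
func (c auditCapability) Cleanup(ctx context.Context, vm model.VMRecord) error {
	c.logger.Info("vm cleaned up", "vm", vm.Name) // Name field assumed here
	return nil
}
```
At wiring time such a capability would sit next to the production entries, e.g. `newAuditCapability(d.logger)` alongside `newDNSCapability(d.net)` in `defaultCapabilities`.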
## Service state
### `HostNetwork` (`host_network.go`, `nat.go`, `dns_routing.go`, `tap_pool.go`, `snapshot.go`)
- `tapPool` — TAP interface pool, owns its own lock.
- `vmDNS *vmdns.Server` — in-process DNS server for `.vm` names.
- `privilegedOps` — the host-kernel seam used for bridge/tap/NAT,
resolver routing, dm snapshots, privileged ext4 mutation, and
firecracker ownership/kill flows.
- No direct VM-state access. Where an operation needs a VM's tap name
(e.g. `ensureNAT`), the signature takes `guestIP` + `tap` string so
the caller (VMService) resolves them first.
### `ImageService` (`image_service.go`, `images.go`, `images_pull.go`, `image_seed.go`, `kernels.go`)
- `imageOpsMu sync.Mutex` — the publication-window lock. Held only
across the recheck-name + atomic-rename + UpsertImage commit atom.
Slow work (network fetch, ext4 build, SSH-key seeding) runs unlocked; see the sketch below.
- Test seams `pullAndFlatten`, `finalizePulledRootfs`, `bundleFetch`
are struct fields (not package globals), so tests inject per-instance
fakes.
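The publication window has roughly the shape sketched below. Apart from `imageOpsMu`, `store`, and `layout`, the helper and field names are invented; the point is only that the slow phase runs before the lock and the commit atom runs inside it.
```
// Shape only: invented helper names, real locking discipline.
func (s *ImageService) publish(ctx context.Context, name, stagedPath string) error {
	// Everything slow (fetch, ext4 build, key seeding) already happened
	// and produced stagedPath, with imageOpsMu unlocked.
	s.imageOpsMu.Lock()
	defer s.imageOpsMu.Unlock()
	if s.imageNameTaken(ctx, name) { // assumed store lookup
		return fmt.Errorf("image %q already exists", name)
	}
	final := filepath.Join(s.layout.ImagesDir, name+".ext4") // assumed naming
	if err := os.Rename(stagedPath, final); err != nil {     // atomic publish
		return err
	}
	return s.store.UpsertImage(ctx, model.Image{Name: name, CreatedAt: model.Now(), UpdatedAt: model.Now()})
}
```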
### `WorkspaceService` (`workspace_service.go`, `workspace.go`, `vm_authsync.go`)
- `workspaceLocks vmLockSet` — per-VM mutex scoped to
`workspace.prepare` / `workspace.export`. These ops acquire
`vmLocks[id]` (on VMService) only long enough to validate VM state
and snapshot the fields they need, then release it and acquire
`workspaceLocks[id]` for the slow guest I/O phase. That keeps
`vm stop` / `delete` / `restart` from queueing behind a running tar
import.
- Test seams `workspaceInspectRepo`, `workspaceImport` are per-instance
fields.
### `VMService` (`vm_service.go`, `vm_lifecycle.go`, `vm_create.go`, `vm_create_ops.go`, `vm_stats.go`, `vm_set.go`, `vm_disk.go`, `vm_handles.go`, `vm_authsync.go` (via WorkspaceService), `preflight.go`, `ports.go`, `vm.go`)
- `vmLocks vmLockSet` — per-VM `*sync.Mutex`, one per VM ID. Held for
the **entire lifecycle op** on that VM: `start` holds it across
preflight, bridge setup, firecracker spawn, and post-boot wiring
(seconds to tens of seconds). Two `start`/`stop`/`delete`/`set`
calls against the same VM therefore serialise; calls against
different VMs run independently.
- `createVMMu sync.Mutex` — narrow **reservation** mutex. `CreateVM`
resolves the image (possibly auto-pulling, which self-locks on
`imageOpsMu`) and parses sizing flags outside this lock, then holds
`createVMMu` only to re-check that the requested VM name is still
free, allocate the next guest IP, and insert the initial "created"
row. The subsequent boot flow runs under the per-VM lock only.
- `createOps opstate.Registry[*vmCreateOperationState]` — in-flight
async create operations; owns its own lock.
- `handles *handleCache` — in-memory map of per-VM transient kernel/
process handles (PID, tap device, loop devices, DM target). Each
VM directory holds a small `handles.json` scratch file so the
cache can be rebuilt at daemon startup.
- `vsockHostDevice` — the path to `/dev/vhost-vsock` that the preflight and
doctor checks RequireFile against. Defaulted in wireServices;
tests point at a tempfile to make the check pass without the
kernel module loaded. Guest-SSH test seams live on `*Daemon`
(`d.guestWaitForSSH`, `d.guestDial`), not VMService — workspace
prepare is the only path that reaches guest SSH, and it gets
there through closures WorkspaceService captured at wiring time.
## Subpackages
Stateless helpers with no need for a service pointer live in
subpackages. Each takes explicit dependencies (typically a
`system.Runner`-compatible interface) and holds no global state beyond
small test seams.
| Subpackage | Purpose |
| ---------------------------- | ---------------------------------------------------------------------- |
| `internal/daemon/opstate` | Generic `Registry[T AsyncOp]` for async-operation bookkeeping. |
| `internal/daemon/dmsnap` | Device-mapper COW snapshot create/cleanup/remove. |
| `internal/daemon/fcproc` | Firecracker process primitives (bridge, tap, binary, PID, kill, wait). |
| `internal/daemon/imagemgr` | Image subsystem pure helpers: validators, staging, build script gen. |
| `internal/daemon/workspace` | Workspace helpers: git inspection, copy prep, guest import script. |
All subpackages are leaves — no intra-daemon subpackage imports another.
## Lock ordering
Acquire in this order, release in reverse. Never acquire in the
opposite direction.
```
VMService.vmLocks[id] → WorkspaceService.workspaceLocks[id]
→ {VMService.createVMMu, ImageService.imageOpsMu}
→ subsystem-local locks
```
`vmLocks[id]` and `workspaceLocks[id]` are NEVER held at the same
time. `workspace.prepare` acquires `vmLocks[id]` just long enough to
validate VM state, releases it, then acquires `workspaceLocks[id]`
for the guest I/O phase. Regular lifecycle ops (`start`, `stop`,
`delete`, `set`) do NOT do this split — they hold `vmLocks[id]`
across the whole flow.
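In code the split has roughly this shape (helper names are invented; the real flow lives in the WorkspaceService files listed above):
```
func (s *WorkspaceService) prepare(ctx context.Context, ref string) error {
	var vm model.VMRecord
	// Phase 1: short hold on the lifecycle lock, just to validate state
	// and snapshot the fields the slow phase needs.
	err := s.withVMLockByRef(ctx, ref, func(rec model.VMRecord) error {
		vm = rec
		return nil
	})
	if err != nil {
		return err
	}
	// Phase 2: slow guest I/O under the workspace lock only, so stop /
	// delete / restart on the same VM never queue behind a tar import.
	unlock := s.workspaceLocks.lock(vm.ID) // assumed to return an unlock func
	defer unlock()
	return s.importIntoGuest(ctx, vm) // assumed helper for the guest I/O
}
```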
Subsystem-local locks (`tapPool.mu`, `opstate.Registry` mu,
`handleCache.mu`) are leaves. They do not contend with each other.
Notes:
- `vmLocks[id]` is the outer lock for any operation scoped to a single
VM. Acquired via `VMService.withVMLockByID` / `withVMLockByRef`. The
callback runs under the lock — treat the whole function body as
a critical section.
- `createVMMu` is held only across the VM-name reservation + IP
allocation + initial UpsertVM. Image resolution and the full boot
flow happen outside it.
- `imageOpsMu` is held only across the publication atom (recheck name
+ atomic rename + UpsertImage, or the equivalent for Register /
Promote / Delete). Network fetch, ext4 build, and file copies run
unlocked.
- Holding a subsystem-local lock while calling into guest SSH is
discouraged; copy needed state out under the lock and release before
blocking I/O.
## Reconcile and background work
`Daemon.reconcile(ctx)` is the orchestrator run at startup. It
rehydrates the handle cache, reaps stale VMs, and republishes DNS
records. `Daemon.backgroundLoop()` is the ticker fan-out —
`VMService.pollStats`, `VMService.stopStaleVMs`, and
`VMService.pruneVMCreateOperations` run on independent tickers. On the
supported system path, any reconcile-time host cleanup that needs
privilege goes through `privilegedOps`, not directly through the owner
daemon process.
## External API
Only `internal/cli` imports this package. The surface is:
- `daemon.Open(ctx) (*Daemon, error)`
- `daemon.OpenSystem(ctx) (*Daemon, error)`
- `(*Daemon).Serve(ctx) error`
- `(*Daemon).Close() error`
- `daemon.Doctor(...)` — host diagnostics (no receiver).
All other methods live on the four services and are reached only
through the RPC `dispatch` switch in `daemon.go`. They are free to
move/rename during refactoring.
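A sketch of how a caller in `internal/cli` is expected to drive that surface (error handling and wiring are simplified):
```
package cli // illustrative only; the real wiring in internal/cli is more involved

import (
	"context"
	"log"

	"banger/internal/daemon"
)

func runDaemon(ctx context.Context) {
	d, err := daemon.Open(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer d.Close()
	if err := d.Serve(ctx); err != nil {
		log.Fatal(err)
	}
}
```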

View file

@ -0,0 +1,153 @@
package daemon
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/imagecat"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
)
func TestFindOrAutoPullImageReturnsLocalWithoutPulling(t *testing.T) {
d := &Daemon{
layout: paths.Layout{ImagesDir: t.TempDir()},
store: openDaemonStore(t),
runner: system.NewRunner(),
}
d.img = &ImageService{
layout: d.layout,
store: d.store,
runner: d.runner,
bundleFetch: func(context.Context, string, imagecat.CatEntry) (imagecat.Manifest, error) {
t.Fatal("bundleFetch should not be called when image is local")
return imagecat.Manifest{}, nil
},
}
wireServices(d)
id, _ := model.NewID()
if err := d.store.UpsertImage(context.Background(), model.Image{
ID: id,
Name: "my-local-image",
CreatedAt: model.Now(),
UpdatedAt: model.Now(),
}); err != nil {
t.Fatal(err)
}
image, err := d.vm.findOrAutoPullImage(context.Background(), "my-local-image")
if err != nil {
t.Fatalf("findOrAutoPullImage: %v", err)
}
if image.Name != "my-local-image" {
t.Fatalf("Name = %q, want my-local-image", image.Name)
}
}
func TestFindOrAutoPullImagePullsFromCatalog(t *testing.T) {
imagesDir := t.TempDir()
kernelsDir := t.TempDir()
seedKernel(t, kernelsDir, "generic-6.12")
pullCalls := 0
d := &Daemon{
layout: paths.Layout{ImagesDir: imagesDir, KernelsDir: kernelsDir},
store: openDaemonStore(t),
runner: system.NewRunner(),
}
d.img = &ImageService{
layout: d.layout,
store: d.store,
runner: d.runner,
bundleFetch: func(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error) {
pullCalls++
return stubBundleFetch(imagecat.Manifest{KernelRef: "generic-6.12"})(ctx, destDir, entry)
},
workSeedBuilder: stubWorkSeedBuilder,
}
wireServices(d)
// "debian-bookworm" is in the embedded imagecat catalog.
image, err := d.vm.findOrAutoPullImage(context.Background(), "debian-bookworm")
if err != nil {
t.Fatalf("findOrAutoPullImage: %v", err)
}
if image.Name != "debian-bookworm" {
t.Fatalf("Name = %q, want debian-bookworm", image.Name)
}
if pullCalls != 1 {
t.Fatalf("bundleFetch calls = %d, want 1", pullCalls)
}
}
func TestFindOrAutoPullImageReturnsOriginalErrorWhenNotInCatalog(t *testing.T) {
d := &Daemon{
layout: paths.Layout{ImagesDir: t.TempDir()},
store: openDaemonStore(t),
runner: system.NewRunner(),
}
wireServices(d)
_, err := d.vm.findOrAutoPullImage(context.Background(), "not-in-catalog-or-store")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Fatalf("err = %v, want not-found", err)
}
}
func TestReadOrAutoPullKernelReturnsLocalWithoutPulling(t *testing.T) {
kernelsDir := t.TempDir()
seedKernel(t, kernelsDir, "generic-6.12")
d := &Daemon{layout: paths.Layout{KernelsDir: kernelsDir}}
wireServices(d)
entry, err := d.img.readOrAutoPullKernel(context.Background(), "generic-6.12")
if err != nil {
t.Fatalf("readOrAutoPullKernel: %v", err)
}
if entry.Name != "generic-6.12" {
t.Fatalf("Name = %q", entry.Name)
}
}
func TestReadOrAutoPullKernelErrorsWhenNotInCatalog(t *testing.T) {
d := &Daemon{layout: paths.Layout{KernelsDir: t.TempDir()}}
wireServices(d)
_, err := d.img.readOrAutoPullKernel(context.Background(), "nonexistent-kernel")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Fatalf("err = %v, want not-found", err)
}
}
// TestReadOrAutoPullKernelSurfacesNonNotExistError covers the path where
// kernelcat.ReadLocal fails for a reason other than missing entry (e.g.
// corrupt manifest); the autopull logic should NOT try to fetch in that
// case since the entry clearly exists in some broken form.
func TestReadOrAutoPullKernelSurfacesNonNotExistError(t *testing.T) {
kernelsDir := t.TempDir()
// Seed a manifest that doesn't match the entry's own Name field —
// kernelcat.ReadLocal returns an error, not os.ErrNotExist.
dir := filepath.Join(kernelsDir, "broken-kernel")
if err := os.MkdirAll(dir, 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(dir, "manifest.json"), []byte(`{"name":"different-name"}`), 0o644); err != nil {
t.Fatal(err)
}
d := &Daemon{layout: paths.Layout{KernelsDir: kernelsDir}}
wireServices(d)
_, err := d.img.readOrAutoPullKernel(context.Background(), "broken-kernel")
if err == nil {
t.Fatal("want error")
}
// Must not be wrapped in an "auto-pull" message — the corrupt-manifest
// failure should surface as the primary cause.
if strings.Contains(err.Error(), "not found in catalog") {
t.Fatalf("err = %v, should not claim 'not in catalog'", err)
}
// Sanity: ensure it's not os.ErrNotExist-compatible.
if errors.Is(err, os.ErrNotExist) {
t.Fatalf("err = %v, should not be os.ErrNotExist", err)
}
}

View file

@ -3,23 +3,34 @@ package daemon
import (
"context"
"errors"
"log/slog"
"net"
"os"
"strings"
"time"
"github.com/miekg/dns"
"banger/internal/firecracker"
"banger/internal/guestconfig"
"banger/internal/model"
"banger/internal/store"
"banger/internal/system"
"banger/internal/vmdns"
)
// vmCapability is the base capability tag. Actual behaviour lives on
// optional sub-interfaces (startPreflight / guestConfig / machineConfig
// / prepareHost / postStart / cleanup / configChange / doctor); a
// capability implements whichever subset it cares about. None of them
// take *Daemon — each capability is a struct constructed with its
// explicit service-pointer dependencies at wireServices time.
type vmCapability interface {
Name() string
}
type startPreflightCapability interface {
AddStartPreflight(context.Context, *Daemon, *system.Preflight, model.VMRecord, model.Image)
AddStartPreflight(context.Context, *system.Preflight, model.VMRecord, model.Image)
}
type guestConfigCapability interface {
@ -31,46 +42,48 @@ type machineConfigCapability interface {
}
type prepareHostCapability interface {
PrepareHost(context.Context, *Daemon, *model.VMRecord, model.Image) error
PrepareHost(context.Context, *model.VMRecord, model.Image) error
}
type postStartCapability interface {
PostStart(context.Context, *Daemon, model.VMRecord, model.Image) error
PostStart(context.Context, model.VMRecord, model.Image) error
}
type cleanupCapability interface {
Cleanup(context.Context, *Daemon, model.VMRecord) error
Cleanup(context.Context, model.VMRecord) error
}
type configChangeCapability interface {
ApplyConfigChange(context.Context, *Daemon, model.VMRecord, model.VMRecord) error
ApplyConfigChange(context.Context, model.VMRecord, model.VMRecord) error
}
type doctorCapability interface {
AddDoctorChecks(context.Context, *Daemon, *system.Report)
AddDoctorChecks(context.Context, *system.Report)
}
func (d *Daemon) registeredCapabilities() []vmCapability {
if len(d.vmCaps) > 0 {
return d.vmCaps
}
// defaultCapabilities builds the production capability list from
// already-constructed services. Called from wireServices once d.vm /
// d.ws / d.net are populated, so every capability ships with the
// concrete service pointers it needs and none of them reach through
// *Daemon at dispatch time.
func (d *Daemon) defaultCapabilities() []vmCapability {
return []vmCapability{
workDiskCapability{},
dnsCapability{},
natCapability{},
newWorkDiskCapability(d.vm, d.ws, d.store, d.config.DefaultImageName),
newDNSCapability(d.net),
newNATCapability(d.vm, d.net, d.logger),
}
}
func (d *Daemon) addCapabilityStartPrereqs(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image) {
for _, capability := range d.registeredCapabilities() {
for _, capability := range d.vmCaps {
if hook, ok := capability.(startPreflightCapability); ok {
hook.AddStartPreflight(ctx, d, checks, vm, image)
hook.AddStartPreflight(ctx, checks, vm, image)
}
}
}
func (d *Daemon) contributeGuestConfig(builder *guestconfig.Builder, vm model.VMRecord, image model.Image) {
for _, capability := range d.registeredCapabilities() {
for _, capability := range d.vmCaps {
if hook, ok := capability.(guestConfigCapability); ok {
hook.ContributeGuest(builder, vm, image)
}
@ -78,7 +91,7 @@ func (d *Daemon) contributeGuestConfig(builder *guestconfig.Builder, vm model.VM
}
func (d *Daemon) contributeMachineConfig(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image) {
for _, capability := range d.registeredCapabilities() {
for _, capability := range d.vmCaps {
if hook, ok := capability.(machineConfigCapability); ok {
hook.ContributeMachine(cfg, vm, image)
}
@ -86,13 +99,13 @@ func (d *Daemon) contributeMachineConfig(cfg *firecracker.MachineConfig, vm mode
}
func (d *Daemon) prepareCapabilityHosts(ctx context.Context, vm *model.VMRecord, image model.Image) error {
prepared := make([]vmCapability, 0, len(d.registeredCapabilities()))
for _, capability := range d.registeredCapabilities() {
prepared := make([]vmCapability, 0, len(d.vmCaps))
for _, capability := range d.vmCaps {
hook, ok := capability.(prepareHostCapability)
if !ok {
continue
}
if err := hook.PrepareHost(ctx, d, vm, image); err != nil {
if err := hook.PrepareHost(ctx, vm, image); err != nil {
d.cleanupPreparedCapabilities(context.Background(), vm, prepared)
return err
}
@ -102,9 +115,17 @@ func (d *Daemon) prepareCapabilityHosts(ctx context.Context, vm *model.VMRecord,
}
func (d *Daemon) postStartCapabilities(ctx context.Context, vm model.VMRecord, image model.Image) error {
for _, capability := range d.registeredCapabilities() {
for _, capability := range d.vmCaps {
switch capability.Name() {
case "dns":
vmCreateStage(ctx, "apply_dns", "publishing vm dns record")
case "nat":
if vm.Spec.NATEnabled {
vmCreateStage(ctx, "apply_nat", "configuring nat")
}
}
if hook, ok := capability.(postStartCapability); ok {
if err := hook.PostStart(ctx, d, vm, image); err != nil {
if err := hook.PostStart(ctx, vm, image); err != nil {
return err
}
}
@ -113,7 +134,7 @@ func (d *Daemon) postStartCapabilities(ctx context.Context, vm model.VMRecord, i
}
func (d *Daemon) cleanupCapabilityState(ctx context.Context, vm model.VMRecord) error {
return d.cleanupPreparedCapabilities(ctx, &vm, d.registeredCapabilities())
return d.cleanupPreparedCapabilities(ctx, &vm, d.vmCaps)
}
func (d *Daemon) cleanupPreparedCapabilities(ctx context.Context, vm *model.VMRecord, capabilities []vmCapability) error {
@ -123,15 +144,24 @@ func (d *Daemon) cleanupPreparedCapabilities(ctx context.Context, vm *model.VMRe
if !ok {
continue
}
err = joinErr(err, hook.Cleanup(ctx, d, *vm))
cleanupErr := hook.Cleanup(ctx, *vm)
if cleanupErr != nil && d.logger != nil {
// Log per-capability cleanup failures. The aggregate
// errors.Join return value is still the contract for
// callers, but a multi-failure cleanup hides which
// capability misbehaved unless we surface each one
// individually here.
d.logger.Warn("capability cleanup failed", append(vmLogAttrs(*vm), "capability", capabilities[index].Name(), "error", cleanupErr.Error())...)
}
err = joinErr(err, cleanupErr)
}
return err
}
func (d *Daemon) applyCapabilityConfigChanges(ctx context.Context, before, after model.VMRecord) error {
for _, capability := range d.registeredCapabilities() {
for _, capability := range d.vmCaps {
if hook, ok := capability.(configChangeCapability); ok {
if err := hook.ApplyConfigChange(ctx, d, before, after); err != nil {
if err := hook.ApplyConfigChange(ctx, before, after); err != nil {
return err
}
}
@ -140,18 +170,37 @@ func (d *Daemon) applyCapabilityConfigChanges(ctx context.Context, before, after
}
func (d *Daemon) addCapabilityDoctorChecks(ctx context.Context, report *system.Report) {
for _, capability := range d.registeredCapabilities() {
for _, capability := range d.vmCaps {
if hook, ok := capability.(doctorCapability); ok {
hook.AddDoctorChecks(ctx, d, report)
hook.AddDoctorChecks(ctx, report)
}
}
}
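
For illustration, a new capability only has to implement Name() plus whichever optional sub-interfaces it needs, and it receives its dependencies at construction rather than reaching through *Daemon. A self-contained sketch with stand-in types (the real VMRecord and Image live in banger/internal/model; bannerCapability is hypothetical):

package main

import (
	"context"
	"fmt"
)

// Stand-ins so the sketch compiles on its own.
type VMRecord struct{ Name string }
type Image struct{}

type vmCapability interface{ Name() string }
type postStartCapability interface {
	PostStart(context.Context, VMRecord, Image) error
}

// bannerCapability is hypothetical: it cares only about PostStart, so it
// implements just that sub-interface. Its logger is injected at
// construction, the same shape newDNSCapability / newNATCapability use.
type bannerCapability struct{ log func(string) }

func newBannerCapability(log func(string)) bannerCapability {
	return bannerCapability{log: log}
}
func (bannerCapability) Name() string { return "banner" }
func (c bannerCapability) PostStart(_ context.Context, vm VMRecord, _ Image) error {
	c.log("vm started: " + vm.Name)
	return nil
}

func main() {
	capabilities := []vmCapability{newBannerCapability(func(s string) { fmt.Println(s) })}
	for _, capability := range capabilities {
		// Same dispatch shape as postStartCapabilities: type-assert the
		// optional hook and skip capabilities that don't implement it.
		if hook, ok := capability.(postStartCapability); ok {
			_ = hook.PostStart(context.Background(), VMRecord{Name: "demo"}, Image{})
		}
	}
}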
type workDiskCapability struct{}
// workDiskCapability provisions a per-VM work disk (image-seeded or
// freshly formatted) and syncs host-side authorised keys + git
// identity + file_sync entries onto it. Holds pointers to the VM and
// workspace services because PrepareHost orchestrates across both,
// plus the store + default image name for its doctor check.
type workDiskCapability struct {
vm *VMService
ws *WorkspaceService
store *store.Store
defaultImageName string
}
func newWorkDiskCapability(vm *VMService, ws *WorkspaceService, st *store.Store, defaultImageName string) workDiskCapability {
return workDiskCapability{
vm: vm,
ws: ws,
store: st,
defaultImageName: defaultImageName,
}
}
func (workDiskCapability) Name() string { return "work-disk" }
func (workDiskCapability) AddStartPreflight(_ context.Context, _ *Daemon, checks *system.Preflight, vm model.VMRecord, image model.Image) {
func (workDiskCapability) AddStartPreflight(_ context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image) {
if exists(vm.Runtime.WorkDiskPath) {
return
}
@ -190,45 +239,73 @@ func (workDiskCapability) ContributeMachine(cfg *firecracker.MachineConfig, vm m
})
}
func (workDiskCapability) PrepareHost(ctx context.Context, d *Daemon, vm *model.VMRecord, image model.Image) error {
if err := d.ensureWorkDisk(ctx, vm, image); err != nil {
func (c workDiskCapability) PrepareHost(ctx context.Context, vm *model.VMRecord, image model.Image) error {
prep, err := c.vm.ensureWorkDisk(ctx, vm, image)
if err != nil {
return err
}
return d.ensureAuthorizedKeyOnWorkDisk(ctx, vm)
if err := c.ws.ensureAuthorizedKeyOnWorkDisk(ctx, vm, image, prep); err != nil {
return err
}
if err := c.ws.ensureHushLoginOnWorkDisk(ctx, vm); err != nil {
return err
}
if err := c.ws.ensureGitIdentityOnWorkDisk(ctx, vm); err != nil {
return err
}
return c.ws.runFileSync(ctx, vm)
}
func (workDiskCapability) AddDoctorChecks(_ context.Context, d *Daemon, report *system.Report) {
if strings.TrimSpace(d.config.DefaultWorkSeed) != "" && exists(d.config.DefaultWorkSeed) {
checks := system.NewPreflight()
checks.RequireFile(d.config.DefaultWorkSeed, "default work seed image", `rebuild the default runtime rootfs to regenerate the /root seed`)
report.AddPreflight("feature /root work disk", checks, "seeded /root work disk artifact available")
return
func (c workDiskCapability) AddDoctorChecks(_ context.Context, report *system.Report) {
if c.store != nil && strings.TrimSpace(c.defaultImageName) != "" {
if image, err := c.store.GetImageByName(context.Background(), c.defaultImageName); err == nil && strings.TrimSpace(image.WorkSeedPath) != "" && exists(image.WorkSeedPath) {
checks := system.NewPreflight()
checks.RequireFile(image.WorkSeedPath, "default image work-seed", `rebuild the default image to regenerate the /root seed`)
report.AddPreflight("feature /root work disk", checks, "seeded /root work disk artifact available")
return
}
}
checks := system.NewPreflight()
for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
for _, command := range []string{"truncate", "mkfs.ext4"} {
checks.RequireCommand(command, toolHint(command))
}
report.AddPreflight("feature /root work disk", checks, "fallback /root work disk tooling available")
report.AddWarn("feature /root work disk", "default image has no work-seed artifact; new VM creates will be slower until the image is rebuilt")
report.AddWarn("feature /root work disk", "default image has no work-seed artifact; guest /root will be empty until the image is rebuilt")
}
type dnsCapability struct{}
// dnsCapability publishes + removes <vm>.vm records on the in-process
// DNS server. Only needs HostNetwork.
type dnsCapability struct {
net *HostNetwork
}
func newDNSCapability(net *HostNetwork) dnsCapability {
return dnsCapability{net: net}
}
func (dnsCapability) Name() string { return "dns" }
func (dnsCapability) PostStart(ctx context.Context, d *Daemon, vm model.VMRecord, _ model.Image) error {
return d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP)
func (c dnsCapability) PostStart(ctx context.Context, vm model.VMRecord, _ model.Image) error {
return c.net.setDNS(ctx, vm.Name, vm.Runtime.GuestIP)
}
func (dnsCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
return d.removeDNS(ctx, vm.Runtime.DNSName)
func (c dnsCapability) Cleanup(_ context.Context, vm model.VMRecord) error {
return c.net.removeDNS(vm.Runtime.DNSName)
}
func (dnsCapability) AddDoctorChecks(_ context.Context, _ *Daemon, report *system.Report) {
func (dnsCapability) AddDoctorChecks(_ context.Context, report *system.Report) {
conn, err := net.ListenPacket("udp", vmdns.DefaultListenAddr)
if err != nil {
if strings.Contains(strings.ToLower(err.Error()), "address already in use") {
report.AddWarn("feature vm dns", "listener address "+vmdns.DefaultListenAddr+" is already in use")
// "Already in use" is the expected state when banger's own
// daemon is running. Probe the listener with a *.vm query
// the banger DNS server is the only thing on the host
// authoritative for, and pass if the response shape matches.
if probeBangerDNS(vmdns.DefaultListenAddr) {
report.AddPass("feature vm dns", "banger DNS server is already serving "+vmdns.DefaultListenAddr)
return
}
report.AddWarn("feature vm dns", "listener address "+vmdns.DefaultListenAddr+" is held by another process")
return
}
report.AddFail("feature vm dns", "cannot bind "+vmdns.DefaultListenAddr+": "+err.Error())
@ -238,56 +315,91 @@ func (dnsCapability) AddDoctorChecks(_ context.Context, _ *Daemon, report *syste
report.AddPass("feature vm dns", "listener can bind "+vmdns.DefaultListenAddr)
}
type natCapability struct{}
// probeBangerDNS returns true iff a UDP DNS query to addr is answered
// by something that behaves like banger's vmdns server: a *.vm name
// produces an authoritative NXDOMAIN. Any other listener (a stub
// resolver, a different DNS server) either refuses, recurses, or
// returns a non-authoritative answer, and the probe rejects all of
// those shapes.
func probeBangerDNS(addr string) bool {
client := &dns.Client{Net: "udp", Timeout: 500 * time.Millisecond}
req := new(dns.Msg)
req.SetQuestion("doctor-probe-not-a-real-vm.vm.", dns.TypeA)
resp, _, err := client.Exchange(req, addr)
if err != nil || resp == nil {
return false
}
return resp.Authoritative && resp.Rcode == dns.RcodeNameError
}
// natCapability sets up host-side NAT so guest traffic can reach the
// outside world. Needs VMService (tap lookup + aliveness) and
// HostNetwork (NAT rules), plus the daemon logger for the cleanup
// short-circuit note.
type natCapability struct {
vm *VMService
net *HostNetwork
logger *slog.Logger
}
func newNATCapability(vm *VMService, net *HostNetwork, logger *slog.Logger) natCapability {
return natCapability{vm: vm, net: net, logger: logger}
}
func (natCapability) Name() string { return "nat" }
func (natCapability) AddStartPreflight(ctx context.Context, d *Daemon, checks *system.Preflight, vm model.VMRecord, _ model.Image) {
func (c natCapability) AddStartPreflight(ctx context.Context, checks *system.Preflight, vm model.VMRecord, _ model.Image) {
if !vm.Spec.NATEnabled {
return
}
d.addNATPrereqs(ctx, checks)
c.net.addNATPrereqs(ctx, checks)
}
func (natCapability) PostStart(ctx context.Context, d *Daemon, vm model.VMRecord, _ model.Image) error {
func (c natCapability) PostStart(ctx context.Context, vm model.VMRecord, _ model.Image) error {
if !vm.Spec.NATEnabled {
return nil
}
return d.ensureNAT(ctx, vm, true)
return c.net.ensureNAT(ctx, vm.Runtime.GuestIP, c.vm.vmHandles(vm.ID).TapDevice, true)
}
func (natCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
func (c natCapability) Cleanup(ctx context.Context, vm model.VMRecord) error {
if !vm.Spec.NATEnabled {
return nil
}
if strings.TrimSpace(vm.Runtime.GuestIP) == "" || strings.TrimSpace(vm.Runtime.TapDevice) == "" {
if d.logger != nil {
d.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", vm.Runtime.TapDevice)...)
// Handle cache is volatile across daemon restarts; Runtime is
// the persisted DB-backed copy. Fall back so a crash / corrupt
// handles.json doesn't leak iptables rules keyed off the tap.
tap := strings.TrimSpace(c.vm.vmHandles(vm.ID).TapDevice)
if tap == "" {
tap = strings.TrimSpace(vm.Runtime.TapDevice)
}
if strings.TrimSpace(vm.Runtime.GuestIP) == "" || tap == "" {
if c.logger != nil {
c.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", tap)...)
}
return nil
}
return d.ensureNAT(ctx, vm, false)
return c.net.ensureNAT(ctx, vm.Runtime.GuestIP, tap, false)
}
func (natCapability) ApplyConfigChange(ctx context.Context, d *Daemon, before, after model.VMRecord) error {
func (c natCapability) ApplyConfigChange(ctx context.Context, before, after model.VMRecord) error {
if before.Spec.NATEnabled == after.Spec.NATEnabled {
return nil
}
if after.State != model.VMStateRunning || !system.ProcessRunning(after.Runtime.PID, after.Runtime.APISockPath) {
if !c.vm.vmAlive(after) {
return nil
}
return d.ensureNAT(ctx, after, after.Spec.NATEnabled)
return c.net.ensureNAT(ctx, after.Runtime.GuestIP, c.vm.vmHandles(after.ID).TapDevice, after.Spec.NATEnabled)
}
func (natCapability) AddDoctorChecks(ctx context.Context, d *Daemon, report *system.Report) {
func (c natCapability) AddDoctorChecks(ctx context.Context, report *system.Report) {
checks := system.NewPreflight()
checks.RequireCommand("ip", toolHint("ip"))
d.addNATPrereqs(ctx, checks)
c.net.addNATPrereqs(ctx, checks)
if len(checks.Problems()) > 0 {
report.Add(system.CheckStatusFail, "feature nat", checks.Problems()...)
return
}
uplink, err := d.defaultUplink(ctx)
uplink, err := c.net.defaultUplink(ctx)
if err != nil {
report.AddFail("feature nat", err.Error())
return

View file

@ -3,6 +3,7 @@ package daemon
import (
"context"
"errors"
"net"
"reflect"
"testing"
@ -10,31 +11,32 @@ import (
"banger/internal/guestconfig"
"banger/internal/model"
"banger/internal/system"
"banger/internal/vmdns"
)
type testCapability struct {
name string
prepare func(context.Context, *Daemon, *model.VMRecord, model.Image) error
cleanup func(context.Context, *Daemon, model.VMRecord) error
prepare func(context.Context, *model.VMRecord, model.Image) error
cleanup func(context.Context, model.VMRecord) error
contribute func(*guestconfig.Builder, model.VMRecord, model.Image)
contributeFC func(*firecracker.MachineConfig, model.VMRecord, model.Image)
configChange func(context.Context, *Daemon, model.VMRecord, model.VMRecord) error
doctor func(context.Context, *Daemon, *system.Report)
startPreflight func(context.Context, *Daemon, *system.Preflight, model.VMRecord, model.Image)
configChange func(context.Context, model.VMRecord, model.VMRecord) error
doctor func(context.Context, *system.Report)
startPreflight func(context.Context, *system.Preflight, model.VMRecord, model.Image)
}
func (c testCapability) Name() string { return c.name }
func (c testCapability) PrepareHost(ctx context.Context, d *Daemon, vm *model.VMRecord, image model.Image) error {
func (c testCapability) PrepareHost(ctx context.Context, vm *model.VMRecord, image model.Image) error {
if c.prepare != nil {
return c.prepare(ctx, d, vm, image)
return c.prepare(ctx, vm, image)
}
return nil
}
func (c testCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
func (c testCapability) Cleanup(ctx context.Context, vm model.VMRecord) error {
if c.cleanup != nil {
return c.cleanup(ctx, d, vm)
return c.cleanup(ctx, vm)
}
return nil
}
@ -51,22 +53,22 @@ func (c testCapability) ContributeMachine(cfg *firecracker.MachineConfig, vm mod
}
}
func (c testCapability) ApplyConfigChange(ctx context.Context, d *Daemon, before, after model.VMRecord) error {
func (c testCapability) ApplyConfigChange(ctx context.Context, before, after model.VMRecord) error {
if c.configChange != nil {
return c.configChange(ctx, d, before, after)
return c.configChange(ctx, before, after)
}
return nil
}
func (c testCapability) AddDoctorChecks(ctx context.Context, d *Daemon, report *system.Report) {
func (c testCapability) AddDoctorChecks(ctx context.Context, report *system.Report) {
if c.doctor != nil {
c.doctor(ctx, d, report)
c.doctor(ctx, report)
}
}
func (c testCapability) AddStartPreflight(ctx context.Context, d *Daemon, checks *system.Preflight, vm model.VMRecord, image model.Image) {
func (c testCapability) AddStartPreflight(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image) {
if c.startPreflight != nil {
c.startPreflight(ctx, d, checks, vm, image)
c.startPreflight(ctx, checks, vm, image)
}
}
@ -78,32 +80,33 @@ func TestPrepareCapabilityHostsRollsBackPreparedCapabilitiesInReverseOrder(t *te
vmCaps: []vmCapability{
testCapability{
name: "first",
prepare: func(context.Context, *Daemon, *model.VMRecord, model.Image) error {
prepare: func(context.Context, *model.VMRecord, model.Image) error {
return nil
},
cleanup: func(context.Context, *Daemon, model.VMRecord) error {
cleanup: func(context.Context, model.VMRecord) error {
cleanupOrder = append(cleanupOrder, "first")
return nil
},
},
testCapability{
name: "second",
prepare: func(context.Context, *Daemon, *model.VMRecord, model.Image) error {
prepare: func(context.Context, *model.VMRecord, model.Image) error {
return nil
},
cleanup: func(context.Context, *Daemon, model.VMRecord) error {
cleanup: func(context.Context, model.VMRecord) error {
cleanupOrder = append(cleanupOrder, "second")
return nil
},
},
testCapability{
name: "broken",
prepare: func(context.Context, *Daemon, *model.VMRecord, model.Image) error {
prepare: func(context.Context, *model.VMRecord, model.Image) error {
return errors.New("boom")
},
},
},
}
wireServices(d)
err := d.prepareCapabilityHosts(context.Background(), &vm, model.Image{})
if err == nil || err.Error() != "boom" {
@ -128,6 +131,7 @@ func TestContributeHooksPopulateGuestAndMachineConfig(t *testing.T) {
},
},
}
wireServices(d)
builder := guestconfig.NewBuilder()
d.contributeGuestConfig(builder, model.VMRecord{}, model.Image{})
@ -143,3 +147,42 @@ func TestContributeHooksPopulateGuestAndMachineConfig(t *testing.T) {
t.Fatalf("guest fstab = %q, want %q", fstab, want)
}
}
func TestProbeBangerDNSAcceptsRealServer(t *testing.T) {
server, err := vmdns.New("127.0.0.1:0", nil)
if err != nil {
t.Fatalf("vmdns.New: %v", err)
}
t.Cleanup(func() { _ = server.Close() })
if !probeBangerDNS(server.Addr()) {
t.Fatal("probeBangerDNS rejected the real banger DNS server")
}
}
func TestProbeBangerDNSRejectsSilentListener(t *testing.T) {
// A UDP listener that drops every datagram. The probe should
// time out and return false — i.e. "this is not banger".
conn, err := net.ListenPacket("udp", "127.0.0.1:0")
if err != nil {
t.Fatalf("ListenPacket: %v", err)
}
t.Cleanup(func() { _ = conn.Close() })
if probeBangerDNS(conn.LocalAddr().String()) {
t.Fatal("probeBangerDNS accepted a silent non-DNS listener")
}
}
func TestDefaultCapabilitiesInOrder(t *testing.T) {
d := &Daemon{}
wireServices(d)
var names []string
for _, capability := range d.vmCaps {
names = append(names, capability.Name())
}
want := []string{"work-disk", "dns", "nat"}
if !reflect.DeepEqual(names, want) {
t.Fatalf("capabilities = %v, want %v", names, want)
}
}

View file

@ -0,0 +1,210 @@
package daemon
import (
"context"
"os"
"path/filepath"
"sync"
"sync/atomic"
"testing"
"time"
"banger/internal/api"
"banger/internal/imagepull"
"banger/internal/paths"
"banger/internal/system"
)
// TestPullImageDoesNotSerialiseOnDifferentNames confirms the refactor
// actually releases imageOpsMu during the slow staging phase: two
// PullImage calls for distinct names run concurrently, with the
// "pull" half overlapping in time. Before the fix the two would have
// run strictly sequentially (one blocking the other inside
// imageOpsMu across the full OCI pull), and the maxActive >= 2
// assertion below would fail.
func TestPullImageDoesNotSerialiseOnDifferentNames(t *testing.T) {
if _, err := os.Stat("/usr/bin/mkfs.ext4"); err != nil {
if _, err := os.Stat("/sbin/mkfs.ext4"); err != nil {
t.Skip("mkfs.ext4 not available; skipping")
}
}
imagesDir := t.TempDir()
cacheDir := t.TempDir()
kernel, initrd, modules := writeFakeKernelTriple(t)
var (
active atomic.Int32
maxActive atomic.Int32
enterPull = make(chan struct{})
startRelease = make(chan struct{})
)
slowPullAndFlatten := func(_ context.Context, _ string, _ string, destDir string) (imagepull.Metadata, error) {
// Record that we entered the pull body.
enterPull <- struct{}{}
// Track concurrent overlap.
n := active.Add(1)
for {
cur := maxActive.Load()
if n <= cur || maxActive.CompareAndSwap(cur, n) {
break
}
}
// Wait for the test to unblock us AFTER both pulls have
// entered the body.
<-startRelease
active.Add(-1)
// Produce the minimal synthetic tree stubPullAndFlatten does.
if err := os.MkdirAll(filepath.Join(destDir, "etc"), 0o755); err != nil {
return imagepull.Metadata{}, err
}
if err := os.WriteFile(filepath.Join(destDir, "etc", "hello"), []byte("world"), 0o644); err != nil {
return imagepull.Metadata{}, err
}
return imagepull.Metadata{Entries: map[string]imagepull.FileMeta{}}, nil
}
d := &Daemon{
layout: paths.Layout{ImagesDir: imagesDir, OCICacheDir: cacheDir},
store: openDaemonStore(t),
runner: system.NewRunner(),
}
d.img = &ImageService{
layout: d.layout,
store: d.store,
runner: d.runner,
pullAndFlatten: slowPullAndFlatten,
finalizePulledRootfs: stubFinalizePulledRootfs,
workSeedBuilder: stubWorkSeedBuilder,
}
wireServices(d)
mkParams := func(name string) api.ImagePullParams {
return api.ImagePullParams{
Ref: "example.invalid/" + name + ":latest",
Name: name,
KernelPath: kernel,
InitrdPath: initrd,
ModulesDir: modules,
}
}
var wg sync.WaitGroup
errs := make([]error, 2)
for i, name := range []string{"alpha", "beta"} {
wg.Add(1)
go func(i int, name string) {
defer wg.Done()
_, err := d.img.PullImage(context.Background(), mkParams(name))
errs[i] = err
}(i, name)
}
// Wait for BOTH pulls to enter the slow body before we release
// them. If imageOpsMu still wrapped the full flow, the second
// pull would block on the mutex and never reach the enterPull
// send — the timeout below would fire.
for i := 0; i < 2; i++ {
select {
case <-enterPull:
case <-time.After(3 * time.Second):
t.Fatalf("pull %d never entered the slow body — imageOpsMu still serialises distinct pulls", i+1)
}
}
close(startRelease)
wg.Wait()
for i, err := range errs {
if err != nil {
t.Fatalf("pull %d failed: %v", i+1, err)
}
}
if maxActive.Load() < 2 {
t.Fatalf("maxActive = %d, want >= 2 (pulls did not overlap)", maxActive.Load())
}
}
// TestPullImageRejectsNameClashAtPublish confirms the publish-window
// recheck is what actually enforces name uniqueness now that the slow
// body runs unlocked. Two pulls race to the same name; one wins and
// the other errors.
func TestPullImageRejectsNameClashAtPublish(t *testing.T) {
if _, err := os.Stat("/usr/bin/mkfs.ext4"); err != nil {
if _, err := os.Stat("/sbin/mkfs.ext4"); err != nil {
t.Skip("mkfs.ext4 not available; skipping")
}
}
imagesDir := t.TempDir()
cacheDir := t.TempDir()
kernel, initrd, modules := writeFakeKernelTriple(t)
release := make(chan struct{})
synchronised := make(chan struct{}, 2)
pullAndFlatten := func(_ context.Context, _ string, _ string, destDir string) (imagepull.Metadata, error) {
synchronised <- struct{}{}
<-release
if err := os.MkdirAll(filepath.Join(destDir, "etc"), 0o755); err != nil {
return imagepull.Metadata{}, err
}
if err := os.WriteFile(filepath.Join(destDir, "marker"), []byte("ok"), 0o644); err != nil {
return imagepull.Metadata{}, err
}
return imagepull.Metadata{Entries: map[string]imagepull.FileMeta{}}, nil
}
d := &Daemon{
layout: paths.Layout{ImagesDir: imagesDir, OCICacheDir: cacheDir},
store: openDaemonStore(t),
runner: system.NewRunner(),
}
d.img = &ImageService{
layout: d.layout,
store: d.store,
runner: d.runner,
pullAndFlatten: pullAndFlatten,
finalizePulledRootfs: stubFinalizePulledRootfs,
workSeedBuilder: stubWorkSeedBuilder,
}
wireServices(d)
params := api.ImagePullParams{
Ref: "example.invalid/contender:latest",
Name: "contender",
KernelPath: kernel,
InitrdPath: initrd,
ModulesDir: modules,
}
var wg sync.WaitGroup
errs := make([]error, 2)
for i := 0; i < 2; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
_, err := d.img.PullImage(context.Background(), params)
errs[i] = err
}(i)
}
// Both workers must enter the pull body before either publishes.
for i := 0; i < 2; i++ {
select {
case <-synchronised:
case <-time.After(3 * time.Second):
t.Fatalf("pull %d never entered the slow body", i+1)
}
}
close(release)
wg.Wait()
wins, losses := 0, 0
for _, err := range errs {
if err == nil {
wins++
} else {
losses++
}
}
if wins != 1 || losses != 1 {
t.Fatalf("wins=%d losses=%d, want exactly one of each (errs=%v)", wins, losses, errs)
}
}
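
Both tests pin the same locking shape: the slow pull runs with no lock held, and name uniqueness is enforced by a re-check inside the short publish window. A self-contained sketch of that pattern (the registry type and its map are illustrative, not the daemon's store):

package main

import (
	"errors"
	"fmt"
	"sync"
)

type registry struct {
	mu     sync.Mutex
	images map[string]bool
}

func (r *registry) pull(name string, stage func() error) error {
	// Slow, unlocked phase: pulls for distinct names can overlap here.
	if err := stage(); err != nil {
		return err
	}
	// Publish window: re-check the name under the lock, since another
	// pull may have won the race while this one was staging.
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.images[name] {
		return errors.New("image name already exists: " + name)
	}
	r.images[name] = true
	return nil
}

func main() {
	r := &registry{images: map[string]bool{}}
	var wg sync.WaitGroup
	errs := make([]error, 2)
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			errs[i] = r.pull("contender", func() error { return nil })
		}(i)
	}
	wg.Wait()
	fmt.Println(errs) // exactly one nil and one "already exists"
}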

View file

@ -3,7 +3,6 @@ package daemon
import (
"bufio"
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
@ -15,36 +14,50 @@ import (
"sync"
"time"
"banger/internal/api"
"golang.org/x/sys/unix"
"banger/internal/config"
ws "banger/internal/daemon/workspace"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/roothelper"
"banger/internal/rpc"
"banger/internal/store"
"banger/internal/system"
"banger/internal/vmdns"
)
// Daemon is the composition root: shared infrastructure (store,
// runner, logger, layout, config) plus pointers to the four focused
// services that own behavior. Open wires the services; the dispatch
// loop forwards RPCs to them. No lifecycle / image / workspace /
// networking behavior lives on *Daemon itself — it's wiring.
type Daemon struct {
layout paths.Layout
config model.DaemonConfig
store *store.Store
runner system.CommandRunner
logger *slog.Logger
mu sync.Mutex
vmLocksMu sync.Mutex
vmLocks map[string]*sync.Mutex
tapPoolMu sync.Mutex
tapPool []string
tapPoolNext int
closing chan struct{}
once sync.Once
pid int
listener net.Listener
vmDNS *vmdns.Server
vmCaps []vmCapability
imageBuild func(context.Context, imageBuildSpec) error
requestHandler func(context.Context, rpc.Request) rpc.Response
layout paths.Layout
userLayout paths.Layout
config model.DaemonConfig
store *store.Store
runner system.CommandRunner
logger *slog.Logger
priv privilegedOps
net *HostNetwork
img *ImageService
ws *WorkspaceService
vm *VMService
stats *StatsService
closing chan struct{}
once sync.Once
pid int
listener net.Listener
vmCaps []vmCapability
requestHandler func(context.Context, rpc.Request) rpc.Response
guestWaitForSSH func(context.Context, string, string, time.Duration) error
guestDial func(context.Context, string, string) (guestSSHClient, error)
clientUID int
clientGID int
}
func Open(ctx context.Context) (d *Daemon, err error) {
@ -59,6 +72,39 @@ func Open(ctx context.Context) (d *Daemon, err error) {
if err != nil {
return nil, err
}
return openWithConfig(ctx, layout, layout, cfg, os.Getuid(), os.Getgid(), true, nil)
}
func OpenSystem(ctx context.Context) (*Daemon, error) {
meta, err := installmeta.Load(installmeta.DefaultPath)
if err != nil {
return nil, err
}
layout := paths.ResolveSystem()
if err := paths.EnsureSystemOwned(layout); err != nil {
return nil, err
}
ownerLayout, err := paths.ResolveUserForHome(meta.OwnerHome)
if err != nil {
return nil, err
}
cfg, err := config.LoadDaemon(ownerLayout, meta.OwnerHome)
if err != nil {
return nil, err
}
// config.LoadDaemon fills JailerChrootBase from the layout it sees. In
// system mode that's the owner's layout (no privileged StateDir), so
// the value lands under the owner home — wrong for the helper, which
// validates paths against the system StateDir. Override it whenever the
// configured value is empty, relative, or still under the owner layout,
// so both daemon and helper see /var/lib/banger/jail.
if strings.TrimSpace(cfg.JailerChrootBase) == "" || !filepath.IsAbs(cfg.JailerChrootBase) || strings.HasPrefix(cfg.JailerChrootBase, ownerLayout.StateDir) {
cfg.JailerChrootBase = filepath.Join(layout.StateDir, "jail")
}
helper := newHelperPrivilegedOps(roothelper.NewClient(installmeta.DefaultRootHelperSocketPath), cfg, layout)
return openWithConfig(ctx, layout, ownerLayout, cfg, -1, -1, false, helper)
}
func openWithConfig(ctx context.Context, layout, userLayout paths.Layout, cfg model.DaemonConfig, clientUID, clientGID int, syncSSHConfig bool, priv privilegedOps) (d *Daemon, err error) {
logger, normalizedLevel, err := newDaemonLogger(os.Stderr, cfg.LogLevel)
if err != nil {
return nil, err
@ -68,38 +114,64 @@ func Open(ctx context.Context) (d *Daemon, err error) {
if err != nil {
return nil, err
}
closing := make(chan struct{})
runner := system.NewRunner()
d = &Daemon{
layout: layout,
config: cfg,
store: db,
runner: system.NewRunner(),
logger: logger,
closing: make(chan struct{}),
pid: os.Getpid(),
}
d.logger.Info("daemon opened", "socket", layout.SocketPath, "state_dir", layout.StateDir, "runtime_dir", cfg.RuntimeDir, "log_level", cfg.LogLevel)
if err = d.startVMDNS(vmdns.DefaultListenAddr); err != nil {
d.logger.Error("daemon open failed", "stage", "start_vm_dns", "error", err.Error())
return nil, err
layout: layout,
userLayout: userLayout,
config: cfg,
store: db,
runner: runner,
logger: logger,
closing: closing,
pid: os.Getpid(),
clientUID: clientUID,
clientGID: clientGID,
priv: priv,
}
wireServices(d)
// From here on, every failure path must run Close() so the host
// state we touched (DNS listener goroutine, resolvectl routing,
// SQLite handle, future side effects) gets unwound. Close is
// idempotent + nil-guarded so it's safe to call on a partially
// initialised daemon — `d.vmDNS == nil` and friends short-circuit
// the teardown of components we never set up.
defer func() {
if err != nil {
_ = d.stopVMDNS()
_ = d.Close()
}
}()
if err = d.ensureDefaultImage(ctx); err != nil {
d.logger.Error("daemon open failed", "stage", "ensure_default_image", "error", err.Error())
if syncSSHConfig {
d.ensureVMSSHClientConfig()
}
d.logger.Info("daemon opened", "socket", layout.SocketPath, "state_dir", layout.StateDir, "log_level", cfg.LogLevel)
if err = d.net.startVMDNS(vmdns.DefaultListenAddr); err != nil {
d.logger.Error("daemon open failed", "stage", "start_vm_dns", "error", err.Error())
return nil, err
}
if err = d.reconcile(ctx); err != nil {
d.logger.Error("daemon open failed", "stage", "reconcile", "error", err.Error())
return nil, err
}
if err = d.initializeTapPool(ctx); err != nil {
d.logger.Error("daemon open failed", "stage", "initialize_tap_pool", "error", err.Error())
return nil, err
d.net.ensureVMDNSResolverRouting(ctx)
// Seed HostNetwork's pool index from taps already claimed by VMs
// on disk so newly warmed pool entries don't collide with them.
if d.config.TapPoolSize > 0 && d.store != nil {
vms, listErr := d.store.ListVMs(ctx)
if listErr != nil {
d.logger.Error("daemon open failed", "stage", "initialize_tap_pool", "error", listErr.Error())
return nil, listErr
}
used := make([]string, 0, len(vms))
for _, vm := range vms {
if tap := d.vm.vmHandles(vm.ID).TapDevice; tap != "" {
used = append(used, tap)
}
}
d.net.initializeTapPool(used)
}
go d.ensureTapPool(context.Background())
go d.net.ensureTapPool(context.Background())
return d, nil
}
@ -113,7 +185,11 @@ func (d *Daemon) Close() error {
if d.listener != nil {
_ = d.listener.Close()
}
err = errors.Join(d.stopVMDNS(), d.store.Close())
var closeErr error
if d.store != nil {
closeErr = d.store.Close()
}
err = errors.Join(d.net.clearVMDNSResolverRouting(context.Background()), d.net.stopVMDNS(), closeErr)
})
return err
}
@ -130,13 +206,31 @@ func (d *Daemon) Serve(ctx context.Context) error {
d.listener = listener
defer listener.Close()
defer os.Remove(d.layout.SocketPath)
serveDone := make(chan struct{})
defer close(serveDone)
go func() {
select {
case <-ctx.Done():
_ = listener.Close()
case <-d.closing:
case <-serveDone:
}
}()
// Tighten the socket mode while root still owns it, then hand it to
// the configured client uid/gid. In the hardened systemd unit we keep
// CAP_CHOWN but intentionally do not keep the broader file-ownership
// capability set that would be needed to chmod after chown.
if err := os.Chmod(d.layout.SocketPath, 0o600); err != nil {
return err
}
if d.clientUID >= 0 && d.clientGID >= 0 {
if err := os.Chown(d.layout.SocketPath, d.clientUID, d.clientGID); err != nil {
return err
}
}
if d.logger != nil {
d.logger.Info("daemon serving", "socket", d.layout.SocketPath, "pid", d.pid)
}
go d.backgroundLoop()
for {
@ -149,7 +243,7 @@ func (d *Daemon) Serve(ctx context.Context) error {
return nil
default:
}
if ne, ok := err.(net.Error); ok && ne.Temporary() {
if _, ok := err.(net.Error); ok {
if d.logger != nil {
d.logger.Warn("daemon accept temporary failure", "error", err.Error())
}
@ -167,6 +261,13 @@ func (d *Daemon) Serve(ctx context.Context) error {
func (d *Daemon) handleConn(conn net.Conn) {
defer conn.Close()
if err := d.authorizeConn(conn); err != nil {
if d.logger != nil {
d.logger.Warn("daemon connection rejected", "remote", conn.RemoteAddr().String(), "error", err.Error())
}
_ = json.NewEncoder(conn).Encode(rpc.NewError("unauthorized", err.Error()))
return
}
reader := bufio.NewReader(conn)
var req rpc.Request
if err := json.NewDecoder(reader).Decode(&req); err != nil {
@ -189,6 +290,44 @@ func (d *Daemon) handleConn(conn net.Conn) {
}
}
// authorizeConn enforces SO_PEERCRED on the daemon socket as a
// belt-and-braces check on top of filesystem perms (0600 + chowned to
// the owner). Filesystem perms already prevent other host users from
// connecting; the peer-cred read closes the door on any path that
// might leak the socket FD to a non-owner process. Mirrors the
// equivalent check in roothelper.authorizeConn.
func (d *Daemon) authorizeConn(conn net.Conn) error {
unixConn, ok := conn.(*net.UnixConn)
if !ok {
return errors.New("daemon requires unix connections")
}
rawConn, err := unixConn.SyscallConn()
if err != nil {
return err
}
var cred *unix.Ucred
var controlErr error
if err := rawConn.Control(func(fd uintptr) {
cred, controlErr = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
}); err != nil {
return err
}
if controlErr != nil {
return controlErr
}
if cred == nil {
return errors.New("missing peer credentials")
}
expected := d.clientUID
if expected < 0 {
expected = os.Getuid()
}
if int(cred.Uid) == 0 || int(cred.Uid) == expected {
return nil
}
return fmt.Errorf("uid %d is not allowed to use the daemon", cred.Uid)
}
func (d *Daemon) watchRequestDisconnect(conn net.Conn, reader *bufio.Reader, method string, cancel context.CancelFunc) func() {
if conn == nil || reader == nil {
return func() {}
@ -214,7 +353,7 @@ func (d *Daemon) watchRequestDisconnect(conn net.Conn, reader *bufio.Reader, met
default:
}
if d.logger != nil {
d.logger.Info("daemon request canceled", "method", method, "remote", conn.RemoteAddr().String(), "error", err.Error())
d.logger.Debug("daemon request canceled", "method", method, "remote", conn.RemoteAddr().String(), "error", err.Error())
}
cancel()
return
@ -228,164 +367,34 @@ func (d *Daemon) watchRequestDisconnect(conn net.Conn, reader *bufio.Reader, met
}
func (d *Daemon) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
// Per-RPC correlation id is generated unconditionally — even
// errors that short-circuit before reaching a handler get one
// so the operator has a handle for every CLI failure.
// Generation can fail in theory (crypto/rand IO error) —
// degrade gracefully to a blank id rather than tearing down
// the request.
opID, _ := model.NewOpID()
if opID != "" {
ctx = WithOpID(ctx, opID)
}
stampOpID := func(resp rpc.Response) rpc.Response {
if !resp.OK && resp.Error != nil && resp.Error.OpID == "" && opID != "" {
resp.Error.OpID = opID
}
return resp
}
if req.Version != rpc.Version {
return rpc.NewError("bad_version", fmt.Sprintf("unsupported version %d", req.Version))
return stampOpID(rpc.NewError("bad_version", fmt.Sprintf("unsupported version %d", req.Version)))
}
if d.requestHandler != nil {
return d.requestHandler(ctx, req)
return stampOpID(d.requestHandler(ctx, req))
}
switch req.Method {
case "ping":
result, _ := rpc.NewResult(api.PingResult{Status: "ok", PID: d.pid})
return result
case "shutdown":
go d.Close()
result, _ := rpc.NewResult(api.ShutdownResult{Status: "stopping"})
return result
case "vm.create":
params, err := rpc.DecodeParams[api.VMCreateParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.CreateVM(ctx, params)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.list":
vms, err := d.store.ListVMs(ctx)
return marshalResultOrError(api.VMListResult{VMs: vms}, err)
case "vm.show":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.FindVM(ctx, params.IDOrName)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.start":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.StartVM(ctx, params.IDOrName)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.stop":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.StopVM(ctx, params.IDOrName)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.kill":
params, err := rpc.DecodeParams[api.VMKillParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.KillVM(ctx, params)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.restart":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.RestartVM(ctx, params.IDOrName)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.delete":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.DeleteVM(ctx, params.IDOrName)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.set":
params, err := rpc.DecodeParams[api.VMSetParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.SetVM(ctx, params)
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
case "vm.stats":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, stats, err := d.GetVMStats(ctx, params.IDOrName)
return marshalResultOrError(api.VMStatsResult{VM: vm, Stats: stats}, err)
case "vm.logs":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.FindVM(ctx, params.IDOrName)
if err != nil {
return rpc.NewError("not_found", err.Error())
}
return marshalResultOrError(api.VMLogsResult{LogPath: vm.Runtime.LogPath}, nil)
case "vm.ssh":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.TouchVM(ctx, params.IDOrName)
if err != nil {
return rpc.NewError("not_found", err.Error())
}
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
return rpc.NewError("not_running", fmt.Sprintf("vm %s is not running", vm.Name))
}
return marshalResultOrError(api.VMSSHResult{Name: vm.Name, GuestIP: vm.Runtime.GuestIP}, nil)
case "vm.health":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
result, err := d.HealthVM(ctx, params.IDOrName)
return marshalResultOrError(result, err)
case "vm.ping":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
result, err := d.PingVM(ctx, params.IDOrName)
return marshalResultOrError(result, err)
case "vm.ports":
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
result, err := d.PortsVM(ctx, params.IDOrName)
return marshalResultOrError(result, err)
case "image.list":
images, err := d.store.ListImages(ctx)
return marshalResultOrError(api.ImageListResult{Images: images}, err)
case "image.show":
params, err := rpc.DecodeParams[api.ImageRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
image, err := d.FindImage(ctx, params.IDOrName)
return marshalResultOrError(api.ImageShowResult{Image: image}, err)
case "image.build":
params, err := rpc.DecodeParams[api.ImageBuildParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
image, err := d.BuildImage(ctx, params)
return marshalResultOrError(api.ImageShowResult{Image: image}, err)
case "image.register":
params, err := rpc.DecodeParams[api.ImageRegisterParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
image, err := d.RegisterImage(ctx, params)
return marshalResultOrError(api.ImageShowResult{Image: image}, err)
case "image.delete":
params, err := rpc.DecodeParams[api.ImageRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
image, err := d.DeleteImage(ctx, params.IDOrName)
return marshalResultOrError(api.ImageShowResult{Image: image}, err)
default:
return rpc.NewError("unknown_method", req.Method)
h, ok := rpcHandlers[req.Method]
if !ok {
return stampOpID(rpc.NewError("unknown_method", req.Method))
}
return stampOpID(h(ctx, d, req))
}
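
The per-method switch is gone; dispatch now looks the method up in a handler table and stamps the op id onto error responses. A sketch of what table entries might look like, assuming rpcHandlers maps method names to func(context.Context, *Daemon, rpc.Request) rpc.Response as the lookup above implies; the entries below are reconstructed from the removed switch, and the real table is defined elsewhere in the package, not in this diff:

// Hypothetical entries, for illustration only.
var rpcHandlers = map[string]func(context.Context, *Daemon, rpc.Request) rpc.Response{
	"ping": func(_ context.Context, d *Daemon, _ rpc.Request) rpc.Response {
		result, _ := rpc.NewResult(api.PingResult{Status: "ok", PID: d.pid})
		return result
	},
	"vm.show": func(ctx context.Context, d *Daemon, req rpc.Request) rpc.Response {
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.FindVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	},
}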
func (d *Daemon) backgroundLoop() {
@ -398,280 +407,194 @@ func (d *Daemon) backgroundLoop() {
case <-d.closing:
return
case <-statsTicker.C:
if err := d.pollStats(context.Background()); err != nil && d.logger != nil {
if err := d.stats.pollStats(context.Background()); err != nil && d.logger != nil {
d.logger.Error("background stats poll failed", "error", err.Error())
}
case <-staleTicker.C:
if err := d.stopStaleVMs(context.Background()); err != nil && d.logger != nil {
if err := d.stats.stopStaleVMs(context.Background()); err != nil && d.logger != nil {
d.logger.Error("background stale sweep failed", "error", err.Error())
}
d.vm.pruneVMCreateOperations(time.Now().Add(-10 * time.Minute))
}
}
}
func (d *Daemon) startVMDNS(addr string) error {
server, err := vmdns.New(addr, d.logger)
if err != nil {
return err
}
d.vmDNS = server
if d.logger != nil {
d.logger.Info("vm dns serving", "dns_addr", server.Addr())
}
return nil
}
func (d *Daemon) stopVMDNS() error {
if d.vmDNS == nil {
return nil
}
err := d.vmDNS.Close()
d.vmDNS = nil
return err
}
func (d *Daemon) ensureDefaultImage(ctx context.Context) error {
if d.config.DefaultImageName == "" {
return nil
}
desired, ok := d.desiredDefaultImage()
if !ok {
if d.logger != nil {
d.logger.Debug("default image skipped", "image_name", d.config.DefaultImageName, "rootfs_path", d.config.DefaultRootfs, "kernel_path", d.config.DefaultKernel)
}
return nil
}
image, err := d.store.GetImageByName(ctx, d.config.DefaultImageName)
switch {
case err == nil:
if image.Managed {
if d.logger != nil {
d.logger.Debug("managed default image left untouched", append(imageLogAttrs(image), "managed", image.Managed)...)
}
return nil
}
if defaultImageMatches(image, desired) {
if d.logger != nil {
d.logger.Debug("default image already current", imageLogAttrs(image)...)
}
return nil
}
updated := desired
updated.ID = image.ID
updated.CreatedAt = image.CreatedAt
updated.UpdatedAt = model.Now()
if err := d.store.UpsertImage(ctx, updated); err != nil {
return err
}
if d.logger != nil {
d.logger.Info("default image reconciled", append(imageLogAttrs(updated), "previous_rootfs_path", image.RootfsPath, "previous_work_seed_path", image.WorkSeedPath, "previous_kernel_path", image.KernelPath)...)
}
return nil
case errors.Is(err, sql.ErrNoRows):
id, err := model.NewID()
if err != nil {
return err
}
now := model.Now()
desired.ID = id
desired.CreatedAt = now
desired.UpdatedAt = now
if err := d.store.UpsertImage(ctx, desired); err != nil {
return err
}
if d.logger != nil {
d.logger.Info("default image registered", append(imageLogAttrs(desired), "managed", desired.Managed)...)
}
return nil
default:
return err
}
}
func (d *Daemon) desiredDefaultImage() (model.Image, bool) {
rootfs := d.config.DefaultRootfs
kernel := d.config.DefaultKernel
if !exists(rootfs) || !exists(kernel) {
return model.Image{}, false
}
return model.Image{
Name: d.config.DefaultImageName,
Managed: false,
ArtifactDir: "",
RootfsPath: rootfs,
WorkSeedPath: d.config.DefaultWorkSeed,
KernelPath: kernel,
InitrdPath: d.config.DefaultInitrd,
ModulesDir: d.config.DefaultModulesDir,
PackagesPath: d.config.DefaultPackagesFile,
Docker: strings.Contains(filepath.Base(rootfs), "docker"),
}, true
}
func defaultImageMatches(current, desired model.Image) bool {
return current.Name == desired.Name &&
current.Managed == desired.Managed &&
current.ArtifactDir == desired.ArtifactDir &&
current.RootfsPath == desired.RootfsPath &&
current.WorkSeedPath == desired.WorkSeedPath &&
current.KernelPath == desired.KernelPath &&
current.InitrdPath == desired.InitrdPath &&
current.ModulesDir == desired.ModulesDir &&
current.PackagesPath == desired.PackagesPath &&
current.Docker == desired.Docker
}
func (d *Daemon) reconcile(ctx context.Context) error {
op := d.beginOperation("daemon.reconcile")
op := d.beginOperation(ctx, "daemon.reconcile")
vms, err := d.store.ListVMs(ctx)
if err != nil {
return op.fail(err)
}
for _, vm := range vms {
if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if err := d.vm.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if vm.State != model.VMStateRunning {
// Belt-and-braces: a stopped VM should never have a
// scratch file or a cache entry. Clean up anything
// left behind by a previous daemon that exited ungracefully.
d.vm.clearVMHandles(vm)
return nil
}
if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
// Rebuild the in-memory handle cache by loading the per-VM
// scratch file and verifying the firecracker process is
// still alive.
h, alive, err := d.vm.rediscoverHandles(ctx, vm)
if err != nil && d.logger != nil {
d.logger.Warn("rediscover handles failed", "vm_id", vm.ID, "error", err.Error())
}
// Either way, seed the cache with what the scratch file
// claimed. If alive, subsequent vmAlive() calls pass; if
// not, cleanupRuntime needs these handles to know which
// kernel resources (DM / loops / tap) to tear down.
d.vm.setVMHandlesInMemory(vm.ID, h)
if alive {
return nil
}
op.stage("stale_vm", vmLogAttrs(vm)...)
_ = d.cleanupRuntime(ctx, vm, true)
_ = d.vm.cleanupRuntime(ctx, vm, true)
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
clearRuntimeTeardownState(&vm)
d.vm.clearVMHandles(vm)
vm.UpdatedAt = model.Now()
return d.store.UpsertVM(ctx, vm)
}); err != nil {
return op.fail(err, "vm_id", vm.ID)
}
}
if err := d.rebuildDNS(ctx); err != nil {
if err := d.vm.rebuildDNS(ctx); err != nil {
return op.fail(err)
}
op.done()
return nil
}
// FindVM stays on Daemon as a thin forwarder to the VM service lookup.
// Dispatch code reads the facade directly; tests that pre-date the
// service split keep compiling.
func (d *Daemon) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
if idOrName == "" {
return model.VMRecord{}, errors.New("vm id or name is required")
}
if vm, err := d.store.GetVM(ctx, idOrName); err == nil {
return vm, nil
}
vms, err := d.store.ListVMs(ctx)
if err != nil {
return model.VMRecord{}, err
}
matchCount := 0
var match model.VMRecord
for _, vm := range vms {
if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
match = vm
matchCount++
}
}
if matchCount == 1 {
return match, nil
}
if matchCount > 1 {
return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
}
return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
return d.vm.FindVM(ctx, idOrName)
}
// FindImage stays on Daemon as a thin forwarder to the image service
// lookup so callers reading dispatch code see the obvious facade, and
// tests that pre-date the service split still compile.
func (d *Daemon) FindImage(ctx context.Context, idOrName string) (model.Image, error) {
if idOrName == "" {
return model.Image{}, errors.New("image id or name is required")
}
if image, err := d.store.GetImageByName(ctx, idOrName); err == nil {
return image, nil
}
if image, err := d.store.GetImageByID(ctx, idOrName); err == nil {
return image, nil
}
images, err := d.store.ListImages(ctx)
if err != nil {
return model.Image{}, err
}
matchCount := 0
var match model.Image
for _, image := range images {
if strings.HasPrefix(image.ID, idOrName) || strings.HasPrefix(image.Name, idOrName) {
match = image
matchCount++
}
}
if matchCount == 1 {
return match, nil
}
if matchCount > 1 {
return model.Image{}, fmt.Errorf("multiple images match %q", idOrName)
}
return model.Image{}, fmt.Errorf("image %q not found", idOrName)
return d.img.FindImage(ctx, idOrName)
}
func (d *Daemon) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
system.TouchNow(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
return d.vm.TouchVM(ctx, idOrName)
}
// wireServices populates the four focused services and their peer
// references from the infrastructure already on d (runner, logger,
// config, layout, store, closing, plus the SSH-client test seams).
// Idempotent: each service is skipped if the field is already non-nil,
// so tests can preinstall stubs for the services they want to fake and
// let wireServices fill the rest. The peer-service closures on
// WorkspaceService capture d rather than a direct *VMService pointer so
// the ws↔vm construction order doesn't recurse: the closures read d.vm
// at call time, by which point it is populated.
func wireServices(d *Daemon) {
if d.priv == nil {
clientUID, clientGID := d.clientUID, d.clientGID
if clientUID == 0 && clientGID == 0 {
clientUID, clientGID = -1, -1
}
return vm, nil
})
}
func (d *Daemon) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
vm, err := d.FindVM(ctx, idOrName)
if err != nil {
return model.VMRecord{}, err
d.priv = newLocalPrivilegedOps(d.runner, d.logger, d.config, d.layout, clientUID, clientGID)
}
return d.withVMLockByID(ctx, vm.ID, fn)
}
func (d *Daemon) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
if strings.TrimSpace(id) == "" {
return model.VMRecord{}, errors.New("vm id is required")
if d.net == nil {
d.net = newHostNetwork(hostNetworkDeps{
runner: d.runner,
logger: d.logger,
config: d.config,
layout: d.layout,
closing: d.closing,
priv: d.priv,
})
}
unlock := d.lockVMID(id)
defer unlock()
vm, err := d.store.GetVMByID(ctx, id)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
}
return model.VMRecord{}, err
if d.img == nil {
d.img = newImageService(imageServiceDeps{
runner: d.runner,
logger: d.logger,
config: d.config,
layout: d.layout,
store: d.store,
beginOperation: d.beginOperation,
})
}
return fn(vm)
}
func (d *Daemon) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
if err := fn(vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
})
return err
}
func (d *Daemon) lockVMID(id string) func() {
d.vmLocksMu.Lock()
if d.vmLocks == nil {
d.vmLocks = make(map[string]*sync.Mutex)
if d.ws == nil {
d.ws = newWorkspaceService(workspaceServiceDeps{
runner: d.runner,
logger: d.logger,
config: d.config,
layout: d.layout,
store: d.store,
repoInspector: ws.NewInspector(),
vmResolver: func(ctx context.Context, idOrName string) (model.VMRecord, error) {
return d.vm.FindVM(ctx, idOrName)
},
aliveChecker: func(vm model.VMRecord) bool {
return d.vm.vmAlive(vm)
},
waitGuestSSH: d.waitForGuestSSH,
dialGuest: d.dialGuest,
imageResolver: func(ctx context.Context, idOrName string) (model.Image, error) {
return d.FindImage(ctx, idOrName)
},
imageWorkSeed: func(ctx context.Context, image model.Image, fingerprint string) error {
return d.img.refreshManagedWorkSeedFingerprint(ctx, image, fingerprint)
},
withVMLockByRef: func(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
return d.vm.withVMLockByRef(ctx, idOrName, fn)
},
beginOperation: d.beginOperation,
})
}
lock, ok := d.vmLocks[id]
if !ok {
lock = &sync.Mutex{}
d.vmLocks[id] = lock
if d.vm == nil {
d.vm = newVMService(vmServiceDeps{
runner: d.runner,
logger: d.logger,
config: d.config,
layout: d.layout,
store: d.store,
net: d.net,
img: d.img,
ws: d.ws,
priv: d.priv,
capHooks: d.buildCapabilityHooks(),
beginOperation: d.beginOperation,
vsockHostDevice: defaultVsockHostDevice,
})
}
if d.stats == nil {
// Closures capture d rather than d.vm directly, so they re-read
// d.vm at call time. Wire order (d.vm constructed above) makes
// the closures safe, but this pattern also protects against a
// future test that swaps d.vm after initial wire.
d.stats = newStatsService(statsServiceDeps{
runner: d.runner,
logger: d.logger,
config: d.config,
store: d.store,
net: d.net,
beginOperation: d.beginOperation,
vmAlive: func(vm model.VMRecord) bool { return d.vm.vmAlive(vm) },
vmHandles: func(id string) model.VMHandles { return d.vm.vmHandles(id) },
withVMLockByRef: func(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
return d.vm.withVMLockByRef(ctx, idOrName, fn)
},
withVMLockByIDErr: func(ctx context.Context, id string, fn func(model.VMRecord) error) error {
return d.vm.withVMLockByIDErr(ctx, id, fn)
},
cleanupRuntime: func(ctx context.Context, vm model.VMRecord, preserve bool) error {
return d.vm.cleanupRuntime(ctx, vm, preserve)
},
})
}
if len(d.vmCaps) == 0 {
d.vmCaps = d.defaultCapabilities()
}
d.vmLocksMu.Unlock()
lock.Lock()
return lock.Unlock
}
func marshalResultOrError(v any, err error) rpc.Response {

View file

@ -1,542 +1,255 @@
package daemon
import (
"bufio"
"context"
"encoding/json"
"errors"
"io"
"log/slog"
"net"
"os"
"path/filepath"
"strings"
"syscall"
"testing"
"time"
"banger/internal/api"
"banger/internal/buildinfo"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/store"
"banger/internal/system"
)
func TestEnsureDefaultImageUsesConfiguredDefaultRootfs(t *testing.T) {
dir := t.TempDir()
rootfs, kernel, _, _, _ := writeDefaultImageArtifacts(t, dir)
db := openDefaultImageStore(t, dir)
d := &Daemon{
config: model.DaemonConfig{
DefaultImageName: "default",
DefaultRootfs: rootfs,
DefaultKernel: kernel,
},
store: db,
}
if err := d.ensureDefaultImage(context.Background()); err != nil {
t.Fatalf("ensureDefaultImage: %v", err)
}
image, err := db.GetImageByName(context.Background(), "default")
if err != nil {
t.Fatalf("GetImageByName: %v", err)
}
if image.RootfsPath != rootfs {
t.Fatalf("RootfsPath = %q, want %q", image.RootfsPath, rootfs)
}
if image.KernelPath != kernel {
t.Fatalf("KernelPath = %q, want %q", image.KernelPath, kernel)
}
if image.Managed {
t.Fatal("default image should be unmanaged")
// TestAuthorizeConnRejectsNonUnixConn pins the type guard at the top
// of authorizeConn: SO_PEERCRED only makes sense on a unix socket, so
// anything else must be refused outright. net.Pipe gives us a
// connection that satisfies net.Conn but isn't a *net.UnixConn, which
// is exactly the shape we need to exercise the early-return.
func TestAuthorizeConnRejectsNonUnixConn(t *testing.T) {
d := &Daemon{}
pipeA, pipeB := net.Pipe()
defer pipeA.Close()
defer pipeB.Close()
if err := d.authorizeConn(pipeA); err == nil {
t.Fatal("authorizeConn(pipe) succeeded, want error")
}
}
func TestEnsureDefaultImageLeavesCurrentUnmanagedDefaultUntouched(t *testing.T) {
// TestAuthorizeConnAcceptsOwnerUIDOverUnixSocket pins the happy path:
// when the test process connects to a freshly bound unix socket as
// itself, the daemon's peer-cred check matches d.clientUID and lets
// the connection through.
func TestAuthorizeConnAcceptsOwnerUIDOverUnixSocket(t *testing.T) {
dir := t.TempDir()
rootfs, kernel, initrd, modulesDir, packages := writeDefaultImageArtifacts(t, dir)
db := openDefaultImageStore(t, dir)
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
sockPath := filepath.Join(dir, "test.sock")
listener, err := net.Listen("unix", sockPath)
if err != nil {
t.Fatalf("listen: %v", err)
}
defer listener.Close()
type result struct {
err error
}
got := make(chan result, 1)
go func() {
conn, err := listener.Accept()
if err != nil {
got <- result{err: err}
return
}
defer conn.Close()
d := &Daemon{clientUID: os.Getuid()}
got <- result{err: d.authorizeConn(conn)}
}()
client, err := net.Dial("unix", sockPath)
if err != nil {
t.Fatalf("dial: %v", err)
}
defer client.Close()
select {
case r := <-got:
if r.err != nil {
t.Fatalf("authorizeConn(unix self) = %v, want nil", r.err)
}
case <-time.After(2 * time.Second):
t.Fatal("authorizeConn never returned")
}
}
func TestRegisterImageRequiresKernel(t *testing.T) {
rootfs := filepath.Join(t.TempDir(), "rootfs.ext4")
if err := os.WriteFile(rootfs, []byte("rootfs"), 0o644); err != nil {
t.Fatalf("write rootfs: %v", err)
}
d := &Daemon{store: openDaemonStore(t)}
wireServices(d)
_, err := d.img.RegisterImage(context.Background(), api.ImageRegisterParams{
Name: "missing-kernel",
RootfsPath: rootfs,
})
if err == nil || !strings.Contains(err.Error(), "kernel path is required") {
t.Fatalf("RegisterImage() error = %v", err)
}
}
func TestDispatchPingIncludesBuildInfo(t *testing.T) {
d := &Daemon{pid: 42}
wireServices(d)
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version, Method: "ping"})
if !resp.OK {
t.Fatalf("dispatch(ping) = %+v, want ok", resp)
}
var got api.PingResult
if err := json.Unmarshal(resp.Result, &got); err != nil {
t.Fatalf("Unmarshal(PingResult): %v", err)
}
info := buildinfo.Current()
if got.Status != "ok" || got.PID != 42 {
t.Fatalf("PingResult = %+v, want status/pid populated", got)
}
if got.Version != info.Version || got.Commit != info.Commit || got.BuiltAt != info.BuiltAt {
t.Fatalf("PingResult build info = %+v, want %+v", got, info)
}
}
func TestServeReturnsOnContextCancel(t *testing.T) {
dir := t.TempDir()
runtimeDir := filepath.Join(dir, "runtime")
if err := os.MkdirAll(runtimeDir, 0o755); err != nil {
t.Fatalf("MkdirAll runtime: %v", err)
}
socketPath := filepath.Join(runtimeDir, "bangerd.sock")
probe, err := net.Listen("unix", filepath.Join(runtimeDir, "probe.sock"))
if err != nil {
if errors.Is(err, syscall.EPERM) || strings.Contains(err.Error(), "operation not permitted") {
t.Skipf("unix socket listen blocked in this environment: %v", err)
}
t.Fatalf("probe listen: %v", err)
}
_ = probe.Close()
_ = os.Remove(filepath.Join(runtimeDir, "probe.sock"))
d := &Daemon{
layout: paths.Layout{
RuntimeDir: runtimeDir,
SocketPath: socketPath,
},
config: model.DaemonConfig{
StatsPollInterval: time.Hour,
},
store: openDaemonStore(t),
runner: system.NewRunner(),
logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
closing: make(chan struct{}),
clientUID: -1,
clientGID: -1,
}
wireServices(d)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
serveErr := make(chan error, 1)
go func() {
serveErr <- d.Serve(ctx)
}()
deadline := time.Now().Add(2 * time.Second)
for {
if _, err := os.Stat(socketPath); err == nil {
break
}
select {
case err := <-serveErr:
t.Fatalf("Serve() returned before socket was ready: %v", err)
default:
}
if time.Now().After(deadline) {
t.Fatalf("socket %s not created before deadline", socketPath)
}
time.Sleep(25 * time.Millisecond)
}
cancel()
select {
case err := <-serveErr:
if err != nil {
t.Fatalf("Serve() error = %v, want nil on context cancel", err)
}
case <-time.After(2 * time.Second):
t.Fatal("Serve() did not return after context cancel")
}
}
func TestPromoteImageCopiesBootArtifactsIntoArtifactDir(t *testing.T) {
dir := t.TempDir()
rootfs := filepath.Join(dir, "rootfs.ext4")
kernel := filepath.Join(dir, "vmlinux")
initrd := filepath.Join(dir, "initrd.img")
modulesDir := filepath.Join(dir, "modules")
if err := os.MkdirAll(modulesDir, 0o755); err != nil {
t.Fatalf("mkdir modules: %v", err)
}
for path, data := range map[string]string{
rootfs: "rootfs",
kernel: "kernel",
initrd: "initrd",
filepath.Join(modulesDir, "depmod"): "modules",
} {
if err := os.WriteFile(path, []byte(data), 0o644); err != nil {
t.Fatalf("write %s: %v", path, err)
}
}
db := openDaemonStore(t)
image := model.Image{
ID: "default-id",
Name: "default",
Managed: false,
RootfsPath: rootfs,
KernelPath: kernel,
InitrdPath: initrd,
ModulesDir: modulesDir,
PackagesPath: packages,
Docker: true,
CreatedAt: now,
UpdatedAt: now,
ID: "img-promote",
Name: "void",
Managed: false,
RootfsPath: rootfs,
KernelPath: kernel,
InitrdPath: initrd,
ModulesDir: modulesDir,
CreatedAt: model.Now(),
UpdatedAt: model.Now(),
}
if err := db.UpsertImage(context.Background(), image); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{
config: model.DaemonConfig{
DefaultImageName: "default",
DefaultRootfs: rootfs,
DefaultKernel: kernel,
DefaultInitrd: initrd,
DefaultModulesDir: modulesDir,
DefaultPackagesFile: packages,
},
store: db,
}
if err := d.ensureDefaultImage(context.Background()); err != nil {
t.Fatalf("ensureDefaultImage: %v", err)
}
got, err := db.GetImageByName(context.Background(), "default")
if err != nil {
t.Fatalf("GetImageByName: %v", err)
}
if got.ID != image.ID {
t.Fatalf("ID = %q, want %q", got.ID, image.ID)
}
if !got.UpdatedAt.Equal(image.UpdatedAt) {
t.Fatalf("UpdatedAt = %s, want unchanged %s", got.UpdatedAt, image.UpdatedAt)
}
}
func TestEnsureDefaultImageReconcilesStaleUnmanagedDefaultInPlace(t *testing.T) {
dir := t.TempDir()
rootfs, kernel, initrd, modulesDir, packages := writeDefaultImageArtifacts(t, dir)
db := openDefaultImageStore(t, dir)
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
stale := model.Image{
ID: "default-id",
Name: "default",
Managed: false,
RootfsPath: "/home/thales/projects/personal/banger/rootfs-docker.ext4",
KernelPath: "/home/thales/projects/personal/banger/wtf/root/boot/vmlinux-6.8.0-94-generic",
InitrdPath: "/home/thales/projects/personal/banger/wtf/root/boot/initrd.img-6.8.0-94-generic",
ModulesDir: "/home/thales/projects/personal/banger/wtf/root/lib/modules/6.8.0-94-generic",
PackagesPath: "/home/thales/projects/personal/banger/packages.apt",
Docker: true,
CreatedAt: now,
UpdatedAt: now,
}
if err := db.UpsertImage(context.Background(), stale); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
vm := testVM("uses-default", stale.ID, "172.16.0.25")
if err := db.UpsertVM(context.Background(), vm); err != nil {
t.Fatalf("UpsertVM: %v", err)
imagesDir := filepath.Join(dir, "images")
if err := os.MkdirAll(imagesDir, 0o755); err != nil {
t.Fatalf("mkdir images dir: %v", err)
}
d := &Daemon{
config: model.DaemonConfig{
DefaultImageName: "default",
DefaultRootfs: rootfs,
DefaultKernel: kernel,
DefaultInitrd: initrd,
DefaultModulesDir: modulesDir,
DefaultPackagesFile: packages,
},
store: db,
layout: paths.Layout{ImagesDir: imagesDir},
store: db,
runner: system.NewRunner(),
}
if err := d.ensureDefaultImage(context.Background()); err != nil {
t.Fatalf("ensureDefaultImage: %v", err)
}
got, err := db.GetImageByName(context.Background(), "default")
wireServices(d)
got, err := d.img.PromoteImage(context.Background(), image.Name)
if err != nil {
t.Fatalf("GetImageByName: %v", err)
t.Fatalf("PromoteImage: %v", err)
}
if got.ID != stale.ID {
t.Fatalf("ID = %q, want preserved %q", got.ID, stale.ID)
if !got.Managed {
t.Fatal("promoted image should be managed")
}
if !got.CreatedAt.Equal(stale.CreatedAt) {
t.Fatalf("CreatedAt = %s, want preserved %s", got.CreatedAt, stale.CreatedAt)
}
if got.RootfsPath != rootfs || got.KernelPath != kernel || got.InitrdPath != initrd || got.ModulesDir != modulesDir || got.PackagesPath != packages {
t.Fatalf("stale default not reconciled: %+v", got)
}
if !got.UpdatedAt.After(stale.UpdatedAt) {
t.Fatalf("UpdatedAt = %s, want newer than %s", got.UpdatedAt, stale.UpdatedAt)
}
gotVM, err := db.GetVMByID(context.Background(), vm.ID)
if err != nil {
t.Fatalf("GetVMByID: %v", err)
}
if gotVM.ImageID != stale.ID {
t.Fatalf("VM image ID = %q, want preserved %q", gotVM.ImageID, stale.ID)
}
}
func TestEnsureDefaultImageLeavesManagedDefaultUntouched(t *testing.T) {
dir := t.TempDir()
rootfs, kernel, _, _, _ := writeDefaultImageArtifacts(t, dir)
db := openDefaultImageStore(t, dir)
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
managed := model.Image{
ID: "managed-default",
Name: "default",
Managed: true,
RootfsPath: "/managed/rootfs.ext4",
KernelPath: "/managed/vmlinux",
CreatedAt: now,
UpdatedAt: now,
}
if err := db.UpsertImage(context.Background(), managed); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{
config: model.DaemonConfig{
DefaultImageName: "default",
DefaultRootfs: rootfs,
DefaultKernel: kernel,
},
store: db,
}
if err := d.ensureDefaultImage(context.Background()); err != nil {
t.Fatalf("ensureDefaultImage: %v", err)
}
got, err := db.GetImageByName(context.Background(), "default")
if err != nil {
t.Fatalf("GetImageByName: %v", err)
}
if got.RootfsPath != managed.RootfsPath || got.KernelPath != managed.KernelPath {
t.Fatalf("managed default was rewritten: %+v", got)
}
}
func TestEnsureDefaultImageSkipsRewriteWhenCurrentArtifactsMissing(t *testing.T) {
dir := t.TempDir()
db := openDefaultImageStore(t, dir)
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
stale := model.Image{
ID: "default-id",
Name: "default",
Managed: false,
RootfsPath: "/old/rootfs.ext4",
KernelPath: "/old/vmlinux",
CreatedAt: now,
UpdatedAt: now,
}
if err := db.UpsertImage(context.Background(), stale); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{
config: model.DaemonConfig{
DefaultImageName: "default",
DefaultRootfs: filepath.Join(dir, "missing-rootfs.ext4"),
DefaultKernel: filepath.Join(dir, "missing-vmlinux"),
},
store: db,
}
if err := d.ensureDefaultImage(context.Background()); err != nil {
t.Fatalf("ensureDefaultImage: %v", err)
}
got, err := db.GetImageByName(context.Background(), "default")
if err != nil {
t.Fatalf("GetImageByName: %v", err)
}
if got.RootfsPath != stale.RootfsPath || got.KernelPath != stale.KernelPath {
t.Fatalf("default image should have stayed stale when no current artifacts exist: %+v", got)
}
}
func TestRegisterImageCreatesUnmanagedImage(t *testing.T) {
dir := t.TempDir()
rootfs, kernel, initrd, modulesDir, _ := writeDefaultImageArtifacts(t, dir)
workSeed := filepath.Join(dir, "rootfs-void.work-seed.ext4")
packages := filepath.Join(dir, "packages.void")
if err := os.WriteFile(workSeed, []byte("seed"), 0o644); err != nil {
t.Fatalf("WriteFile(workSeed): %v", err)
}
if err := os.WriteFile(packages, []byte("base-minimal\nopenssh\n"), 0o644); err != nil {
t.Fatalf("WriteFile(packages): %v", err)
}
db := openDefaultImageStore(t, dir)
d := &Daemon{
config: model.DaemonConfig{
DefaultKernel: kernel,
DefaultInitrd: initrd,
DefaultModulesDir: modulesDir,
},
store: db,
}
image, err := d.RegisterImage(context.Background(), api.ImageRegisterParams{
Name: "void-exp",
RootfsPath: rootfs,
WorkSeedPath: workSeed,
PackagesPath: packages,
})
if err != nil {
t.Fatalf("RegisterImage: %v", err)
}
if image.Managed {
t.Fatal("registered image should be unmanaged")
}
if image.Name != "void-exp" || image.RootfsPath != rootfs || image.WorkSeedPath != workSeed || image.KernelPath != kernel {
t.Fatalf("registered image = %+v", image)
}
}
func TestRegisterImageUpdatesExistingUnmanagedImageInPlace(t *testing.T) {
dir := t.TempDir()
_, kernel, initrd, modulesDir, _ := writeDefaultImageArtifacts(t, dir)
newRootfs := filepath.Join(dir, "rootfs-void-next.ext4")
newWorkSeed := filepath.Join(dir, "rootfs-void-next.work-seed.ext4")
packages := filepath.Join(dir, "packages.void")
for _, path := range []string{newRootfs, newWorkSeed} {
if err := os.WriteFile(path, []byte("next"), 0o644); err != nil {
t.Fatalf("WriteFile(%s): %v", path, err)
for _, path := range []string{got.RootfsPath, got.KernelPath, got.InitrdPath, got.ModulesDir} {
if !strings.HasPrefix(path, got.ArtifactDir) {
t.Fatalf("artifact path %q does not live under %q", path, got.ArtifactDir)
}
if _, err := os.Stat(path); err != nil {
t.Fatalf("stat %s: %v", path, err)
}
}
if err := os.WriteFile(packages, []byte("base-minimal\n"), 0o644); err != nil {
t.Fatalf("WriteFile(packages): %v", err)
}
db := openDefaultImageStore(t, dir)
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
existing := model.Image{
ID: "void-image-id",
Name: "void-exp",
Managed: false,
RootfsPath: filepath.Join(dir, "old-rootfs.ext4"),
KernelPath: kernel,
InitrdPath: initrd,
ModulesDir: modulesDir,
PackagesPath: packages,
CreatedAt: now,
UpdatedAt: now,
}
if err := db.UpsertImage(context.Background(), existing); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{
config: model.DaemonConfig{
DefaultKernel: kernel,
DefaultInitrd: initrd,
DefaultModulesDir: modulesDir,
},
store: db,
}
image, err := d.RegisterImage(context.Background(), api.ImageRegisterParams{
Name: "void-exp",
RootfsPath: newRootfs,
WorkSeedPath: newWorkSeed,
PackagesPath: packages,
})
if err != nil {
t.Fatalf("RegisterImage: %v", err)
}
if image.ID != existing.ID || !image.CreatedAt.Equal(existing.CreatedAt) {
t.Fatalf("updated image identity changed: %+v", image)
}
if image.RootfsPath != newRootfs || image.WorkSeedPath != newWorkSeed {
t.Fatalf("updated image paths not applied: %+v", image)
}
}
func TestRegisterImageRejectsManagedOverwrite(t *testing.T) {
dir := t.TempDir()
rootfs, kernel, _, _, _ := writeDefaultImageArtifacts(t, dir)
db := openDefaultImageStore(t, dir)
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
if err := db.UpsertImage(context.Background(), model.Image{
ID: "managed-id",
Name: "void-exp",
Managed: true,
RootfsPath: rootfs,
KernelPath: kernel,
CreatedAt: now,
UpdatedAt: now,
}); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{config: model.DaemonConfig{DefaultKernel: kernel}, store: db}
_, err := d.RegisterImage(context.Background(), api.ImageRegisterParams{
Name: "void-exp",
RootfsPath: rootfs,
})
if err == nil || !strings.Contains(err.Error(), "cannot be updated via register") {
t.Fatalf("RegisterImage(managed) error = %v", err)
}
}
func openDefaultImageStore(t *testing.T, dir string) *store.Store {
t.Helper()
db, err := store.Open(filepath.Join(dir, "state.db"))
if err != nil {
t.Fatalf("open store: %v", err)
}
t.Cleanup(func() {
_ = db.Close()
})
return db
}
func writeDefaultImageArtifacts(t *testing.T, dir string) (rootfs, kernel, initrd, modulesDir, packages string) {
t.Helper()
rootfs = filepath.Join(dir, "rootfs-docker.ext4")
kernel = filepath.Join(dir, "vmlinux")
initrd = filepath.Join(dir, "initrd.img")
modulesDir = filepath.Join(dir, "modules")
packages = filepath.Join(dir, "packages.apt")
files := []string{
rootfs,
kernel,
initrd,
packages,
filepath.Join(modulesDir, "modules.dep"),
}
for _, path := range files {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("mkdir %s: %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("test"), 0o644); err != nil {
t.Fatalf("write %s: %v", path, err)
}
}
return rootfs, kernel, initrd, modulesDir, packages
}
func TestStartVMDNSFailsWhenAddressBusy(t *testing.T) {
t.Parallel()
packetConn, err := net.ListenPacket("udp", "127.0.0.1:0")
if err != nil {
t.Fatalf("ListenPacket: %v", err)
}
defer packetConn.Close()
d := &Daemon{}
if err := d.startVMDNS(packetConn.LocalAddr().String()); err == nil {
t.Fatal("startVMDNS() succeeded on occupied address, want failure")
}
}
func TestSetDNSPublishesIntoDaemonServer(t *testing.T) {
t.Parallel()
d := &Daemon{}
if err := d.startVMDNS("127.0.0.1:0"); err != nil {
t.Fatalf("startVMDNS: %v", err)
}
defer d.stopVMDNS()
if err := d.setDNS(context.Background(), "devbox", "172.16.0.8"); err != nil {
t.Fatalf("setDNS: %v", err)
}
if _, ok := d.vmDNS.Lookup("devbox.vm"); !ok {
t.Fatal("devbox.vm missing after setDNS")
}
}
func TestDispatchUsesPassedContext(t *testing.T) {
t.Parallel()
db := openDefaultImageStore(t, t.TempDir())
d := &Daemon{store: db}
ctx, cancel := context.WithCancel(context.Background())
cancel()
resp := d.dispatch(ctx, rpc.Request{
Version: rpc.Version,
Method: "vm.list",
Params: mustJSON(t, api.Empty{}),
})
if resp.OK {
t.Fatal("dispatch() succeeded with canceled context")
}
if resp.Error == nil || !strings.Contains(resp.Error.Message, context.Canceled.Error()) {
t.Fatalf("dispatch() error = %+v, want context canceled", resp.Error)
}
}
func TestHandleConnCancelsRequestWhenClientDisconnects(t *testing.T) {
t.Parallel()
server, client := net.Pipe()
defer client.Close()
requestCanceled := make(chan struct{})
done := make(chan struct{})
d := &Daemon{
closing: make(chan struct{}),
requestHandler: func(ctx context.Context, req rpc.Request) rpc.Response {
if req.Method != "block" {
t.Errorf("request method = %q, want block", req.Method)
}
<-ctx.Done()
close(requestCanceled)
return rpc.NewError("operation_failed", ctx.Err().Error())
},
}
go func() {
d.handleConn(server)
close(done)
}()
if err := json.NewEncoder(client).Encode(rpc.Request{Version: rpc.Version, Method: "block"}); err != nil {
t.Fatalf("encode request: %v", err)
}
if err := client.Close(); err != nil {
t.Fatalf("close client: %v", err)
}
select {
case <-requestCanceled:
case <-time.After(2 * time.Second):
t.Fatal("request context was not canceled after client disconnect")
}
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatal("handleConn did not return after client disconnect")
}
}
func TestWatchRequestDisconnectCancelsContextOnEOF(t *testing.T) {
t.Parallel()
server, client := net.Pipe()
defer server.Close()
reader := bufio.NewReader(server)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
d := &Daemon{closing: make(chan struct{})}
stop := d.watchRequestDisconnect(server, reader, "block", cancel)
defer stop()
if err := client.Close(); err != nil {
t.Fatalf("close client: %v", err)
}
select {
case <-ctx.Done():
if !strings.Contains(ctx.Err().Error(), context.Canceled.Error()) {
t.Fatalf("ctx.Err() = %v, want canceled", ctx.Err())
}
case <-time.After(2 * time.Second):
t.Fatal("watchRequestDisconnect did not cancel context")
}
}
func mustJSON(t *testing.T, v any) []byte {
t.Helper()
data, err := json.Marshal(v)
if err != nil {
t.Fatalf("json.Marshal(%T): %v", v, err)
}
return data
}


@ -0,0 +1,241 @@
package daemon
import (
"bytes"
"io"
"log/slog"
"path/filepath"
"testing"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/system"
)
// testDaemonOpts collects everything newTestDaemon knows how to
// override. Nothing is exported: the zero value is "sensible defaults";
// tests pick overrides via option functions.
type testDaemonOpts struct {
runner system.CommandRunner
config *model.DaemonConfig
store *store.Store
logger *slog.Logger
layout *paths.Layout
vmCaps []vmCapability
vmCapsSet bool
vsockHostDevice string
}
// testDaemonOption applies a single override to testDaemonOpts. Pass
// any combination to newTestDaemon; later options win on conflict.
type testDaemonOption func(*testDaemonOpts)
// withRunner sets the system.CommandRunner used by HostNetwork,
// ImageService, WorkspaceService, and VMService. Most tests want
// permissiveRunner or scriptedRunner; the default is a permissive
// runner that returns empty output with no error.
func withRunner(r system.CommandRunner) testDaemonOption {
return func(o *testDaemonOpts) { o.runner = r }
}
// withConfig replaces the DaemonConfig. Useful for exercising config-
// dependent code paths (bridge name, firecracker binary path,
// default image name, etc.) without going through config.Load.
func withConfig(cfg model.DaemonConfig) testDaemonOption {
return func(o *testDaemonOpts) { o.config = &cfg }
}
// withStore reuses an externally-opened store instead of opening a
// fresh tempdir DB. Useful when the test needs to pre-seed rows
// before the daemon is wired.
func withStore(st *store.Store) testDaemonOption {
return func(o *testDaemonOpts) { o.store = st }
}
// withLogger routes daemon logs somewhere specific. Default is
// io.Discard so a passing test run stays quiet; failing tests that
// want structured log content can pass their own buffer-backed slog.
func withLogger(l *slog.Logger) testDaemonOption {
return func(o *testDaemonOpts) { o.logger = l }
}
// withLayout overrides the paths.Layout. Defaults build all dirs
// under t.TempDir() so tests don't interfere with each other and
// don't write into the user's real ~/.local/state/banger.
func withLayout(layout paths.Layout) testDaemonOption {
return func(o *testDaemonOpts) { o.layout = &layout }
}
// withVMCaps installs a specific capability list on the daemon.
// Default is no capabilities: newTestDaemon strips the built-in
// workDisk/dns/nat list that wireServices would otherwise install,
// because most harness tests don't want those firing real
// side-effects. Pass capability fakes to exercise dispatch paths.
func withVMCaps(caps ...vmCapability) testDaemonOption {
return func(o *testDaemonOpts) {
o.vmCaps = caps
o.vmCapsSet = true
}
}
// withVsockHostDevice overrides the /dev/vhost-vsock path VMService
// checks during preflight. Useful for tests that need RequireFile to
// succeed against a tempfile without root access to the real device.
func withVsockHostDevice(path string) testDaemonOption {
return func(o *testDaemonOpts) { o.vsockHostDevice = path }
}
// newTestDaemon builds a wired *Daemon backed by tempdir state,
// ready for tests that drive service methods or dispatch logic.
// All infrastructure comes from either t.TempDir() or the
// provided overrides; nothing touches the invoking user's real
// state.
//
// What the harness gives you by default:
//
// - paths.Layout rooted at t.TempDir() (distinct StateDir,
// ConfigDir, CacheDir, VMsDir, ImagesDir, KernelsDir, SSHDir,
// KnownHostsPath)
// - fresh store.Store opened against a tempdir state.db with all
// migrations run, auto-closed on t.Cleanup
// - permissiveRunner returning empty output + no error for every
// Run/RunSudo call (override with scriptedRunner or any other
// system.CommandRunner when you need assertion-style scripting)
// - io.Discard logger (quiet tests)
// - empty vmCaps (so default capability side-effects don't fire)
// - defaultVsockHostDevice on VMService (tests that need this to
// resolve via RequireFile should pass withVsockHostDevice to a
// tempfile)
//
// Returns the wired *Daemon. Every service pointer is non-nil;
// d.store is non-nil; d.vmCaps is exactly what the test asked for.
func newTestDaemon(t *testing.T, opts ...testDaemonOption) *Daemon {
t.Helper()
applied := testDaemonOpts{}
for _, opt := range opts {
opt(&applied)
}
layout := applied.layout
if layout == nil {
dir := t.TempDir()
layout = &paths.Layout{
StateDir: filepath.Join(dir, "state"),
ConfigDir: filepath.Join(dir, "config"),
CacheDir: filepath.Join(dir, "cache"),
VMsDir: filepath.Join(dir, "state", "vms"),
ImagesDir: filepath.Join(dir, "state", "images"),
KernelsDir: filepath.Join(dir, "state", "kernels"),
SSHDir: filepath.Join(dir, "state", "ssh"),
KnownHostsPath: filepath.Join(dir, "state", "ssh", "known_hosts"),
DBPath: filepath.Join(dir, "state", "state.db"),
SocketPath: filepath.Join(dir, "state", "banger.sock"),
RuntimeDir: filepath.Join(dir, "runtime"),
}
}
st := applied.store
if st == nil {
st = openDaemonStore(t)
}
runner := applied.runner
if runner == nil {
runner = &permissiveRunner{}
}
logger := applied.logger
if logger == nil {
logger = slog.New(slog.NewTextHandler(io.Discard, nil))
}
cfg := model.DaemonConfig{
StatsPollInterval: model.DefaultStatsPollInterval,
BridgeName: model.DefaultBridgeName,
BridgeIP: model.DefaultBridgeIP,
CIDR: model.DefaultCIDR,
DefaultDNS: model.DefaultDNS,
}
if applied.config != nil {
cfg = *applied.config
}
d := &Daemon{
layout: *layout,
config: cfg,
store: st,
runner: runner,
logger: logger,
vmCaps: applied.vmCaps,
}
wireServices(d)
// wireServices fills in the default workDisk/dns/nat capability
// list when vmCaps is empty at call time — that's the production
// path. Harness callers who didn't opt in to capabilities via
// withVMCaps explicitly want them OFF so their test doesn't
// accidentally fire real NAT rules or a DNS publish. Reset to
// nil here; withVMCaps sets vmCapsSet to skip this reset.
if !applied.vmCapsSet {
d.vmCaps = nil
}
if applied.vsockHostDevice != "" {
d.vm.vsockHostDevice = applied.vsockHostDevice
}
return d
}
// TestNewTestDaemonDefaults pins the contract new callers rely on:
// a zero-option call returns a fully-wired daemon with every service
// pointer populated, a writable tempdir-backed store, and an empty
// capability list (so nothing fires real side-effects). If any of
// those invariants drift, every test that switches to newTestDaemon
// will silently start exercising different behaviour.
func TestNewTestDaemonDefaults(t *testing.T) {
d := newTestDaemon(t)
if d.net == nil || d.img == nil || d.ws == nil || d.vm == nil {
t.Fatalf("wireServices left a service nil: net=%v img=%v ws=%v vm=%v",
d.net != nil, d.img != nil, d.ws != nil, d.vm != nil)
}
if d.store == nil {
t.Fatal("store is nil; harness must provide a working store")
}
if len(d.vmCaps) != 0 {
t.Fatalf("vmCaps = %d, want 0 (harness default must not fire real capabilities)", len(d.vmCaps))
}
if d.vm.vsockHostDevice != defaultVsockHostDevice {
t.Fatalf("vsockHostDevice = %q, want default %q", d.vm.vsockHostDevice, defaultVsockHostDevice)
}
}
// TestNewTestDaemonOptionsOverride verifies the option functions
// actually land on the resulting Daemon. Guard against a silent
// rename breaking option plumbing.
func TestNewTestDaemonOptionsOverride(t *testing.T) {
var buf bytes.Buffer
customLogger := slog.New(slog.NewTextHandler(&buf, nil))
customRunner := &countingRunner{}
customVsock := filepath.Join(t.TempDir(), "vhost-vsock")
customCap := testCapability{name: "marker"}
d := newTestDaemon(t,
withLogger(customLogger),
withRunner(customRunner),
withVsockHostDevice(customVsock),
withVMCaps(customCap),
)
if d.logger != customLogger {
t.Error("withLogger: logger not overridden")
}
if d.runner != customRunner {
t.Error("withRunner: runner not overridden")
}
if d.vm.vsockHostDevice != customVsock {
t.Errorf("withVsockHostDevice: got %q, want %q", d.vm.vsockHostDevice, customVsock)
}
if len(d.vmCaps) != 1 || d.vmCaps[0].Name() != "marker" {
t.Errorf("withVMCaps: vmCaps = %v, want one 'marker' cap", d.vmCaps)
}
}
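
A minimal sketch of a test leaning on the harness above — the config values, the vsock path, and the store call are illustrative, and it assumes the test file also imports "context":

// Illustrative sketch only; not part of this change.
func TestExampleHarnessUsage(t *testing.T) {
	d := newTestDaemon(t,
		withConfig(model.DaemonConfig{
			DefaultImageName: "default",
			BridgeName:       model.DefaultBridgeName,
		}),
		withVsockHostDevice(filepath.Join(t.TempDir(), "vhost-vsock")),
	)
	// Every service is wired and the store is a throwaway tempdir DB,
	// so store-backed calls work with no further setup.
	if _, err := d.store.ListVMs(context.Background()); err != nil {
		t.Fatalf("ListVMs on fresh harness store: %v", err)
	}
}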

internal/daemon/dispatch.go Normal file

@ -0,0 +1,309 @@
package daemon
import (
"context"
"fmt"
"banger/internal/api"
"banger/internal/buildinfo"
"banger/internal/rpc"
)
// handler is the signature every RPC method dispatches through. Keeps
// Daemon.dispatch a one-liner — lookup + invoke — instead of the old
// ~240-line `switch`. Handlers receive the *Daemon as a call-time
// parameter (passed by the driver) rather than closing over one baked
// into the map, so tests that stand up a *Daemon with custom wiring
// reuse the same table without re-registering anything.
type handler func(ctx context.Context, d *Daemon, req rpc.Request) rpc.Response
// paramHandler wraps the common "decode params of type P, call
// service returning (R, error), wrap R" flow that 28 of 34 methods
// follow. Compile-time type-safe — no reflection. P and R are
// deduced from the function literal passed in, so per-handler
// registration reads as "what's the RPC shape + what's the service
// call" and nothing else.
func paramHandler[P any, R any](call func(ctx context.Context, d *Daemon, p P) (R, error)) handler {
return func(ctx context.Context, d *Daemon, req rpc.Request) rpc.Response {
p, err := rpc.DecodeParams[P](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
result, err := call(ctx, d, p)
return marshalResultOrError(result, err)
}
}
// noParamHandler is the decode-free variant for RPC methods that
// take no params (ping, shutdown, *.list, kernel.catalog).
func noParamHandler[R any](call func(ctx context.Context, d *Daemon) (R, error)) handler {
return func(ctx context.Context, d *Daemon, _ rpc.Request) rpc.Response {
result, err := call(ctx, d)
return marshalResultOrError(result, err)
}
}
// rpcHandlers maps every supported method name to its handler. Adding
// or removing a method is a single-line diff here — unlike the old
// switch, there's no four-line decode/call/wrap boilerplate to copy.
// The four special-case handlers (vm.logs, vm.ssh, ping, shutdown)
// live below the map; they need pre-service validation or raw result
// encoding that the generic wrapper can't express.
var rpcHandlers = map[string]handler{
"ping": pingHandler,
"shutdown": shutdownHandler,
"daemon.operations.list": noParamHandler(daemonOperationsListDispatch),
"vm.create": paramHandler(vmCreateDispatch),
"vm.create.begin": paramHandler(vmCreateBeginDispatch),
"vm.create.status": paramHandler(vmCreateStatusDispatch),
"vm.create.cancel": paramHandler(vmCreateCancelDispatch),
"vm.list": noParamHandler(vmListDispatch),
"vm.show": paramHandler(vmShowDispatch),
"vm.start": paramHandler(vmStartDispatch),
"vm.stop": paramHandler(vmStopDispatch),
"vm.kill": paramHandler(vmKillDispatch),
"vm.restart": paramHandler(vmRestartDispatch),
"vm.delete": paramHandler(vmDeleteDispatch),
"vm.set": paramHandler(vmSetDispatch),
"vm.stats": paramHandler(vmStatsDispatch),
"vm.logs": vmLogsHandler,
"vm.ssh": vmSSHHandler,
"vm.health": paramHandler(vmHealthDispatch),
"vm.ping": paramHandler(vmPingDispatch),
"vm.ports": paramHandler(vmPortsDispatch),
"vm.workspace.prepare": paramHandler(workspacePrepareDispatch),
"vm.workspace.export": paramHandler(workspaceExportDispatch),
"image.list": noParamHandler(imageListDispatch),
"image.show": paramHandler(imageShowDispatch),
"image.register": paramHandler(imageRegisterDispatch),
"image.promote": paramHandler(imagePromoteDispatch),
"image.delete": paramHandler(imageDeleteDispatch),
"image.pull": paramHandler(imagePullDispatch),
"image.cache.prune": paramHandler(imageCachePruneDispatch),
"kernel.list": noParamHandler(kernelListDispatch),
"kernel.show": paramHandler(kernelShowDispatch),
"kernel.delete": paramHandler(kernelDeleteDispatch),
"kernel.import": paramHandler(kernelImportDispatch),
"kernel.pull": paramHandler(kernelPullDispatch),
"kernel.catalog": noParamHandler(kernelCatalogDispatch),
}
// ---- Service-call adapters (kept thin; the interesting shape is up
// ---- in the `paramHandler` generic). These exist so the map entries
// ---- stay readable at a glance.
func vmCreateDispatch(ctx context.Context, d *Daemon, p api.VMCreateParams) (api.VMShowResult, error) {
vm, err := d.vm.CreateVM(ctx, p)
return api.VMShowResult{VM: vm}, err
}
func vmCreateBeginDispatch(ctx context.Context, d *Daemon, p api.VMCreateParams) (api.VMCreateBeginResult, error) {
op, err := d.vm.BeginVMCreate(ctx, p)
return api.VMCreateBeginResult{Operation: op}, err
}
func vmCreateStatusDispatch(ctx context.Context, d *Daemon, p api.VMCreateStatusParams) (api.VMCreateStatusResult, error) {
op, err := d.vm.VMCreateStatus(ctx, p.ID)
return api.VMCreateStatusResult{Operation: op}, err
}
func vmCreateCancelDispatch(ctx context.Context, d *Daemon, p api.VMCreateStatusParams) (api.Empty, error) {
return api.Empty{}, d.vm.CancelVMCreate(ctx, p.ID)
}
func vmListDispatch(ctx context.Context, d *Daemon) (api.VMListResult, error) {
vms, err := d.store.ListVMs(ctx)
return api.VMListResult{VMs: vms}, err
}
func vmShowDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMShowResult, error) {
vm, err := d.vm.FindVM(ctx, p.IDOrName)
return api.VMShowResult{VM: vm}, err
}
func vmStartDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMShowResult, error) {
vm, err := d.vm.StartVM(ctx, p.IDOrName)
return api.VMShowResult{VM: vm}, err
}
func vmStopDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMShowResult, error) {
vm, err := d.vm.StopVM(ctx, p.IDOrName)
return api.VMShowResult{VM: vm}, err
}
func vmKillDispatch(ctx context.Context, d *Daemon, p api.VMKillParams) (api.VMShowResult, error) {
vm, err := d.vm.KillVM(ctx, p)
return api.VMShowResult{VM: vm}, err
}
func vmRestartDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMShowResult, error) {
vm, err := d.vm.RestartVM(ctx, p.IDOrName)
return api.VMShowResult{VM: vm}, err
}
func vmDeleteDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMShowResult, error) {
vm, err := d.vm.DeleteVM(ctx, p.IDOrName)
return api.VMShowResult{VM: vm}, err
}
func vmSetDispatch(ctx context.Context, d *Daemon, p api.VMSetParams) (api.VMShowResult, error) {
vm, err := d.vm.SetVM(ctx, p)
return api.VMShowResult{VM: vm}, err
}
func vmStatsDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMStatsResult, error) {
vm, stats, err := d.stats.GetVMStats(ctx, p.IDOrName)
return api.VMStatsResult{VM: vm, Stats: stats}, err
}
func vmHealthDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMHealthResult, error) {
return d.stats.HealthVM(ctx, p.IDOrName)
}
func vmPingDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMPingResult, error) {
return d.stats.PingVM(ctx, p.IDOrName)
}
func vmPortsDispatch(ctx context.Context, d *Daemon, p api.VMRefParams) (api.VMPortsResult, error) {
return d.stats.PortsVM(ctx, p.IDOrName)
}
func workspacePrepareDispatch(ctx context.Context, d *Daemon, p api.VMWorkspacePrepareParams) (api.VMWorkspacePrepareResult, error) {
ws, err := d.ws.PrepareVMWorkspace(ctx, p)
return api.VMWorkspacePrepareResult{Workspace: ws}, err
}
func workspaceExportDispatch(ctx context.Context, d *Daemon, p api.WorkspaceExportParams) (api.WorkspaceExportResult, error) {
return d.ws.ExportVMWorkspace(ctx, p)
}
func imageListDispatch(ctx context.Context, d *Daemon) (api.ImageListResult, error) {
images, err := d.store.ListImages(ctx)
return api.ImageListResult{Images: images}, err
}
func imageShowDispatch(ctx context.Context, d *Daemon, p api.ImageRefParams) (api.ImageShowResult, error) {
image, err := d.img.FindImage(ctx, p.IDOrName)
return api.ImageShowResult{Image: image}, err
}
func imageRegisterDispatch(ctx context.Context, d *Daemon, p api.ImageRegisterParams) (api.ImageShowResult, error) {
image, err := d.img.RegisterImage(ctx, p)
return api.ImageShowResult{Image: image}, err
}
func imagePromoteDispatch(ctx context.Context, d *Daemon, p api.ImageRefParams) (api.ImageShowResult, error) {
image, err := d.img.PromoteImage(ctx, p.IDOrName)
return api.ImageShowResult{Image: image}, err
}
func imageDeleteDispatch(ctx context.Context, d *Daemon, p api.ImageRefParams) (api.ImageShowResult, error) {
image, err := d.img.DeleteImage(ctx, p.IDOrName)
return api.ImageShowResult{Image: image}, err
}
func imagePullDispatch(ctx context.Context, d *Daemon, p api.ImagePullParams) (api.ImageShowResult, error) {
image, err := d.img.PullImage(ctx, p)
return api.ImageShowResult{Image: image}, err
}
func imageCachePruneDispatch(ctx context.Context, d *Daemon, p api.ImageCachePruneParams) (api.ImageCachePruneResult, error) {
return d.img.PruneOCICache(ctx, p)
}
func daemonOperationsListDispatch(ctx context.Context, d *Daemon) (api.OperationsListResult, error) {
return d.ListOperations(ctx)
}
func kernelListDispatch(ctx context.Context, d *Daemon) (api.KernelListResult, error) {
return d.img.KernelList(ctx)
}
func kernelShowDispatch(ctx context.Context, d *Daemon, p api.KernelRefParams) (api.KernelShowResult, error) {
entry, err := d.img.KernelShow(ctx, p.Name)
return api.KernelShowResult{Entry: entry}, err
}
func kernelDeleteDispatch(ctx context.Context, d *Daemon, p api.KernelRefParams) (api.Empty, error) {
return api.Empty{}, d.img.KernelDelete(ctx, p.Name)
}
func kernelImportDispatch(ctx context.Context, d *Daemon, p api.KernelImportParams) (api.KernelShowResult, error) {
entry, err := d.img.KernelImport(ctx, p)
return api.KernelShowResult{Entry: entry}, err
}
func kernelPullDispatch(ctx context.Context, d *Daemon, p api.KernelPullParams) (api.KernelShowResult, error) {
entry, err := d.img.KernelPull(ctx, p)
return api.KernelShowResult{Entry: entry}, err
}
func kernelCatalogDispatch(ctx context.Context, d *Daemon) (api.KernelCatalogResult, error) {
return d.img.KernelCatalog(ctx)
}
// ---- Special-case handlers: pre-service validation, custom error
// ---- codes, or raw rpc.NewResult encoding — things the generic
// ---- wrapper can't express.
// pingHandler is info-only: no service call, just a snapshot of
// build metadata. Raw rpc.NewResult to match the pre-refactor
// encoding; marshalResultOrError would over-wrap this.
func pingHandler(_ context.Context, d *Daemon, _ rpc.Request) rpc.Response {
info := buildinfo.Current()
result, _ := rpc.NewResult(api.PingResult{
Status: "ok",
PID: d.pid,
Version: info.Version,
Commit: info.Commit,
BuiltAt: info.BuiltAt,
})
return result
}
// shutdownHandler triggers async daemon shutdown. `d.Close` runs in
// a goroutine so the RPC response reaches the client before the
// listener closes.
func shutdownHandler(_ context.Context, d *Daemon, _ rpc.Request) rpc.Response {
go d.Close()
result, _ := rpc.NewResult(api.ShutdownResult{Status: "stopping"})
return result
}
// vmLogsHandler needs the "not_found" error code (distinct from
// "operation_failed") when FindVM misses, so the CLI can print a
// cleaner message. The generic paramHandler maps every service err
// to "operation_failed".
func vmLogsHandler(ctx context.Context, d *Daemon, req rpc.Request) rpc.Response {
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.vm.FindVM(ctx, params.IDOrName)
if err != nil {
return rpc.NewError("not_found", err.Error())
}
return marshalResultOrError(api.VMLogsResult{LogPath: vm.Runtime.LogPath}, nil)
}
// vmSSHHandler does two pre-service validations: FindVM / TouchVM
// for "not_found", then vmAlive for "not_running". Both distinct
// error codes feed cleaner CLI output.
func vmSSHHandler(ctx context.Context, d *Daemon, req rpc.Request) rpc.Response {
params, err := rpc.DecodeParams[api.VMRefParams](req)
if err != nil {
return rpc.NewError("bad_request", err.Error())
}
vm, err := d.vm.TouchVM(ctx, params.IDOrName)
if err != nil {
return rpc.NewError("not_found", err.Error())
}
if !d.vm.vmAlive(vm) {
return rpc.NewError("not_running", fmt.Sprintf("vm %s is not running", vm.Name))
}
return marshalResultOrError(api.VMSSHResult{Name: vm.Name, GuestIP: vm.Runtime.GuestIP}, nil)
}
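
To make the single-line registration promised above concrete, a hypothetical new method would add one map entry plus one thin adapter. Neither `vm.rename`, `api.VMRenameParams`, nor `RenameVM` exists in this change; the shapes simply mirror the adapters in this file:

// Hypothetical sketch — names below are not part of this change.
//
// In rpcHandlers:
//	"vm.rename": paramHandler(vmRenameDispatch),
//
// Alongside the other adapters:
func vmRenameDispatch(ctx context.Context, d *Daemon, p api.VMRenameParams) (api.VMShowResult, error) {
	vm, err := d.vm.RenameVM(ctx, p)
	return api.VMShowResult{VM: vm}, err
}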


@ -0,0 +1,143 @@
package daemon
import (
"context"
"sort"
"strings"
"testing"
"banger/internal/rpc"
)
// TestRPCHandlersMatchDocumentedMethods pins the surface of the RPC
// table: adding or removing a method should be an explicit, reviewable
// change. If the keyset drifts and this test isn't updated alongside,
// that's a red flag — either the documented list is stale, or a
// method sneaked in without being discussed.
//
// The expected list is the single source of truth for "methods
// banger speaks." Any production code consulting it (CLI completions,
// docs generator) can grep this test.
func TestRPCHandlersMatchDocumentedMethods(t *testing.T) {
expected := []string{
"image.cache.prune",
"image.delete",
"image.list",
"image.promote",
"image.pull",
"image.register",
"image.show",
"kernel.catalog",
"kernel.delete",
"kernel.import",
"kernel.list",
"kernel.pull",
"kernel.show",
"daemon.operations.list",
"ping",
"shutdown",
"vm.create",
"vm.create.begin",
"vm.create.cancel",
"vm.create.status",
"vm.delete",
"vm.health",
"vm.kill",
"vm.list",
"vm.logs",
"vm.ping",
"vm.ports",
"vm.restart",
"vm.set",
"vm.show",
"vm.ssh",
"vm.start",
"vm.stats",
"vm.stop",
"vm.workspace.export",
"vm.workspace.prepare",
}
got := make([]string, 0, len(rpcHandlers))
for name := range rpcHandlers {
got = append(got, name)
}
sort.Strings(got)
sort.Strings(expected)
if len(got) != len(expected) {
t.Fatalf("method count: got %d, want %d\n got: %v\n want: %v", len(got), len(expected), got, expected)
}
for i := range expected {
if got[i] != expected[i] {
t.Fatalf("method[%d]: got %q, want %q\n full got: %v\n full want: %v", i, got[i], expected[i], got, expected)
}
}
}
// TestRPCHandlersAllNonNil catches a silly-but-possible footgun:
// registering a method with a nil function literal.
func TestRPCHandlersAllNonNil(t *testing.T) {
for name, h := range rpcHandlers {
if h == nil {
t.Errorf("rpcHandlers[%q] = nil", name)
}
}
}
// TestDispatchStampsOpIDOnError pins the contract that every error
// response leaving dispatch carries an op_id, even on the
// short-circuit paths (bad_version, unknown_method) that never
// reach a handler. Operators rely on this id to correlate a CLI
// failure to a daemon log line.
func TestDispatchStampsOpIDOnError(t *testing.T) {
d := &Daemon{}
t.Run("unknown_method", func(t *testing.T) {
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version, Method: "no.such.method"})
if resp.OK {
t.Fatalf("expected error response, got %+v", resp)
}
if resp.Error == nil || resp.Error.Code != "unknown_method" {
t.Fatalf("error = %+v, want unknown_method", resp.Error)
}
if !strings.HasPrefix(resp.Error.OpID, "op-") {
t.Fatalf("op_id = %q, want op-* prefix", resp.Error.OpID)
}
})
t.Run("bad_version", func(t *testing.T) {
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version + 99, Method: "ping"})
if resp.OK {
t.Fatalf("expected error response, got %+v", resp)
}
if resp.Error == nil || resp.Error.Code != "bad_version" {
t.Fatalf("error = %+v, want bad_version", resp.Error)
}
if !strings.HasPrefix(resp.Error.OpID, "op-") {
t.Fatalf("op_id = %q, want op-* prefix", resp.Error.OpID)
}
})
}
// TestDispatchPropagatesOpIDFromContext covers the case where a
// handler returns its own rpc.NewError with an empty op_id (most
// service errors do); the dispatch wrapper must stamp the
// dispatch-generated id on the way out.
func TestDispatchPropagatesOpIDFromContext(t *testing.T) {
d := &Daemon{
requestHandler: func(_ context.Context, _ rpc.Request) rpc.Response {
return rpc.NewError("operation_failed", "deliberate test failure")
},
}
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version, Method: "anything"})
if resp.OK || resp.Error == nil {
t.Fatalf("expected error response, got %+v", resp)
}
if !strings.HasPrefix(resp.Error.OpID, "op-") {
t.Fatalf("dispatch did not stamp op_id: %+v", resp.Error)
}
}


@ -0,0 +1,128 @@
// Package dmsnap wraps the host-side device-mapper snapshot operations used
// to give each VM a copy-on-write view over a shared rootfs image. It issues
// losetup/dmsetup via a system.CommandRunner-compatible runner.
package dmsnap
import (
"context"
"errors"
"fmt"
"strings"
"time"
)
// Runner is the narrow command-runner surface dmsnap needs. system.Runner
// satisfies it.
type Runner interface {
RunSudo(ctx context.Context, args ...string) ([]byte, error)
}
// Handles records the loop devices and dm target allocated for a snapshot.
// Callers pass it back to Cleanup to unwind in the right order.
type Handles struct {
BaseLoop string
COWLoop string
DMName string
DMDev string
}
// Create sets up a dm-snapshot named dmName layering cowPath over rootfsPath.
// On failure it cleans up whatever it had attached so far.
func Create(ctx context.Context, runner Runner, rootfsPath, cowPath, dmName string) (handles Handles, err error) {
defer func() {
if err == nil {
return
}
if cleanupErr := Cleanup(context.Background(), runner, handles); cleanupErr != nil {
err = errors.Join(err, cleanupErr)
}
}()
baseBytes, err := runner.RunSudo(ctx, "losetup", "-f", "--show", "--read-only", rootfsPath)
if err != nil {
return handles, err
}
handles.BaseLoop = strings.TrimSpace(string(baseBytes))
cowBytes, err := runner.RunSudo(ctx, "losetup", "-f", "--show", cowPath)
if err != nil {
return handles, err
}
handles.COWLoop = strings.TrimSpace(string(cowBytes))
sectorsBytes, err := runner.RunSudo(ctx, "blockdev", "--getsz", handles.BaseLoop)
if err != nil {
return handles, err
}
sectors := strings.TrimSpace(string(sectorsBytes))
if _, err := runner.RunSudo(ctx, "dmsetup", "create", dmName, "--table", fmt.Sprintf("0 %s snapshot %s %s P 8", sectors, handles.BaseLoop, handles.COWLoop)); err != nil {
return handles, err
}
handles.DMName = dmName
handles.DMDev = "/dev/mapper/" + dmName
return handles, nil
}
// Cleanup tears down a snapshot: remove the dm target, then detach the loops.
// Missing-handle errors (already cleaned up) are ignored.
func Cleanup(ctx context.Context, runner Runner, handles Handles) error {
var cleanupErr error
switch {
case handles.DMName != "":
if err := Remove(ctx, runner, handles.DMName); err != nil {
cleanupErr = errors.Join(cleanupErr, err)
}
case handles.DMDev != "":
if err := Remove(ctx, runner, handles.DMDev); err != nil {
cleanupErr = errors.Join(cleanupErr, err)
}
}
if handles.COWLoop != "" {
if _, err := runner.RunSudo(ctx, "losetup", "-d", handles.COWLoop); err != nil {
if !isMissing(err) {
cleanupErr = errors.Join(cleanupErr, err)
}
}
}
if handles.BaseLoop != "" {
if _, err := runner.RunSudo(ctx, "losetup", "-d", handles.BaseLoop); err != nil {
if !isMissing(err) {
cleanupErr = errors.Join(cleanupErr, err)
}
}
}
return cleanupErr
}
// Remove retries dmsetup remove while the device is briefly busy after
// detach. Missing targets succeed.
func Remove(ctx context.Context, runner Runner, target string) error {
deadline := time.Now().Add(15 * time.Second)
for {
if _, err := runner.RunSudo(ctx, "dmsetup", "remove", target); err != nil {
if isMissing(err) {
return nil
}
if strings.Contains(err.Error(), "Device or resource busy") && time.Now().Before(deadline) {
time.Sleep(100 * time.Millisecond)
continue
}
return err
}
return nil
}
}
func isMissing(err error) bool {
if err == nil {
return false
}
msg := err.Error()
return strings.Contains(msg, "No such device or address") ||
strings.Contains(msg, "not found") ||
strings.Contains(msg, "does not exist")
}
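
A usage sketch of the API above. The daemon-side call sites are not part of this diff, so the paths and the wrapper function are illustrative; only Create, Cleanup, Handles, and the Runner interface come from this file:

// Illustrative only; paths and function name are made up.
func exampleSnapshot(ctx context.Context, runner dmsnap.Runner) (dev string, cleanup func(), err error) {
	handles, err := dmsnap.Create(ctx, runner,
		"/var/lib/banger/images/rootfs.ext4", // shared read-only base image
		"/var/lib/banger/vms/vm-1/cow.img",   // per-VM copy-on-write file
		"fc-rootfs-vm-1")
	if err != nil {
		return "", nil, err // Create already unwound any partially attached loops
	}
	cleanup = func() { _ = dmsnap.Cleanup(context.Background(), runner, handles) }
	// handles.DMDev (/dev/mapper/fc-rootfs-vm-1) is the block device the VM boots from.
	return handles.DMDev, cleanup, nil
}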


@ -0,0 +1,288 @@
package dmsnap
import (
"context"
"errors"
"strings"
"testing"
)
// scriptedRunner records every RunSudo call's argv and plays back a
// scripted sequence of (out, err) responses. Going past the script is
// a fatal error so an unexpected extra call shows up clearly. Mirrors
// the pattern used by internal/daemon/fcproc/fcproc_test.go but stays
// local to dmsnap (this is a leaf package).
type scriptedRunner struct {
t *testing.T
scripts []scriptedReply
calls [][]string
}
type scriptedReply struct {
out []byte
err error
}
func (r *scriptedRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
r.t.Helper()
r.calls = append(r.calls, append([]string(nil), args...))
if len(r.scripts) == 0 {
r.t.Fatalf("unexpected RunSudo call %d: %v", len(r.calls), args)
}
step := r.scripts[0]
r.scripts = r.scripts[1:]
return step.out, step.err
}
func argsContain(args []string, want ...string) bool {
if len(args) < len(want) {
return false
}
for i, w := range want {
if args[i] != w {
return false
}
}
return true
}
// TestCreateOrdersOpsAndPopulatesHandles pins the four-step setup
// sequence Create runs in: losetup base (read-only), losetup cow,
// blockdev getsz, dmsetup create with a snapshot table. If the order
// drifts the helper would build dm targets backed by the wrong
// device, which silently corrupts every VM that uses the snapshot.
func TestCreateOrdersOpsAndPopulatesHandles(t *testing.T) {
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{out: []byte("/dev/loop0\n")}, // losetup -f --show --read-only rootfs
{out: []byte("/dev/loop1\n")}, // losetup -f --show cow
{out: []byte("16384\n")}, // blockdev --getsz /dev/loop0
{}, // dmsetup create
},
}
handles, err := Create(context.Background(), runner, "/state/rootfs.ext4", "/state/cow.img", "fc-rootfs-test")
if err != nil {
t.Fatalf("Create: %v", err)
}
if len(runner.calls) != 4 {
t.Fatalf("got %d RunSudo calls, want 4", len(runner.calls))
}
if !argsContain(runner.calls[0], "losetup", "-f", "--show", "--read-only", "/state/rootfs.ext4") {
t.Fatalf("call 0 = %v, want read-only losetup of rootfs", runner.calls[0])
}
if !argsContain(runner.calls[1], "losetup", "-f", "--show", "/state/cow.img") {
t.Fatalf("call 1 = %v, want losetup of cow", runner.calls[1])
}
if !argsContain(runner.calls[2], "blockdev", "--getsz", "/dev/loop0") {
t.Fatalf("call 2 = %v, want blockdev getsz on base loop", runner.calls[2])
}
if !argsContain(runner.calls[3], "dmsetup", "create", "fc-rootfs-test") {
t.Fatalf("call 3 = %v, want dmsetup create of dm name", runner.calls[3])
}
// The snapshot table must reference the base + cow loops in that
// order. Pin it so a future refactor can't accidentally swap them
// (which would make the COW the read-only side and corrupt every
// write).
tableArg := runner.calls[3][len(runner.calls[3])-1]
if !strings.Contains(tableArg, "snapshot /dev/loop0 /dev/loop1") {
t.Fatalf("dmsetup table = %q, want 'snapshot /dev/loop0 /dev/loop1'", tableArg)
}
if handles.BaseLoop != "/dev/loop0" || handles.COWLoop != "/dev/loop1" {
t.Fatalf("loops = %+v, want base=loop0 cow=loop1", handles)
}
if handles.DMName != "fc-rootfs-test" || handles.DMDev != "/dev/mapper/fc-rootfs-test" {
t.Fatalf("dm names = %+v, want fc-rootfs-test", handles)
}
}
// TestCreateFailureRunsCleanup verifies that a partial setup is
// unwound on failure: if dmsetup create fails after both loops are
// attached, Create must release them via losetup -d before returning.
// Without this the host accumulates orphan loop devices on every
// failed VM start.
func TestCreateFailureRunsCleanup(t *testing.T) {
dmCreateErr := errors.New("dmsetup table refused")
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{out: []byte("/dev/loop0\n")}, // losetup base
{out: []byte("/dev/loop1\n")}, // losetup cow
{out: []byte("16384\n")}, // blockdev getsz
{err: dmCreateErr}, // dmsetup create fails
{}, // cleanup: losetup -d /dev/loop1
{}, // cleanup: losetup -d /dev/loop0
},
}
_, err := Create(context.Background(), runner, "/state/rootfs.ext4", "/state/cow.img", "fc-rootfs-test")
if !errors.Is(err, dmCreateErr) {
t.Fatalf("Create error = %v, want dmsetup error to bubble", err)
}
if len(runner.calls) != 6 {
t.Fatalf("got %d RunSudo calls, want 6 (4 setup + 2 cleanup)", len(runner.calls))
}
// Cleanup order: cow first, then base, mirroring stack unwind.
if !argsContain(runner.calls[4], "losetup", "-d", "/dev/loop1") {
t.Fatalf("call 4 = %v, want losetup -d on cow loop", runner.calls[4])
}
if !argsContain(runner.calls[5], "losetup", "-d", "/dev/loop0") {
t.Fatalf("call 5 = %v, want losetup -d on base loop", runner.calls[5])
}
}
// TestCleanupOrdersDmsetupBeforeLosetup pins the destruction order:
// the dm target must come down BEFORE the loops it sits on are
// detached, otherwise dmsetup remove sees EBUSY because the target's
// backing devices vanished mid-flight.
func TestCleanupOrdersDmsetupBeforeLosetup(t *testing.T) {
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{}, // dmsetup remove fc-rootfs-test
{}, // losetup -d cow
{}, // losetup -d base
},
}
handles := Handles{
BaseLoop: "/dev/loop0",
COWLoop: "/dev/loop1",
DMName: "fc-rootfs-test",
DMDev: "/dev/mapper/fc-rootfs-test",
}
if err := Cleanup(context.Background(), runner, handles); err != nil {
t.Fatalf("Cleanup: %v", err)
}
if len(runner.calls) != 3 {
t.Fatalf("got %d RunSudo calls, want 3", len(runner.calls))
}
if !argsContain(runner.calls[0], "dmsetup", "remove", "fc-rootfs-test") {
t.Fatalf("call 0 = %v, want dmsetup remove first", runner.calls[0])
}
if !argsContain(runner.calls[1], "losetup", "-d", "/dev/loop1") {
t.Fatalf("call 1 = %v, want cow loop detach second", runner.calls[1])
}
if !argsContain(runner.calls[2], "losetup", "-d", "/dev/loop0") {
t.Fatalf("call 2 = %v, want base loop detach last", runner.calls[2])
}
}
// TestCleanupFallsBackToDMDevWhenNameEmpty covers the "we only know
// the /dev/mapper path" branch — Remove accepts either form, and
// Cleanup picks DMDev when DMName isn't recorded (older state files
// only stored the path).
func TestCleanupFallsBackToDMDevWhenNameEmpty(t *testing.T) {
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{}, // dmsetup remove /dev/mapper/fc-rootfs-test
{}, // losetup -d cow
{}, // losetup -d base
},
}
handles := Handles{
BaseLoop: "/dev/loop0",
COWLoop: "/dev/loop1",
DMDev: "/dev/mapper/fc-rootfs-test",
// DMName intentionally empty.
}
if err := Cleanup(context.Background(), runner, handles); err != nil {
t.Fatalf("Cleanup: %v", err)
}
if !argsContain(runner.calls[0], "dmsetup", "remove", "/dev/mapper/fc-rootfs-test") {
t.Fatalf("call 0 = %v, want dmsetup remove of DMDev path", runner.calls[0])
}
}
// TestCleanupTolerantOfMissingLoops pins the idempotency contract:
// running cleanup against handles whose loops are already detached
// (e.g. a daemon crash mid-cleanup, then a second pass) returns nil
// rather than failing. dmsnap.isMissing recognises kernel/losetup's
// "No such device" wording.
func TestCleanupTolerantOfMissingLoops(t *testing.T) {
missing := errors.New("losetup: /dev/loop1: No such device or address")
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{}, // dmsetup remove ok
{err: missing}, // losetup -d cow: already gone
{err: missing}, // losetup -d base: already gone
},
}
handles := Handles{
BaseLoop: "/dev/loop0",
COWLoop: "/dev/loop1",
DMName: "fc-rootfs-test",
}
if err := Cleanup(context.Background(), runner, handles); err != nil {
t.Fatalf("Cleanup: %v, want nil for already-gone loops", err)
}
}
// TestCleanupSurfacesUnexpectedLoopErrors confirms that NON-missing
// errors do bubble up — the idempotency guard is narrow on purpose,
// so an EBUSY or permission error from losetup actually fails the
// cleanup.
func TestCleanupSurfacesUnexpectedLoopErrors(t *testing.T) {
wedged := errors.New("losetup: /dev/loop1: device is busy")
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{},
{err: wedged},
{},
},
}
handles := Handles{
BaseLoop: "/dev/loop0",
COWLoop: "/dev/loop1",
DMName: "fc-rootfs-test",
}
err := Cleanup(context.Background(), runner, handles)
if !errors.Is(err, wedged) {
t.Fatalf("Cleanup error = %v, want busy error to bubble", err)
}
}
// TestRemoveReturnsNilOnMissingTarget mirrors the loop-cleanup
// idempotency guard: an absent dm target is the desired end state, so
// Remove returns nil without retrying.
func TestRemoveReturnsNilOnMissingTarget(t *testing.T) {
missing := errors.New("dmsetup: target not found")
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{err: missing},
},
}
if err := Remove(context.Background(), runner, "fc-rootfs-test"); err != nil {
t.Fatalf("Remove: %v, want nil for missing target", err)
}
if len(runner.calls) != 1 {
t.Fatalf("got %d RunSudo calls, want 1 (missing should not retry)", len(runner.calls))
}
}
// TestRemoveBubblesNonRetryableErrors covers the third Remove branch:
// errors that aren't busy and aren't missing must surface immediately
// so the daemon can record the failure and clean up by other means.
func TestRemoveBubblesNonRetryableErrors(t *testing.T) {
denied := errors.New("dmsetup: permission denied")
runner := &scriptedRunner{
t: t,
scripts: []scriptedReply{
{err: denied},
},
}
err := Remove(context.Background(), runner, "fc-rootfs-test")
if !errors.Is(err, denied) {
t.Fatalf("Remove error = %v, want permission error to bubble", err)
}
if len(runner.calls) != 1 {
t.Fatalf("got %d RunSudo calls, want 1 (permission error should not retry)", len(runner.calls))
}
}

View file

@ -0,0 +1,47 @@
package daemon
import (
"context"
"strings"
)
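// vmResolverRouteDomain is the route-only domain handed to resolvectl:
// the leading "~" makes resolved route lookups for *.vm names to the
// bridge link's DNS server without adding "vm" as a search domain.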
const vmResolverRouteDomain = "~vm"
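// syncVMDNSResolverRouting points systemd-resolved at the embedded VM
// DNS server for the bridge link (route-only ~vm domain, no default
// route) via the privileged-ops helper. It is a quiet no-op when no VM
// DNS server is configured, the bridge name is unset, resolvectl is not
// installed, or the bridge link does not exist yet.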
func (n *HostNetwork) syncVMDNSResolverRouting(ctx context.Context) error {
if n == nil || n.vmDNS == nil {
return nil
}
if strings.TrimSpace(n.config.BridgeName) == "" {
return nil
}
if _, err := n.lookupExecutable("resolvectl"); err != nil {
return nil
}
if _, err := n.runner.Run(ctx, "ip", "link", "show", n.config.BridgeName); err != nil {
return nil
}
serverAddr := strings.TrimSpace(n.vmDNSAddr(n.vmDNS))
if serverAddr == "" {
return nil
}
return n.privOps().SyncResolverRouting(ctx, serverAddr)
}
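// clearVMDNSResolverRouting reverts the bridge link's resolved
// configuration via the privileged-ops helper. Like the sync path, it
// is a no-op when the bridge name is unset, resolvectl is missing, or
// the bridge link does not exist.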
func (n *HostNetwork) clearVMDNSResolverRouting(ctx context.Context) error {
if n == nil || strings.TrimSpace(n.config.BridgeName) == "" {
return nil
}
if _, err := n.lookupExecutable("resolvectl"); err != nil {
return nil
}
if _, err := n.runner.Run(ctx, "ip", "link", "show", n.config.BridgeName); err != nil {
return nil
}
return n.privOps().ClearResolverRouting(ctx)
}
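// ensureVMDNSResolverRouting is the fire-and-forget wrapper for callers
// that want a sync failure logged as a warning rather than returned.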
func (n *HostNetwork) ensureVMDNSResolverRouting(ctx context.Context) {
if err := n.syncVMDNSResolverRouting(ctx); err != nil && n.logger != nil {
n.logger.Warn("vm dns resolver route sync failed", "bridge", n.config.BridgeName, "error", err.Error())
}
}

View file

@ -0,0 +1,62 @@
package daemon
import (
"context"
"testing"
"banger/internal/model"
"banger/internal/vmdns"
)
func TestSyncVMDNSResolverRoutingConfiguresResolved(t *testing.T) {
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "ip", args: []string{"link", "show", model.DefaultBridgeName}}, out: []byte("1: br-fc\n")},
sudoStep("", nil, "resolvectl", "dns", model.DefaultBridgeName, "127.0.0.1:42069"),
sudoStep("", nil, "resolvectl", "domain", model.DefaultBridgeName, vmResolverRouteDomain),
sudoStep("", nil, "resolvectl", "default-route", model.DefaultBridgeName, "no"),
},
}
cfg := model.DaemonConfig{BridgeName: model.DefaultBridgeName}
n := &HostNetwork{
runner: runner, config: cfg, vmDNS: new(vmdns.Server),
lookupExecutable: func(name string) (string, error) {
if name == "resolvectl" {
return "/usr/bin/resolvectl", nil
}
return "", nil
},
vmDNSAddr: func(*vmdns.Server) string { return "127.0.0.1:42069" },
}
if err := n.syncVMDNSResolverRouting(context.Background()); err != nil {
t.Fatalf("syncVMDNSResolverRouting: %v", err)
}
runner.assertExhausted()
}
func TestClearVMDNSResolverRoutingRevertsBridgeConfig(t *testing.T) {
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "ip", args: []string{"link", "show", model.DefaultBridgeName}}, out: []byte("1: br-fc\n")},
sudoStep("", nil, "resolvectl", "revert", model.DefaultBridgeName),
},
}
cfg := model.DaemonConfig{BridgeName: model.DefaultBridgeName}
n := &HostNetwork{
runner: runner, config: cfg,
lookupExecutable: func(name string) (string, error) {
if name == "resolvectl" {
return "/usr/bin/resolvectl", nil
}
return "", nil
},
}
if err := n.clearVMDNSResolverRouting(context.Background()); err != nil {
t.Fatalf("clearVMDNSResolverRouting: %v", err)
}
runner.assertExhausted()
}

87
internal/daemon/doc.go Normal file
View file

@ -0,0 +1,87 @@
// Package daemon hosts the Banger owner-daemon process.
//
// The daemon exposes a JSON-RPC endpoint over a Unix socket. The
// *Daemon type is a thin composition root: it holds shared
// infrastructure (store, runner, logger, layout, config, listener,
// privileged-ops adapter) plus pointers to four focused services and
// forwards RPCs to them.
//
// On the supported systemd install path, this package runs inside
// `bangerd.service` as the configured owner user and delegates
// privileged host-kernel operations to `bangerd-root.service` through
// the privileged-ops seam. Non-system/dev paths use the same seam with
// an in-process adapter instead.
//
// Services:
//
// *HostNetwork Bridge / tap pool / NAT / DNS / firecracker
// process / DM snapshots / vsock readiness.
// Owns tapPool and vmDNS.
// *ImageService Register / promote / delete / pull (bundle +
// OCI) / kernel catalog / managed-seed refresh.
// Owns imageOpsMu.
// *WorkspaceService workspace.prepare / workspace.export + the
// per-VM authorised-key and git-identity sync
// that runs at start. Owns workspaceLocks.
// *VMService VM lifecycle (create/start/stop/restart/kill/
// delete/set), stats, ports, preflight. Owns
// vmLocks, createVMMu, createOps, handles.
//
// Subpackages (stateless helpers):
//
// internal/daemon/opstate Generic Registry[T AsyncOp].
// internal/daemon/dmsnap Device-mapper COW snapshot lifecycle.
// internal/daemon/fcproc Firecracker process helpers.
// internal/daemon/imagemgr Image subsystem helpers.
// internal/daemon/workspace Workspace helpers.
//
// File inventory:
//
// daemon.go Composition root, Open/Close/Serve, dispatch,
// reconcile orchestrator, backgroundLoop.
// host_network.go HostNetwork struct + constructor.
// image_service.go ImageService struct + constructor + FindImage.
// workspace_service.go WorkspaceService struct + constructor.
// vm_service.go VMService struct + constructor + FindVM,
// TouchVM, withVMLock* family, lockVMID.
//
// nat.go, dns_routing.go, tap_pool.go, snapshot.go HostNetwork methods.
// images.go, images_pull.go, image_seed.go, kernels.go ImageService methods.
// workspace.go, vm_authsync.go WorkspaceService methods.
// vm_lifecycle.go, vm_create.go, vm_create_ops.go,
// vm_stats.go, vm_set.go, vm_disk.go, vm_handles.go,
// ports.go, preflight.go VMService methods.
//
// vm.go Cross-service constants, rebuildDNS /
// cleanupRuntime / generateName (*VMService),
// and small stateless utilities.
// capabilities.go Pluggable capability hooks executed at VM
// start. Each capability is a plain struct
// with explicit service-pointer fields
// (workDiskCapability carries vm+ws+store,
// dnsCapability carries net, natCapability
// carries vm+net+logger). wireServices builds
// the default list; VMService invokes hooks
// through a capabilityHooks seam. No hook
// reaches back to *Daemon.
// vm_locks.go vmLockSet primitive.
// guest_ssh.go guestSSHClient, dialGuest, waitForGuestSSH.
// ssh_client_config.go Daemon-managed SSH client key material.
// doctor.go Host diagnostics.
// logger.go slog configuration.
// runtime_assets.go Companion-binary paths.
//
// Lock ordering:
//
// VMService.vmLocks[id] → WorkspaceService.workspaceLocks[id]
// → {VMService.createVMMu, ImageService.imageOpsMu}
// → subsystem-local locks
//
// vmLocks[id] and workspaceLocks[id] are NEVER held at the same
// time. workspace.prepare acquires vmLocks[id] only long enough to
// validate VM state, releases it, then acquires workspaceLocks[id]
// for the slow guest I/O phase. Lifecycle ops (start/stop/delete/
// set) hold vmLocks[id] across the whole flow. Subsystem-local
// locks (tapPool.mu, opstate.Registry mu, handleCache.mu) are
// leaves. See ARCHITECTURE.md for details.
package daemon
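The ordering contract above is easiest to see in code. Below is a minimal, self-contained sketch — not code from this repository; lockSet and prepareWorkspace are hypothetical stand-ins for vmLockSet and the real workspace.prepare flow — showing how a handler validates under vmLocks[id], releases it, and only then takes workspaceLocks[id] for the slow phase, so the two per-VM locks are never held together.

package lockorder

import (
	"context"
	"sync"
)

// lockSet is a stand-in for the per-VM lock primitive (vmLockSet in the
// real package): one mutex per VM id, created lazily under a guard mutex.
type lockSet struct {
	mu    sync.Mutex
	locks map[string]*sync.Mutex
}

// lock acquires the per-id mutex and returns its unlock function.
func (s *lockSet) lock(id string) func() {
	s.mu.Lock()
	if s.locks == nil {
		s.locks = map[string]*sync.Mutex{}
	}
	m, ok := s.locks[id]
	if !ok {
		m = &sync.Mutex{}
		s.locks[id] = m
	}
	s.mu.Unlock()
	m.Lock()
	return m.Unlock
}

// prepareWorkspace illustrates the contract: a short validation window
// under vmLocks[id], released before the slow guest I/O phase runs under
// workspaceLocks[id].
func prepareWorkspace(ctx context.Context, vmLocks, workspaceLocks *lockSet, id string,
	validate func(string) error, slowIO func(context.Context, string) error) error {
	unlockVM := vmLocks.lock(id)
	err := validate(id)
	unlockVM() // released BEFORE the workspace lock is taken
	if err != nil {
		return err
	}
	unlockWS := workspaceLocks.lock(id)
	defer unlockWS()
	return slowIO(ctx, id)
}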

View file

@ -3,99 +3,598 @@ package daemon
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"banger/internal/buildinfo"
"banger/internal/config"
"banger/internal/firecracker"
"banger/internal/imagecat"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/system"
)
// systemdSystemDir is the path systemd reads enabled units from. Pulled
// out as a var (not a const) so the security-posture tests can swap it
// for a tempdir without faking /etc/systemd/system on the test host.
var systemdSystemDir = "/etc/systemd/system"
func Doctor(ctx context.Context) (system.Report, error) {
layout, err := paths.Resolve()
userLayout, err := paths.Resolve()
if err != nil {
return system.Report{}, err
}
cfg, err := config.Load(layout)
cfg, err := config.Load(userLayout)
if err != nil {
return system.Report{}, err
}
layout := paths.ResolveSystem()
// Doctor must be read-only: running it should never mutate the
// state DB (no migrations, no WAL checkpoint, no pragma writes).
// Skip OpenReadOnly entirely when the DB file doesn't exist —
// that's a fresh install, not an error condition. The first
// daemon start will create the file. storeMissing differentiates
// "no DB yet" (pass) from "DB present but unreadable" (fail) in
// the report.
d := &Daemon{
layout: layout,
config: cfg,
runner: system.NewRunner(),
layout: layout,
userLayout: userLayout,
config: cfg,
runner: system.NewRunner(),
}
return d.doctorReport(ctx), nil
var storeErr error
storeMissing := false
if _, statErr := os.Stat(layout.DBPath); statErr != nil {
if os.IsNotExist(statErr) {
storeMissing = true
} else {
storeErr = statErr
}
} else {
db, err := store.OpenReadOnly(layout.DBPath)
if err != nil {
storeErr = err
} else {
defer db.Close()
d.store = db
}
}
wireServices(d)
return d.doctorReport(ctx, storeErr, storeMissing), nil
}
func (d *Daemon) doctorReport(ctx context.Context) system.Report {
func (d *Daemon) doctorReport(ctx context.Context, storeErr error, storeMissing bool) system.Report {
report := system.Report{}
report.AddPreflight("runtime bundle", d.runtimeBundleChecks(), runtimeBundleStatus(d.config))
addArchitectureCheck(&report)
addBangerVersionCheck(&report, installmeta.DefaultPath)
switch {
case storeMissing:
report.AddPass("state store", "will be created on first daemon start at "+d.layout.DBPath)
case storeErr != nil:
report.AddFail(
"state store",
fmt.Sprintf("open %s: %v", d.layout.DBPath, storeErr),
"remove or restore the file if corrupt; otherwise check its permissions",
)
default:
report.AddPass("state store", "readable at "+d.layout.DBPath)
}
report.AddPreflight("host runtime", d.runtimeChecks(), runtimeStatus(d.config))
report.AddPreflight("core vm lifecycle", d.coreVMLifecycleChecks(), "required host tools available")
report.AddPreflight("vsock guest agent", d.vsockChecks(), "vsock agent prerequisites available")
report.AddPreflight("vsock guest agent", d.vsockChecks(), "vsock guest agent prerequisites available")
d.addVMDefaultsCheck(&report)
d.addSSHShortcutCheck(&report)
d.addCapabilityDoctorChecks(ctx, &report)
report.AddPreflight("image build", d.imageBuildChecks(ctx), "image build prerequisites available")
d.addFirecrackerVersionCheck(ctx, &report)
d.addSecurityPostureChecks(ctx, &report)
return report
}
func (d *Daemon) runtimeBundleChecks() *system.Preflight {
checks := system.NewPreflight()
hint := paths.RuntimeBundleHint()
checks.RequireExecutable(d.config.FirecrackerBin, "firecracker binary", hint)
checks.RequireFile(d.config.SSHKeyPath, "runtime ssh private key", `refresh the runtime bundle`)
checks.RequireExecutable(d.config.VSockAgentPath, "vsock agent", `run 'make build' or refresh the runtime bundle`)
checks.RequireFile(d.config.DefaultRootfs, "default rootfs image", `set "default_rootfs" or refresh the runtime bundle`)
checks.RequireFile(d.config.DefaultKernel, "kernel image", `set "default_kernel" or refresh the runtime bundle`)
if strings.TrimSpace(d.config.DefaultInitrd) != "" {
checks.RequireFile(d.config.DefaultInitrd, "initrd image", `set "default_initrd" or refresh the runtime bundle`)
// addFirecrackerVersionCheck verifies the configured firecracker
// binary exists, is recent enough for banger's expectations
// (firecracker.MinSupportedVersion), and surfaces a distro-aware
// install hint if it's missing. Four outcomes:
//
// - present + version in [Min, Tested]: PASS.
// - present + version above Tested: WARN. Newer firecracker
// usually works (the API is stable within a major), but it's
// outside banger's tested window.
// - present + version below Min: FAIL with the upgrade hint.
// - missing entirely: FAIL with a guess at the user's package
// manager plus the upstream Releases URL.
//
// We intentionally don't use the generic RequireExecutable preflight
// for this check — its static hint string can't carry the distro
// dispatch.
func (d *Daemon) addFirecrackerVersionCheck(ctx context.Context, report *system.Report) {
binPath := strings.TrimSpace(d.config.FirecrackerBin)
if binPath == "" {
binPath = "firecracker"
}
if strings.TrimSpace(d.config.DefaultPackagesFile) != "" {
checks.RequireFile(d.config.DefaultPackagesFile, "package manifest", `set "default_packages_file" or refresh the runtime bundle`)
resolved, err := system.LookupExecutable(binPath)
if err != nil {
details := []string{fmt.Sprintf("not found: %s", binPath)}
details = append(details, firecrackerInstallHint(osReleaseSource)...)
report.AddFail("firecracker binary", details...)
return
}
parsed, err := firecracker.QueryVersion(ctx, d.runner, resolved)
if err != nil {
report.AddFail("firecracker binary",
fmt.Sprintf("`%s --version` failed: %v", resolved, err),
"reinstall firecracker; see https://github.com/firecracker-microvm/firecracker/releases")
return
}
reported := parsed.String()
min := firecracker.MustParseSemVer(firecracker.MinSupportedVersion)
tested := firecracker.MustParseSemVer(firecracker.KnownTestedVersion)
switch {
case parsed.Compare(min) < 0:
report.AddFail("firecracker binary",
fmt.Sprintf("%s at %s; banger requires ≥ v%s", reported, resolved, firecracker.MinSupportedVersion),
"upgrade firecracker — see https://github.com/firecracker-microvm/firecracker/releases")
case parsed.Compare(tested) > 0:
report.AddWarn("firecracker binary",
fmt.Sprintf("%s at %s (newer than banger's tested v%s; usually works)", reported, resolved, firecracker.KnownTestedVersion))
default:
report.AddPass("firecracker binary",
fmt.Sprintf("%s at %s (within tested range; min v%s, tested v%s)",
reported, resolved, firecracker.MinSupportedVersion, firecracker.KnownTestedVersion))
}
}
// osReleaseSource is the file the install-hint reads to detect the
// host distro. Var rather than const so doctor tests can swap in a
// fixture.
var osReleaseSource = "/etc/os-release"
// firecrackerInstallHint returns 1-2 detail lines describing how to
// install firecracker on the current host: a one-line guess based on
// /etc/os-release when the distro is recognised, plus the upstream
// Releases URL as a universal fallback. Anything we can't recognise
// gets only the URL — better silence than wrong instructions.
func firecrackerInstallHint(osReleasePath string) []string {
hints := []string{}
if cmd := guessFirecrackerInstallCommand(osReleasePath); cmd != "" {
hints = append(hints, "install: "+cmd)
}
hints = append(hints, "or download a static binary from https://github.com/firecracker-microvm/firecracker/releases")
return hints
}
// guessFirecrackerInstallCommand reads osReleasePath and returns a
// short, copy-pasteable install command for the detected distro, or
// "" when no reliable mapping applies. We only suggest commands for
// distros where firecracker is actually packaged — guessing wrong
// here would send users on a wild goose chase.
func guessFirecrackerInstallCommand(osReleasePath string) string {
data, err := os.ReadFile(osReleasePath)
if err != nil {
return ""
}
id, idLike := parseOSReleaseIDs(string(data))
candidates := append([]string{id}, strings.Fields(idLike)...)
for _, c := range candidates {
switch c {
case "debian":
// Packaged in Debian since trixie / bookworm-backports.
return "sudo apt install firecracker"
case "arch", "manjaro", "endeavouros":
// AUR; we don't assume a specific helper, but `paru` is the
// common one. Users who prefer yay/makepkg/etc. will
// substitute mentally.
return "paru -S firecracker # or your preferred AUR helper"
case "nixos":
return "nix-env -iA nixos.firecracker # or add to your configuration.nix"
}
}
return ""
}
// parseOSReleaseIDs extracts the ID and ID_LIKE values from an
// /etc/os-release blob. Both are returned with surrounding quotes
// stripped; missing keys return empty strings. We don't validate
// the format beyond `KEY=value` — os-release is a simple format and
// any drift would manifest as a quiet "no distro hint" rather than
// a false positive.
func parseOSReleaseIDs(content string) (id, idLike string) {
for _, line := range strings.Split(content, "\n") {
line = strings.TrimSpace(line)
if rest, ok := strings.CutPrefix(line, "ID="); ok {
id = strings.Trim(rest, `"`)
}
if rest, ok := strings.CutPrefix(line, "ID_LIKE="); ok {
idLike = strings.Trim(rest, `"`)
}
}
return id, idLike
}
// addSecurityPostureChecks verifies the install matches what
// docs/privileges.md describes: helper + owner-daemon units active,
// sockets at the expected mode/owner, unit files carrying the
// hardening directives, and the firecracker binary owned by root +
// non-writable. Drift between the doc and the running install would
// silently weaken the trust model; surfacing it here makes the doc
// load-bearing rather than aspirational.
//
// In non-system mode (no /etc/banger/install.toml) the check emits a
// single warn pointing at the docs section that explains the looser
// dev-mode trust model — a doctor PASS row in that mode would imply
// guarantees the install isn't actually providing.
func (d *Daemon) addSecurityPostureChecks(ctx context.Context, report *system.Report) {
d.addSecurityPostureChecksAt(ctx, report, installmeta.DefaultPath, systemdSystemDir)
}
// addSecurityPostureChecksAt is the seam tests use: pass a fake
// install.toml + systemd dir to exercise the system-mode branch
// without writing to /etc.
func (d *Daemon) addSecurityPostureChecksAt(ctx context.Context, report *system.Report, installPath, systemdDir string) {
meta, err := installmeta.Load(installPath)
if err != nil {
report.AddWarn("security posture",
"running outside the system install (no "+installPath+")",
"helper SO_PEERCRED, narrow CapabilityBoundingSet, NoNewPrivileges, and ProtectSystem=strict are bypassed in this mode",
"see docs/privileges.md > 'Running outside the system install'; install via `sudo banger system install --owner $USER` for the supported trust model")
return
}
addServiceActiveCheck(ctx, d.runner, report, "helper service", installmeta.DefaultRootHelperService)
addServiceActiveCheck(ctx, d.runner, report, "owner daemon service", installmeta.DefaultService)
addSocketPermsCheck(report, "helper socket", installmeta.DefaultRootHelperSocketPath, meta.OwnerUID, 0o600)
addSocketPermsCheck(report, "daemon socket", installmeta.DefaultSocketPath, meta.OwnerUID, 0o600)
addUnitHardeningCheck(report, "helper unit hardening",
filepath.Join(systemdDir, installmeta.DefaultRootHelperService),
[]string{
"NoNewPrivileges=yes",
"ProtectSystem=strict",
"ProtectHome=yes",
"RestrictSUIDSGID=yes",
"LockPersonality=yes",
"CapabilityBoundingSet=",
})
addUnitHardeningCheck(report, "daemon unit hardening",
filepath.Join(systemdDir, installmeta.DefaultService),
[]string{
"User=" + meta.OwnerUser,
"NoNewPrivileges=yes",
"ProtectSystem=strict",
"ProtectHome=read-only",
"RestrictSUIDSGID=yes",
"LockPersonality=yes",
})
addExecutableOwnershipCheck(report, "firecracker binary ownership", d.config.FirecrackerBin)
}
// addServiceActiveCheck shells `systemctl is-active <svc>` and surfaces
// the result. is-active exits non-zero for inactive/failed states but
// always prints the state on stdout, so we read the trimmed output and
// ignore the exit code. Anything other than "active" is a fail with a
// systemctl-restart hint.
func addServiceActiveCheck(ctx context.Context, runner system.CommandRunner, report *system.Report, name, service string) {
out, _ := runner.Run(ctx, "systemctl", "is-active", service)
state := strings.TrimSpace(string(out))
if state == "" {
state = "unknown"
}
if state == "active" {
report.AddPass(name, fmt.Sprintf("%s is active", service))
return
}
report.AddFail(name,
fmt.Sprintf("%s is %s, not active", service, state),
fmt.Sprintf("run `sudo systemctl restart %s` and re-run `banger doctor`", service))
}
// addSocketPermsCheck stat()s the socket path and compares mode +
// owner against the values the install promises. Both daemon and
// helper sockets are 0600 chowned to the registered owner UID; any
// drift means filesystem perms aren't gating access the way the docs
// describe.
func addSocketPermsCheck(report *system.Report, name, path string, expectedUID int, expectedMode os.FileMode) {
info, err := os.Stat(path)
if err != nil {
report.AddFail(name,
fmt.Sprintf("%s: %v", path, err),
"is the service running? `sudo systemctl status` and check the runtime dir")
return
}
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path))
return
}
actualMode := info.Mode().Perm()
var problems []string
if actualMode != expectedMode {
problems = append(problems, fmt.Sprintf("mode is %#o, want %#o", actualMode, expectedMode))
}
if int(stat.Uid) != expectedUID {
problems = append(problems, fmt.Sprintf("uid is %d, want %d", stat.Uid, expectedUID))
}
if len(problems) > 0 {
problems = append(problems, "restart the service so the socket gets recreated with correct perms")
report.AddFail(name, fmt.Sprintf("%s: %s", path, strings.Join(problems, "; ")))
return
}
report.AddPass(name, fmt.Sprintf("%s: mode %#o, uid %d", path, actualMode, expectedUID))
}
// addUnitHardeningCheck reads the systemd unit file and confirms
// every required directive is present as a literal substring. Brittle
// to formatting changes (a commented-out directive would slip through), but
// strong enough to catch the "someone hand-edited the unit and
// dropped NoNewPrivileges" failure mode that motivates this check.
// The directives list captures the security-relevant subset of the
// renderer in commands_system.go; everything else (Description,
// ExecStart, etc.) is operational and not worth pinning here.
func addUnitHardeningCheck(report *system.Report, name, path string, required []string) {
data, err := os.ReadFile(path)
if err != nil {
report.AddFail(name,
fmt.Sprintf("%s: %v", path, err),
"reinstall via `sudo banger system install` to refresh the unit")
return
}
content := string(data)
var missing []string
for _, directive := range required {
if !strings.Contains(content, directive) {
missing = append(missing, directive)
}
}
if len(missing) > 0 {
report.AddFail(name,
fmt.Sprintf("%s missing directives: %s", path, strings.Join(missing, ", ")),
"reinstall via `sudo banger system install` to refresh the unit")
return
}
report.AddPass(name, fmt.Sprintf("%s: %d hardening directives present", path, len(required)))
}
// addExecutableOwnershipCheck mirrors validateRootExecutable's runtime
// check at doctor time: regular file, root-owned, executable, not
// group/world writable, not a symlink. Doctor catching this once at
// install time beats the helper failing every launch with a less
// helpful message.
func addExecutableOwnershipCheck(report *system.Report, name, path string) {
if strings.TrimSpace(path) == "" {
report.AddWarn(name, "no firecracker binary path configured")
return
}
info, err := os.Lstat(path)
if err != nil {
report.AddFail(name, fmt.Sprintf("%s: %v", path, err))
return
}
if info.Mode()&os.ModeSymlink != 0 {
report.AddFail(name,
fmt.Sprintf("%s is a symlink", path),
"the helper opens the binary with O_NOFOLLOW; resolve the symlink and update firecracker_bin in the daemon config")
return
}
if !info.Mode().IsRegular() {
report.AddFail(name, fmt.Sprintf("%s is not a regular file", path))
return
}
mode := info.Mode().Perm()
if mode&0o111 == 0 {
report.AddFail(name,
fmt.Sprintf("%s mode %#o is not executable", path, mode),
"chmod +x the binary")
return
}
if mode&0o022 != 0 {
report.AddFail(name,
fmt.Sprintf("%s mode %#o is group/world writable", path, mode),
"chmod g-w,o-w the binary so the helper accepts it")
return
}
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path))
return
}
if stat.Uid != 0 {
report.AddFail(name,
fmt.Sprintf("%s is owned by uid %d, want 0", path, stat.Uid),
"`sudo chown root` the firecracker binary")
return
}
report.AddPass(name, fmt.Sprintf("%s: regular, root-owned, mode %#o", path, mode))
}
// addSSHShortcutCheck surfaces a gentle warning when banger maintains
// an ssh_config file but the user hasn't wired it into ~/.ssh/config.
// This is intentionally a warn, not a fail — the shortcut is opt-in
// convenience and `banger vm ssh` works either way.
func (d *Daemon) addSSHShortcutCheck(report *system.Report) {
bangerConfig := BangerSSHConfigPath(d.userLayout)
if strings.TrimSpace(bangerConfig) == "" {
return
}
if _, err := os.Stat(bangerConfig); err != nil {
// No banger ssh_config rendered yet — nothing to include.
return
}
installed, err := UserSSHIncludeInstalled()
if err != nil {
report.AddWarn("ssh shortcut", fmt.Sprintf("could not read ~/.ssh/config: %v", err))
return
}
if installed {
report.AddPass("ssh shortcut", "enabled — `ssh <name>.vm` routes through banger")
return
}
report.AddWarn(
"ssh shortcut",
fmt.Sprintf("`ssh <name>.vm` not enabled (opt-in); run `banger ssh-config --install` or add `Include %s` to ~/.ssh/config", bangerConfig),
)
}
// addBangerVersionCheck reports the running CLI's version + commit
// alongside whatever's recorded in /etc/banger/install.toml. When
// the installed copy and the running binary disagree on version or
// commit, doctor warns: a stale `banger` running against a freshly-
// installed daemon (or vice versa) is the most common version-skew
// pitfall, and a one-line warning is friendlier than tracking down
// which side is wrong from a launch failure.
//
// Drift detection is suppressed when EITHER side is "dev"/"unknown"
// (untagged build) — those don't have a real version to compare.
func addBangerVersionCheck(report *system.Report, installPath string) {
cli := buildinfo.Current()
cliLine := fmt.Sprintf("CLI %s (commit %s, built %s)", cli.Version, shortCommit(cli.Commit), cli.BuiltAt)
meta, err := installmeta.Load(installPath)
if err != nil {
// Non-system mode (no install.toml). Just report what we have.
report.AddPass("banger version", cliLine)
return
}
installLine := fmt.Sprintf("install %s (commit %s, installed %s)", meta.Version, shortCommit(meta.Commit), meta.InstalledAt.Format(time.RFC3339))
if versionsDrift(cli, meta) {
report.AddWarn("banger version",
cliLine,
installLine,
"CLI and installed banger disagree; run `sudo banger system install` to refresh, or run the matching CLI binary")
return
}
report.AddPass("banger version", cliLine, installLine+" (matches CLI)")
}
func versionsDrift(cli buildinfo.Info, meta installmeta.Metadata) bool {
// Treat dev/unknown as "no real version on this side" — comparing
// a dev build against a tagged install is the local-development
// case, not a drift problem worth surfacing.
if cli.Version == "dev" || strings.TrimSpace(meta.Version) == "" {
return false
}
if cli.Version != meta.Version {
return true
}
if cli.Commit != "unknown" && strings.TrimSpace(meta.Commit) != "" && cli.Commit != meta.Commit {
return true
}
return false
}
func shortCommit(c string) string {
if len(c) > 8 {
return c[:8]
}
return c
}
// addArchitectureCheck surfaces a hard-fail when banger is running on
// a non-amd64 host. Companion binaries are pinned to amd64 in the
// Makefile, the published kernel catalog ships only x86_64 images, and
// OCI import pulls linux/amd64 layers. Letting users discover this
// through cryptic downstream failures is worse than saying it up front.
func addArchitectureCheck(report *system.Report) {
if runtime.GOARCH == "amd64" {
report.AddPass("host architecture", "amd64")
return
}
report.AddFail(
"host architecture",
fmt.Sprintf("running on %s; banger today only supports amd64/x86_64 hosts", runtime.GOARCH),
"companion build, kernel catalog, and OCI import all assume linux/amd64",
)
}
// addVMDefaultsCheck surfaces the effective VM sizing that `vm run` /
// `vm create` will apply when the user omits the flags. Shown as a
// PASS check so it always renders, with per-field provenance
// (config|auto|builtin) so users can tell what's driving each number.
func (d *Daemon) addVMDefaultsCheck(report *system.Report) {
host, err := system.ReadHostResources()
var cpus int
var memBytes int64
if err == nil {
cpus = host.CPUCount
memBytes = host.TotalMemoryBytes
}
defaults := model.ResolveVMDefaults(d.config.VMDefaults, cpus, memBytes)
details := []string{
fmt.Sprintf("vcpu: %d (%s)", defaults.VCPUCount, defaults.VCPUSource),
fmt.Sprintf("memory: %d MiB (%s)", defaults.MemoryMiB, defaults.MemorySource),
fmt.Sprintf("disk: %s (%s)", model.FormatSizeBytes(defaults.WorkDiskSizeBytes), defaults.WorkDiskSource),
"override any of these in ~/.config/banger/config.toml under [vm_defaults]",
}
report.AddPass("vm defaults", details...)
}
func (d *Daemon) runtimeChecks() *system.Preflight {
checks := system.NewPreflight()
// Firecracker presence + version is a separate top-level check (see
// addFirecrackerVersionCheck) so the report can carry a distro-aware
// install hint when the binary is missing — RequireExecutable's
// static `hint` string can't do that.
checks.RequireFile(d.config.SSHKeyPath, "ssh private key", `set "ssh_key_path" or let banger create its default key`)
if helper, err := vsockAgentBinary(d.layout); err == nil {
checks.RequireExecutable(helper, "vsock agent helper", `run 'make build' or reinstall banger`)
} else {
checks.Addf("%v", err)
}
if d.store != nil && strings.TrimSpace(d.config.DefaultImageName) != "" {
name := d.config.DefaultImageName
image, err := d.store.GetImageByName(context.Background(), name)
if err == nil {
checks.RequireFile(image.RootfsPath, "default image rootfs", `re-register or rebuild the default image`)
checks.RequireFile(image.KernelPath, "default image kernel", `re-register or rebuild the default image`)
if strings.TrimSpace(image.InitrdPath) != "" {
checks.RequireFile(image.InitrdPath, "default image initrd", `re-register or rebuild the default image`)
}
} else if !defaultImageInCatalog(name) {
checks.Addf("default image %q is not registered and not in the imagecat catalog", name)
}
// If the default image isn't local but is cataloged, vm create
// will auto-pull it on first use — no error to surface.
}
return checks
}
func defaultImageInCatalog(name string) bool {
catalog, err := imagecat.LoadEmbedded()
if err != nil {
return false
}
_, err = catalog.Lookup(name)
return err == nil
}
func (d *Daemon) coreVMLifecycleChecks() *system.Preflight {
checks := system.NewPreflight()
d.addBaseStartCommandPrereqs(checks)
return checks
}
func (d *Daemon) imageBuildChecks(ctx context.Context) *system.Preflight {
checks := system.NewPreflight()
d.addImageBuildPrereqs(
ctx,
checks,
firstNonEmpty(d.config.DefaultBaseRootfs, d.config.DefaultRootfs),
d.config.DefaultKernel,
d.config.DefaultInitrd,
d.config.DefaultModulesDir,
"",
)
d.vm.addBaseStartCommandPrereqs(checks)
return checks
}
func (d *Daemon) vsockChecks() *system.Preflight {
checks := system.NewPreflight()
checks.RequireExecutable(d.config.VSockAgentPath, "vsock agent", `run 'make build' or refresh the runtime bundle`)
checks.RequireFile(vsockHostDevicePath, "vsock host device", "load the vhost_vsock kernel module on the host")
if helper, err := vsockAgentBinary(d.layout); err == nil {
checks.RequireExecutable(helper, "vsock agent helper", `run 'make build' or reinstall banger`)
} else {
checks.Addf("%v", err)
}
checks.RequireFile(d.vm.vsockHostDevice, "vsock host device", "load the vhost_vsock kernel module on the host")
return checks
}
func runtimeBundleStatus(cfg model.DaemonConfig) string {
if strings.TrimSpace(cfg.RuntimeDir) == "" {
return "runtime dir not configured"
func runtimeStatus(cfg model.DaemonConfig) string {
if strings.TrimSpace(cfg.FirecrackerBin) == "" {
return "firecracker not configured"
}
return fmt.Sprintf("runtime dir %s", cfg.RuntimeDir)
}
func firstNonEmpty(values ...string) string {
for _, value := range values {
if strings.TrimSpace(value) != "" {
return value
}
}
return ""
return "firecracker and ssh key resolved"
}

View file

@ -0,0 +1,590 @@
package daemon
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/buildinfo"
"banger/internal/firecracker"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
)
// permissiveRunner satisfies system.CommandRunner by returning a
// configurable response for every call. Doctor tests don't care about
// the exact ip/iptables commands run — they care that the aggregated
// report surfaces each feature check correctly, so a one-size runner
// keeps the test prelude short.
type permissiveRunner struct {
out []byte
err error
}
func (r *permissiveRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
return r.out, r.err
}
func (r *permissiveRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
return r.out, r.err
}
// buildDoctorDaemon stands up a Daemon the way doctorReport expects:
// fake PATH with every tool the preflights look for, fake firecracker
// + vsock companion binaries, fake vsock host device file, and a
// permissive runner that claims a default-route via eth0 so NAT's
// defaultUplink call succeeds. Returns the wired *Daemon.
func buildDoctorDaemon(t *testing.T) *Daemon {
t.Helper()
binDir := t.TempDir()
for _, name := range []string{
"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep",
"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs",
"iptables", "sysctl", "mkfs.ext4", "mount", "umount", "cp",
} {
writeFakeExecutable(t, filepath.Join(binDir, name))
}
t.Setenv("PATH", binDir)
firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
t.Fatalf("write firecracker: %v", err)
}
vsockHelper := filepath.Join(t.TempDir(), "banger-vsock-agent")
if err := os.WriteFile(vsockHelper, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
t.Fatalf("write vsock helper: %v", err)
}
t.Setenv("BANGER_VSOCK_AGENT_BIN", vsockHelper)
sshKey := filepath.Join(t.TempDir(), "id_ed25519")
if err := os.WriteFile(sshKey, []byte("unused"), 0o600); err != nil {
t.Fatalf("write ssh key: %v", err)
}
vsockHostDevice := filepath.Join(t.TempDir(), "vhost-vsock")
if err := os.WriteFile(vsockHostDevice, []byte{}, 0o644); err != nil {
t.Fatalf("write vsock host device: %v", err)
}
runner := &permissiveRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
d := &Daemon{
layout: paths.Layout{
ConfigDir: t.TempDir(),
StateDir: t.TempDir(),
DBPath: filepath.Join(t.TempDir(), "state.db"),
},
config: model.DaemonConfig{
FirecrackerBin: firecrackerBin,
SSHKeyPath: sshKey,
BridgeName: model.DefaultBridgeName,
BridgeIP: model.DefaultBridgeIP,
StatsPollInterval: model.DefaultStatsPollInterval,
},
runner: runner,
}
wireServices(d)
d.vm.vsockHostDevice = vsockHostDevice
// HostNetwork defaults its own runner to the one on the struct, but
// wireServices only copies the Daemon's runner if d.net is nil
// before that call — in this test we constructed d.net implicitly,
// so belt-and-braces the permissive runner onto HostNetwork too.
d.net.runner = runner
return d
}
// findCheck returns the first CheckResult with the given name, or nil
// if no such check was emitted. A test helper rather than a method
// on Report, so the type's scope stays tight.
func findCheck(report system.Report, name string) *system.CheckResult {
for i := range report.Checks {
if report.Checks[i].Name == name {
return &report.Checks[i]
}
}
return nil
}
// TestDoctorReport_NonSystemModeEmitsSecurityWarn pins the non-
// system-mode branch: when install.toml is absent the security
// posture check must surface a warn that points at the dev-mode
// caveat in docs/privileges.md. A pass row in this mode would
// imply guarantees the install isn't actually providing. Drives
// the seam variant so the test is independent of whether the host
// happens to have /etc/banger/install.toml.
func TestDoctorReport_NonSystemModeEmitsSecurityWarn(t *testing.T) {
d := buildDoctorDaemon(t)
report := system.Report{}
missingInstall := filepath.Join(t.TempDir(), "install.toml")
d.addSecurityPostureChecksAt(context.Background(), &report, missingInstall, t.TempDir())
check := findCheck(report, "security posture")
if check == nil {
t.Fatal("security posture check missing from report")
}
if check.Status != system.CheckStatusWarn {
t.Fatalf("security posture status = %q, want warn", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "outside the system install") {
t.Fatalf("warn details = %q, want mention of non-system mode", joined)
}
if !strings.Contains(joined, "docs/privileges.md") {
t.Fatalf("warn details = %q, want pointer to docs/privileges.md", joined)
}
}
func TestAddSocketPermsCheckRejectsWrongMode(t *testing.T) {
socketPath := filepath.Join(t.TempDir(), "fake.sock")
if err := os.WriteFile(socketPath, []byte{}, 0o644); err != nil {
t.Fatalf("write fake socket: %v", err)
}
report := system.Report{}
addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
check := findCheck(report, "test socket")
if check == nil {
t.Fatal("expected test socket check")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("status = %q, want fail when mode is 0644 vs 0600 expected", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "mode is") {
t.Fatalf("details = %q, want mode-mismatch message", joined)
}
}
func TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch(t *testing.T) {
socketPath := filepath.Join(t.TempDir(), "fake.sock")
if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
t.Fatalf("write fake socket: %v", err)
}
report := system.Report{}
addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
check := findCheck(report, "test socket")
if check == nil {
t.Fatal("expected test socket check")
}
if check.Status != system.CheckStatusPass {
t.Fatalf("status = %q, want pass when mode + uid match; details = %v", check.Status, check.Details)
}
}
func TestAddUnitHardeningCheckFlagsMissingDirective(t *testing.T) {
unitPath := filepath.Join(t.TempDir(), "bangerd.service")
if err := os.WriteFile(unitPath, []byte("[Service]\nUser=alice\nProtectSystem=strict\n"), 0o644); err != nil {
t.Fatalf("write unit: %v", err)
}
report := system.Report{}
addUnitHardeningCheck(&report, "unit hardening", unitPath, []string{"User=alice", "NoNewPrivileges=yes", "ProtectSystem=strict"})
check := findCheck(report, "unit hardening")
if check == nil {
t.Fatal("expected unit hardening check")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("status = %q, want fail when NoNewPrivileges is missing", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "NoNewPrivileges=yes") {
t.Fatalf("details = %q, want mention of the missing directive", joined)
}
}
func TestAddUnitHardeningCheckPassesWhenAllPresent(t *testing.T) {
unitPath := filepath.Join(t.TempDir(), "bangerd-root.service")
body := "[Service]\nNoNewPrivileges=yes\nProtectSystem=strict\nProtectHome=yes\nCapabilityBoundingSet=CAP_CHOWN\n"
if err := os.WriteFile(unitPath, []byte(body), 0o644); err != nil {
t.Fatalf("write unit: %v", err)
}
report := system.Report{}
addUnitHardeningCheck(&report, "unit hardening", unitPath, []string{"NoNewPrivileges=yes", "ProtectSystem=strict", "CapabilityBoundingSet="})
check := findCheck(report, "unit hardening")
if check == nil {
t.Fatal("expected unit hardening check")
}
if check.Status != system.CheckStatusPass {
t.Fatalf("status = %q, want pass when every directive is present; details = %v", check.Status, check.Details)
}
}
func TestAddExecutableOwnershipCheckRejectsSymlink(t *testing.T) {
dir := t.TempDir()
real := filepath.Join(dir, "fc")
if err := os.WriteFile(real, []byte{}, 0o755); err != nil {
t.Fatalf("write fc: %v", err)
}
link := filepath.Join(dir, "fc-symlink")
if err := os.Symlink(real, link); err != nil {
t.Fatalf("symlink: %v", err)
}
report := system.Report{}
addExecutableOwnershipCheck(&report, "fc binary", link)
check := findCheck(report, "fc binary")
if check == nil {
t.Fatal("expected fc binary check")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("status = %q, want fail for symlinked binary", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "symlink") {
t.Fatalf("details = %q, want symlink rejection message", joined)
}
}
func TestAddExecutableOwnershipCheckRejectsGroupWritable(t *testing.T) {
if os.Getuid() == 0 {
t.Skip("test runs as root; can't construct a non-root-owned check target meaningfully")
}
path := filepath.Join(t.TempDir(), "fc")
if err := os.WriteFile(path, []byte{}, 0o775); err != nil {
t.Fatalf("write fc: %v", err)
}
report := system.Report{}
addExecutableOwnershipCheck(&report, "fc binary", path)
check := findCheck(report, "fc binary")
if check == nil {
t.Fatal("expected fc binary check")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("status = %q, want fail when binary is group/world writable", check.Status)
}
}
// TestDoctorReport_SystemModeRunsAllSecurityChecks pins the system-mode
// branch end-to-end: with a fake install.toml + fake systemd dir it
// must contribute every security row (services, sockets, unit
// hardening, fc ownership). Statuses themselves vary because we can't
// easily fake root-owned files in a test, but every check name must
// appear so a future refactor can't silently drop one.
func TestDoctorReport_SystemModeRunsAllSecurityChecks(t *testing.T) {
d := buildDoctorDaemon(t)
installDir := t.TempDir()
installPath := filepath.Join(installDir, "install.toml")
if err := os.WriteFile(installPath, []byte("owner_user = \"alice\"\nowner_uid = 1000\nowner_gid = 1000\nowner_home = \"/home/alice\"\ninstalled_at = 2026-04-28T00:00:00Z\n"), 0o644); err != nil {
t.Fatalf("write install.toml: %v", err)
}
systemdDir := t.TempDir()
for _, svc := range []string{"bangerd.service", "bangerd-root.service"} {
if err := os.WriteFile(filepath.Join(systemdDir, svc), []byte(""), 0o644); err != nil {
t.Fatalf("write fake unit %s: %v", svc, err)
}
}
report := system.Report{}
d.addSecurityPostureChecksAt(context.Background(), &report, installPath, systemdDir)
for _, name := range []string{
"helper service",
"owner daemon service",
"helper socket",
"daemon socket",
"helper unit hardening",
"daemon unit hardening",
"firecracker binary ownership",
} {
if findCheck(report, name) == nil {
t.Errorf("system-mode security check %q missing from report", name)
}
}
if findCheck(report, "security posture") != nil {
t.Error("system mode should NOT emit the non-system-mode warn")
}
}
func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
d := buildDoctorDaemon(t)
report := d.doctorReport(context.Background(), errors.New("simulated open failure"), false)
check := findCheck(report, "state store")
if check == nil {
t.Fatal("state store check missing from report")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("state store status = %q, want fail (store error should surface)", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "simulated open failure") {
t.Fatalf("state store details = %q, want the storeErr message", joined)
}
}
func TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall(t *testing.T) {
d := buildDoctorDaemon(t)
// Fresh install: the DB file simply doesn't exist yet. doctor must
// not treat that as a failure — nothing's broken, the first daemon
// start will create the file. The status message should say so,
// so a user running `banger doctor` before ever booting a VM
// doesn't see a scary red check.
report := d.doctorReport(context.Background(), nil, true)
check := findCheck(report, "state store")
if check == nil {
t.Fatal("state store check missing from report")
}
if check.Status != system.CheckStatusPass {
t.Fatalf("state store status = %q, want pass for a missing DB on fresh install", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "will be created") {
t.Fatalf("state store details = %q, want mention of 'will be created' so users know this is expected", joined)
}
}
func TestDoctorReport_StoreSuccessSurfacesAsPass(t *testing.T) {
d := buildDoctorDaemon(t)
report := d.doctorReport(context.Background(), nil, false)
check := findCheck(report, "state store")
if check == nil {
t.Fatal("state store check missing from report")
}
if check.Status != system.CheckStatusPass {
t.Fatalf("state store status = %q, want pass", check.Status)
}
}
func TestDoctorReport_MissingFirecrackerFailsFirecrackerBinaryCheck(t *testing.T) {
d := buildDoctorDaemon(t)
// Point at a nonexistent path. Note: the doctor's PATH lookup
// looks for the basename, so use an absolute non-existent path
// (that's the configured-path branch — bare-name lookups would
// fall through to the test-fixture binDir which DOES contain a
// fake `firecracker`).
d.config.FirecrackerBin = filepath.Join(t.TempDir(), "does-not-exist")
report := d.doctorReport(context.Background(), nil, false)
check := findCheck(report, "firecracker binary")
if check == nil {
t.Fatal("firecracker binary check missing from report")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("firecracker binary status = %q, want fail when binary missing", check.Status)
}
joined := strings.Join(check.Details, " ")
if !strings.Contains(joined, "firecracker-microvm/firecracker/releases") {
t.Fatalf("missing-binary report should include the upstream URL; got %q", joined)
}
}
// TestVersionsDriftToleratesDevAndUnknown pins the suppression
// branches: a "dev"/"unknown" build on either side is the local-
// development case, not a drift problem; we don't want every
// developer-machine doctor run to emit a noisy warn.
func TestVersionsDriftToleratesDevAndUnknown(t *testing.T) {
t.Parallel()
cliReleased := buildinfo.Info{Version: "0.1.0", Commit: "abcd1234efgh", BuiltAt: "2026-04-28"}
metaReleased := installmeta.Metadata{Version: "0.1.0", Commit: "abcd1234efgh"}
// Match → no drift.
if versionsDrift(cliReleased, metaReleased) {
t.Fatal("identical CLI and install metadata reported as drifted")
}
// Real version mismatch → drift.
bumped := metaReleased
bumped.Version = "0.2.0"
if !versionsDrift(cliReleased, bumped) {
t.Fatal("differing version not flagged as drift")
}
// Same version, different commit → drift (rebuilt without retag).
differCommit := metaReleased
differCommit.Commit = "deadbeefdead"
if !versionsDrift(cliReleased, differCommit) {
t.Fatal("differing commit at same version not flagged as drift")
}
// "dev" CLI vs released install → suppressed.
devCLI := buildinfo.Info{Version: "dev", Commit: "f00fb00b", BuiltAt: "now"}
if versionsDrift(devCLI, metaReleased) {
t.Fatal("dev CLI vs released install incorrectly flagged as drift")
}
// Empty install version → suppressed (predates the field).
emptyMeta := installmeta.Metadata{}
if versionsDrift(cliReleased, emptyMeta) {
t.Fatal("empty install metadata incorrectly flagged as drift")
}
}
// TestFirecrackerInstallHintDispatchesByDistro pins the per-distro
// install command guess. Pinned IDs are the ones banger is willing to
// suggest a concrete command for; everything else gets only the
// upstream URL.
func TestFirecrackerInstallHintDispatchesByDistro(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
name string
release string
wantSub string
wantNone bool
}{
{name: "debian", release: "ID=debian\nVERSION_CODENAME=bookworm\n", wantSub: "apt install firecracker"},
{name: "ubuntu_id_like_debian", release: "ID=ubuntu\nID_LIKE=debian\n", wantSub: "apt install firecracker"},
{name: "arch", release: "ID=arch\n", wantSub: "paru -S firecracker"},
{name: "manjaro_via_id_like", release: "ID=manjaro\nID_LIKE=arch\n", wantSub: "paru -S firecracker"},
{name: "nixos", release: "ID=nixos\n", wantSub: "nixos.firecracker"},
{name: "fedora_falls_back_to_url", release: "ID=fedora\n", wantNone: true},
{name: "missing_file", release: "", wantNone: true},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
osPath := filepath.Join(t.TempDir(), "os-release")
if tc.release != "" {
if err := os.WriteFile(osPath, []byte(tc.release), 0o644); err != nil {
t.Fatalf("write os-release: %v", err)
}
}
hints := firecrackerInstallHint(osPath)
joined := strings.Join(hints, " ")
if !strings.Contains(joined, "firecracker-microvm/firecracker/releases") {
t.Fatalf("hints missing upstream URL; got %q", joined)
}
if tc.wantNone {
// Distro-specific hint must NOT be present — only the URL.
if len(hints) != 1 {
t.Fatalf("unrecognised distro got distro-specific hint(s); want only the URL line, got %v", hints)
}
return
}
if !strings.Contains(joined, tc.wantSub) {
t.Fatalf("hints %q do not contain expected substring %q", joined, tc.wantSub)
}
if len(hints) < 2 {
t.Fatalf("expected distro hint + URL; got only %v", hints)
}
})
}
}
// firecrackerVersionRunner is a CommandRunner that actually executes
// firecracker --version (via system.Runner) but stubs everything else
// with the permissive default. The doctor uses d.runner for the
// firecracker version query AND for several other checks; this tiny
// dispatcher lets us run a real script for one command without
// rewiring the rest.
type firecrackerVersionRunner struct {
real system.Runner
canned []byte
bin string
}
func (r *firecrackerVersionRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
if name == r.bin {
return r.real.Run(ctx, name, args...)
}
return r.canned, nil
}
func (r *firecrackerVersionRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
return r.canned, nil
}
// stubFirecrackerVersion replaces the test daemon's firecracker
// stub with a script that prints the requested version line, then
// swaps d.runner for one that actually executes the script when the
// firecracker path is queried. Returns the resulting daemon ready
// for doctorReport.
func stubFirecrackerVersion(t *testing.T, d *Daemon, version string) {
t.Helper()
if err := os.WriteFile(d.config.FirecrackerBin, []byte("#!/bin/sh\necho 'Firecracker v"+version+"'\n"), 0o755); err != nil {
t.Fatalf("write firecracker stub: %v", err)
}
d.runner = &firecrackerVersionRunner{
real: system.NewRunner(),
canned: []byte("default via 10.0.0.1 dev eth0 proto static\n"),
bin: d.config.FirecrackerBin,
}
}
// TestFirecrackerVersionCheckPasses pins the happy path: when the
// configured firecracker reports a tested-range version, doctor
// emits a PASS row.
func TestFirecrackerVersionCheckPasses(t *testing.T) {
d := buildDoctorDaemon(t)
stubFirecrackerVersion(t, d, firecracker.KnownTestedVersion)
report := d.doctorReport(context.Background(), nil, false)
check := findCheck(report, "firecracker binary")
if check == nil {
t.Fatal("firecracker binary check missing from report")
}
if check.Status != system.CheckStatusPass {
t.Fatalf("status = %q, want pass; details=%v", check.Status, check.Details)
}
}
// TestFirecrackerVersionCheckFailsBelowMin pins the too-old path:
// a binary reporting a version below MinSupportedVersion must FAIL
// with the upgrade hint.
func TestFirecrackerVersionCheckFailsBelowMin(t *testing.T) {
d := buildDoctorDaemon(t)
stubFirecrackerVersion(t, d, "0.25.0")
report := d.doctorReport(context.Background(), nil, false)
check := findCheck(report, "firecracker binary")
if check == nil {
t.Fatal("firecracker binary check missing from report")
}
if check.Status != system.CheckStatusFail {
t.Fatalf("status = %q, want fail for below-min version", check.Status)
}
}
// TestFirecrackerVersionCheckWarnsAboveTested pins the over-tested
// path: a binary reporting a version newer than KnownTestedVersion
// must WARN — newer firecracker usually works, but it's outside the
// tested window.
func TestFirecrackerVersionCheckWarnsAboveTested(t *testing.T) {
d := buildDoctorDaemon(t)
stubFirecrackerVersion(t, d, "99.0.0")
report := d.doctorReport(context.Background(), nil, false)
check := findCheck(report, "firecracker binary")
if check == nil {
t.Fatal("firecracker binary check missing from report")
}
if check.Status != system.CheckStatusWarn {
t.Fatalf("status = %q, want warn for above-tested version", check.Status)
}
}
func TestDoctorReport_IncludesEveryDefaultCapability(t *testing.T) {
d := buildDoctorDaemon(t)
report := d.doctorReport(context.Background(), nil, false)
// Every registered capability that implements doctorCapability must
// contribute a check. Current defaults: work-disk, dns, nat. If a
// capability is added later it should either extend this list or
// register its own check name — either way, the assertion makes
// the contract visible.
for _, name := range []string{
"feature /root work disk",
"feature vm dns",
"feature nat",
} {
if findCheck(report, name) == nil {
t.Errorf("capability check %q missing from report", name)
}
}
}
func TestDoctorReport_EmitsVMDefaultsProvenance(t *testing.T) {
d := buildDoctorDaemon(t)
report := d.doctorReport(context.Background(), nil, false)
check := findCheck(report, "vm defaults")
if check == nil {
t.Fatal("vm defaults check missing from report")
}
if check.Status != system.CheckStatusPass {
t.Fatalf("vm defaults status = %q, want pass (this is an always-pass informational check)", check.Status)
}
joined := strings.Join(check.Details, "\n")
for _, needle := range []string{"vcpu:", "memory:", "disk:"} {
if !strings.Contains(joined, needle) {
t.Errorf("vm defaults details missing %q; got:\n%s", needle, joined)
}
}
}

View file

@ -0,0 +1,26 @@
package daemon
import (
"fmt"
"os/exec"
"testing"
)
// startFakeFirecracker launches a long-running sleep whose argv[0] is
// rewritten (via exec -a) to the command line a real firecracker
// process would expose, so
// reconcile / handle-cache paths that grep /proc/<pid>/cmdline accept
// it as a firecracker process. Killed on test cleanup.
func startFakeFirecracker(t *testing.T, apiSock string) *exec.Cmd {
t.Helper()
cmd := exec.Command("bash", "-lc", fmt.Sprintf("exec -a %q sleep 60", "firecracker --api-sock "+apiSock))
if err := cmd.Start(); err != nil {
t.Fatalf("start fake firecracker: %v", err)
}
t.Cleanup(func() {
if cmd.Process != nil {
_ = cmd.Process.Kill()
_, _ = cmd.Process.Wait()
}
})
return cmd
}

View file

@ -2,52 +2,20 @@ package daemon
import (
"context"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"encoding/pem"
"errors"
"os"
"path/filepath"
"strconv"
"testing"
"banger/internal/guest"
"banger/internal/model"
)
func TestEnsureWorkDiskClonesSeedImageAndResizes(t *testing.T) {
t.Parallel()
vmDir := t.TempDir()
seedPath := filepath.Join(t.TempDir(), "root.work-seed.ext4")
if err := os.WriteFile(seedPath, []byte("seed-data"), 0o644); err != nil {
t.Fatalf("WriteFile(seed): %v", err)
}
workDiskPath := filepath.Join(vmDir, "root.ext4")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "e2fsck", args: []string{"-p", "-f", workDiskPath}}},
{call: runnerCall{name: "resize2fs", args: []string{workDiskPath}}},
},
}
d := &Daemon{runner: runner}
vm := testVM("seeded", "image-seeded", "172.16.0.60")
vm.Runtime.WorkDiskPath = workDiskPath
vm.Spec.WorkDiskSizeBytes = 2 * 1024 * 1024
image := testImage("image-seeded")
image.WorkSeedPath = seedPath
if err := d.ensureWorkDisk(context.Background(), &vm, image); err != nil {
t.Fatalf("ensureWorkDisk: %v", err)
}
runner.assertExhausted()
info, err := os.Stat(workDiskPath)
if err != nil {
t.Fatalf("Stat(work disk): %v", err)
}
if info.Size() != vm.Spec.WorkDiskSizeBytes {
t.Fatalf("work disk size = %d, want %d", info.Size(), vm.Spec.WorkDiskSizeBytes)
}
}
func TestTapPoolWarmsAndReusesIdleTap(t *testing.T) {
t.Parallel()
@ -69,19 +37,20 @@ func TestTapPoolWarmsAndReusesIdleTap(t *testing.T) {
},
closing: make(chan struct{}),
}
wireServices(d)
d.ensureTapPool(context.Background())
tapName, err := d.acquireTap(context.Background(), "tap-fallback")
d.net.ensureTapPool(context.Background())
tapName, err := d.net.acquireTap(context.Background(), "tap-fallback")
if err != nil {
t.Fatalf("acquireTap: %v", err)
}
if tapName != "tap-pool-0" {
t.Fatalf("tapName = %q, want tap-pool-0", tapName)
}
if err := d.releaseTap(context.Background(), tapName); err != nil {
if err := d.net.releaseTap(context.Background(), tapName); err != nil {
t.Fatalf("releaseTap: %v", err)
}
tapName, err = d.acquireTap(context.Background(), "tap-fallback")
tapName, err = d.net.acquireTap(context.Background(), "tap-fallback")
if err != nil {
t.Fatalf("acquireTap second time: %v", err)
}
@ -90,3 +59,39 @@ func TestTapPoolWarmsAndReusesIdleTap(t *testing.T) {
}
runner.assertExhausted()
}
func TestEnsureAuthorizedKeyOnWorkDiskSkipsRepairForMatchingSeededFingerprint(t *testing.T) {
t.Parallel()
privateKey, err := rsa.GenerateKey(rand.Reader, 1024)
if err != nil {
t.Fatalf("GenerateKey: %v", err)
}
privateKeyPEM := pem.EncodeToMemory(&pem.Block{
Type: "RSA PRIVATE KEY",
Bytes: x509.MarshalPKCS1PrivateKey(privateKey),
})
sshKeyPath := filepath.Join(t.TempDir(), "id_rsa")
if err := os.WriteFile(sshKeyPath, privateKeyPEM, 0o600); err != nil {
t.Fatalf("WriteFile(private key): %v", err)
}
fingerprint, err := guest.AuthorizedPublicKeyFingerprint(sshKeyPath)
if err != nil {
t.Fatalf("AuthorizedPublicKeyFingerprint: %v", err)
}
runner := &scriptedRunner{t: t}
d := &Daemon{
runner: runner,
config: model.DaemonConfig{SSHKeyPath: sshKeyPath},
}
wireServices(d)
vm := testVM("seeded-fastpath", "image-seeded-fastpath", "172.16.0.62")
vm.Runtime.WorkDiskPath = filepath.Join(t.TempDir(), "root.ext4")
image := model.Image{SeededSSHPublicKeyFingerprint: fingerprint}
if err := d.ws.ensureAuthorizedKeyOnWorkDisk(context.Background(), &vm, image, workDiskPreparation{ClonedFromSeed: true}); err != nil {
t.Fatalf("ensureAuthorizedKeyOnWorkDisk: %v", err)
}
runner.assertExhausted()
}

View file

@ -0,0 +1,773 @@
// Package fcproc owns the host-side process primitives needed to launch,
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
// resolution, socket permissions, PID lookup, graceful and forceful
// shutdown. Shared by the VM lifecycle and image build paths so neither
// needs to import the other.
package fcproc
import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"syscall"
"time"
"golang.org/x/sys/unix"
"banger/internal/firecracker"
"banger/internal/system"
)
// errFirecrackerPIDNotFound is returned by findByJailerPidfile when the
// pidfile is missing, unreadable, or doesn't point at a live firecracker
// process. Surfaces to callers as a "this VM isn't running" signal, not
// as a hard failure.
var errFirecrackerPIDNotFound = errors.New("firecracker pid not found")
// procDir is the kernel's per-process inspection directory. Var so tests
// can swap in a fake /proc-shaped fixture in t.TempDir().
var procDir = "/proc"
// ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes
// before the process exits. Callers use errors.Is to detect it.
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
// Runner is the command-runner surface fcproc needs. system.Runner satisfies
// it.
type Runner interface {
Run(ctx context.Context, name string, args ...string) ([]byte, error)
RunSudo(ctx context.Context, args ...string) ([]byte, error)
}
// Config captures the host networking + runtime paths fcproc operations need.
type Config struct {
FirecrackerBin string
BridgeName string
BridgeIP string
CIDR string
RuntimeDir string
}
// Manager owns the shared configuration + runner and exposes the per-process
// helpers. Stateless beyond its dependencies — safe to share.
type Manager struct {
runner Runner
cfg Config
logger *slog.Logger
}
// New returns a Manager that issues commands through runner using cfg.
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
return &Manager{runner: runner, cfg: cfg, logger: logger}
}
// EnsureBridge makes sure the host bridge exists and is up.
func (m *Manager) EnsureBridge(ctx context.Context) error {
if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil {
_, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil {
return err
}
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
return err
}
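// For reference, the command sequence EnsureBridge issues on a host with no
// bridge yet (Config values substituted) is:
//
//	ip link add name <bridge> type bridge
//	ip addr add <bridge-ip>/<cidr> dev <bridge>
//	ip link set <bridge> up
//
// When the bridge already exists, only the final `ip link set ... up` runs.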
// EnsureSocketDir creates the runtime socket directory at mode 0700, or
// 0711 when running as root so other UIDs can traverse (but not list) it.
// This is the directory the daemon socket, per-VM firecracker API sockets,
// and vsock sockets all live inside, so it must never be world-readable.
func (m *Manager) EnsureSocketDir() error {
mode := os.FileMode(0o700)
if os.Geteuid() == 0 {
mode = 0o711
}
if err := os.MkdirAll(m.cfg.RuntimeDir, mode); err != nil {
return err
}
return os.Chmod(m.cfg.RuntimeDir, mode)
}
// CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to
// the bridge, and brings both up.
func (m *Manager) CreateTap(ctx context.Context, tap string) error {
return m.CreateTapOwned(ctx, tap, os.Getuid(), os.Getgid())
}
// CreateTapOwned (re)creates a TAP owned by uid:gid, attaches it to the
// bridge, and brings both up.
func (m *Manager) CreateTapOwned(ctx context.Context, tap string, uid, gid int) error {
if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
_, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap)
}
if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(uid), "group", strconv.Itoa(gid)); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
return err
}
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
return err
}
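// The equivalent shell sequence for a fresh tap (Config values substituted):
//
//	ip tuntap add dev <tap> mode tap user <uid> group <gid>
//	ip link set <tap> master <bridge>
//	ip link set <tap> up
//	ip link set <bridge> up
//
// A pre-existing tap of the same name is deleted first so ownership and
// bridge membership are always rebuilt from scratch.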
// ResolveBinary returns the path to the firecracker binary: either an
// absolute path from config, or the first hit on PATH.
func (m *Manager) ResolveBinary() (string, error) {
if m.cfg.FirecrackerBin == "" {
return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
}
path := m.cfg.FirecrackerBin
if strings.ContainsRune(path, os.PathSeparator) {
if _, err := os.Stat(path); err != nil {
return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
}
return path, nil
}
resolved, err := system.LookupExecutable(path)
if err != nil {
return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
}
return resolved, nil
}
// EnsureSocketAccess waits for the socket to appear then chowns/chmods it to
// the current uid/gid, mode 0600.
func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
return m.EnsureSocketAccessFor(ctx, socketPath, label, os.Getuid(), os.Getgid())
}
// EnsureSocketAccessFor waits for the socket to appear then chowns/chmods it
// to uid:gid, mode 0600.
func (m *Manager) EnsureSocketAccessFor(ctx context.Context, socketPath, label string, uid, gid int) error {
return m.ensureSocketAccessFor(ctx, socketPath, label, uid, gid, 5*time.Second, 100*time.Millisecond)
}
// EnsureSocketAccessForAsync runs EnsureSocketAccessFor concurrently for each
// non-empty path and returns a channel that receives a single error (nil on
// full success) once all per-path operations complete. Caller MUST receive on
// the channel to unblock the goroutine.
//
// Used during firecracker boot: the SDK's HTTP probe inside Machine.Start
// connects to the API socket the moment it appears. When firecracker is
// launched under sudo the socket is created root-owned, and the daemon's
// connect(2) gets EACCES until something chowns it. Running the chown
// concurrently with Start (instead of after Start returns, which deadlocks)
// closes the race without a shell-level chown_watcher.
//
// Uses a 25ms poll cadence (vs 100ms for the synchronous variant) to win
// against the SDK's tight HTTP retry loop.
func (m *Manager) EnsureSocketAccessForAsync(ctx context.Context, socketPaths []string, uid, gid int) <-chan error {
var clean []string
for _, p := range socketPaths {
if strings.TrimSpace(p) != "" {
clean = append(clean, p)
}
}
done := make(chan error, 1)
if len(clean) == 0 {
done <- nil
close(done)
return done
}
go func() {
defer close(done)
var wg sync.WaitGroup
errCh := make(chan error, len(clean))
for _, p := range clean {
wg.Add(1)
go func(path string) {
defer wg.Done()
if err := m.ensureSocketAccessFor(ctx, path, "firecracker socket", uid, gid, 3*time.Second, 25*time.Millisecond); err != nil {
errCh <- err
}
}(p)
}
wg.Wait()
close(errCh)
for err := range errCh {
if err != nil {
done <- err
return
}
}
done <- nil
}()
return done
}
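// Sketch of the intended call pattern during boot. The caller code below is
// illustrative only — machine, apiSock, and vsockPath are placeholders, not
// names defined in this package:
//
//	done := mgr.EnsureSocketAccessForAsync(ctx, []string{apiSock, vsockPath}, uid, gid)
//	startErr := machine.Start(ctx) // SDK probes the API socket while the chown races it
//	if err := <-done; err != nil { // always drain the channel
//		// handle socket-access failure
//	}
//	if startErr != nil {
//		// handle boot failure
//	}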
func (m *Manager) ensureSocketAccessFor(ctx context.Context, socketPath, label string, uid, gid int, timeout, interval time.Duration) error {
if err := pollPath(ctx, socketPath, timeout, interval, label); err != nil {
return err
}
return chownChmodNoFollow(ctx, m.runner, socketPath, uid, gid, 0o600)
}
// chownChmodNoFollow sets owner/group/mode on path without following
// symlinks at the leaf. Required because the helper RPCs that drive
// socket access run as root: a follow-symlink chmod/chown becomes an
// arbitrary file-ownership primitive if the caller can plant a symlink
// at the target.
//
// Linux idiom: open with O_PATH|O_NOFOLLOW (errors out if the leaf is a
// symlink), Fstat the fd to confirm the file is a unix socket, then
// chown via Fchownat(AT_EMPTY_PATH) and chmod via /proc/self/fd/N
// (fchmod on an O_PATH fd returns EBADF, but the /proc path resolves
// straight back to the inode the fd already pins, so no leaf re-traversal
// happens).
//
// Falls back to `sudo chown -h` + `sudo chmod` for the local-priv mode
// where the daemon isn't root and can't issue the syscalls itself; the
// `-h` flag still avoids the symlink-follow on the chown side.
func chownChmodNoFollow(ctx context.Context, runner Runner, path string, uid, gid int, mode os.FileMode) error {
if os.Geteuid() != 0 {
// Mode-then-owner ordering preserves the pre-existing failure
// semantics of the legacy `chmod 600 / chown` shell-out path
// (chmod-failure tests expect chown to be skipped). `chown -h`
// keeps the symlink-no-follow guarantee on this branch.
if _, err := runner.RunSudo(ctx, "chmod", fmt.Sprintf("%o", mode.Perm()), path); err != nil {
return err
}
_, err := runner.RunSudo(ctx, "chown", "-h", fmt.Sprintf("%d:%d", uid, gid), path)
return err
}
fd, err := unix.Open(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open %s: %w", path, err)
}
defer unix.Close(fd)
var st unix.Stat_t
if err := unix.Fstat(fd, &st); err != nil {
return fmt.Errorf("fstat %s: %w", path, err)
}
if st.Mode&unix.S_IFMT != unix.S_IFSOCK {
return fmt.Errorf("%s is not a unix socket (mode %#o)", path, st.Mode&unix.S_IFMT)
}
procPath := "/proc/self/fd/" + strconv.Itoa(fd)
if err := unix.Fchmodat(unix.AT_FDCWD, procPath, uint32(mode.Perm()), 0); err != nil {
return fmt.Errorf("chmod %s: %w", path, err)
}
if err := unix.Fchownat(fd, "", uid, gid, unix.AT_EMPTY_PATH); err != nil {
return fmt.Errorf("chown %s: %w", path, err)
}
return nil
}
// FindPID returns the PID of the firecracker process backing apiSock.
//
// Two strategies, tried in order:
//
// 1. pgrep -n -f apiSock — cheap, works for direct (non-jailer) launches
// because the host-side socket path appears verbatim in firecracker's
// cmdline.
// 2. Jailer pidfile — for jailer'd firecrackers, pgrep can't match
// because the cmdline only carries the chroot-relative
// `--api-sock /firecracker.socket`. Jailer (v1.x) writes the
// post-exec firecracker PID to `<chroot>/firecracker.pid` by default.
// Read it; verify the PID is alive and its comm is `firecracker`.
// Caller must run with read access to the pidfile (root in the
// system-mode helper; daemon UID in dev mode where banger doesn't
// drop privs).
//
// This is what makes post-restart reconcile re-attach to surviving
// guests instead of mistaking them for stale.
func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) {
if pid, err := m.findPIDByPgrep(ctx, apiSock); err == nil && pid > 0 {
return pid, nil
}
if pid, err := findByJailerPidfile(apiSock); err == nil && pid > 0 {
return pid, nil
}
return 0, errFirecrackerPIDNotFound
}
func (m *Manager) findPIDByPgrep(ctx context.Context, apiSock string) (int, error) {
out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
if err != nil {
return 0, err
}
return strconv.Atoi(strings.TrimSpace(string(out)))
}
// findByJailerPidfile reads the jailer-written pidfile that lives at
// `<chroot>/firecracker.pid` (sibling of the api socket inside the
// chroot), verifies the PID is alive and its /proc/<pid>/comm is
// `firecracker`, and returns it.
//
// Returns errFirecrackerPIDNotFound when the api-sock isn't a symlink
// (direct launch — pidfile shape doesn't apply), the pidfile is
// missing or unreadable (VM stopped, or caller lacks privileges),
// the pidfile content is garbage, or the PID points at a process
// that's gone or never was firecracker.
func findByJailerPidfile(apiSock string) (int, error) {
target, err := os.Readlink(apiSock)
if err != nil {
return 0, errFirecrackerPIDNotFound
}
if !filepath.IsAbs(target) {
target = filepath.Join(filepath.Dir(apiSock), target)
}
pidPath := filepath.Join(filepath.Dir(target), "firecracker.pid")
pidBytes, err := os.ReadFile(pidPath)
if err != nil {
return 0, errFirecrackerPIDNotFound
}
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
if err != nil || pid <= 0 {
return 0, errFirecrackerPIDNotFound
}
commBytes, err := os.ReadFile(filepath.Join(procDir, strconv.Itoa(pid), "comm"))
if err != nil {
return 0, errFirecrackerPIDNotFound
}
if strings.TrimSpace(string(commBytes)) != "firecracker" {
return 0, errFirecrackerPIDNotFound
}
return pid, nil
}
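// The on-disk shape findByJailerPidfile expects (paths illustrative):
//
//	<runtime>/fc-<vmid>.sock           api socket — a symlink into the chroot
//	<chroot>/root/firecracker.socket   the symlink's target
//	<chroot>/root/firecracker.pid      jailer-written pidfile, sibling of the socket
//	/proc/<pid>/comm                   must read "firecracker"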
// ResolvePID prefers pgrep and falls back to the firecracker machine PID.
// Returns 0 if neither source yields a PID.
func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 {
return pid
}
if machine != nil {
if pid, err := machine.PID(); err == nil && pid > 0 {
return pid
}
}
return 0
}
// SendCtrlAltDel requests a graceful guest shutdown via the firecracker API
// socket.
func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error {
if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
return err
}
client := firecracker.New(apiSock, m.logger)
return client.SendCtrlAltDel(ctx)
}
// WaitForExit polls until the process is gone or the timeout fires. Returns
// ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation.
func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for {
if !system.ProcessRunning(pid, apiSock) {
return nil
}
if time.Now().After(deadline) {
return ErrWaitForExitTimeout
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(100 * time.Millisecond):
}
}
}
// Kill sends SIGKILL to pid.
func (m *Manager) Kill(ctx context.Context, pid int) error {
_, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
return err
}
// ChrootDriveSpec describes how a single drive should appear inside the
// jailer chroot. HostPath is the host-side source (a regular file or a
// /dev/mapper/* block device); ChrootName is the bare filename it should
// be reachable as inside the chroot (e.g. "rootfs"). The DM block device
// case is detected via os.Stat (S_IFBLK) — the helper mknods a matching
// node; everything else is hard-linked.
type ChrootDriveSpec struct {
ChrootName string
HostPath string
}
// PrepareJailerChroot stages the chroot tree at chrootRoot for the jailer
// to take over on launch. After this call:
//
// - chrootRoot exists, mode 0700, owned by uid:gid.
// - chrootRoot/<kernel-name> is a hard link of kernelHostPath, owned uid:gid.
// - chrootRoot/<initrd-name> is a hard link of initrdHostPath if set.
// - For each drive: a hard link (regular file source) or a freshly
// mknod'd block device with the source's major/minor (DM source).
// - If wantVSock, /dev/vhost-vsock is mknod'd into the chroot so
// firecracker can open it after chroot.
//
// All filesystem mutations go through runner.RunSudo when the caller isn't
// root, so this works in dev (sudo) and system (root helper) modes alike.
// Path components are validated by the caller (roothelper) — this helper
// trusts them.
func (m *Manager) PrepareJailerChroot(ctx context.Context, chrootRoot string, uid, gid int, firecrackerHostPath, kernelHostPath, kernelName, initrdHostPath, initrdName string, drives []ChrootDriveSpec, wantVSock bool) error {
if strings.TrimSpace(chrootRoot) == "" {
return fmt.Errorf("chroot root is required")
}
if err := m.sudo(ctx, "mkdir", "-p", chrootRoot); err != nil {
return fmt.Errorf("create chroot root: %w", err)
}
if err := m.sudo(ctx, "chmod", "0700", chrootRoot); err != nil {
return fmt.Errorf("chmod chroot root: %w", err)
}
if err := m.chown(ctx, chrootRoot, uid, gid); err != nil {
return fmt.Errorf("chown chroot root: %w", err)
}
// The daemon (uid) needs to traverse the intermediate directories to reach
// the sockets firecracker creates inside the chroot. The per-VM dir
// (<base>/firecracker/<vmid>/) is chowned to uid so the daemon can reach
// <vmid>/root/. The <base>/firecracker/ base and <base>/jail/ dirs get
// world-execute (--x) so any UID can traverse through them without listing
// their contents (the per-VM dirs are still protected by their own mode).
vmDir := filepath.Dir(chrootRoot)
if err := m.chown(ctx, vmDir, uid, gid); err != nil {
return fmt.Errorf("chown vm dir: %w", err)
}
fcBaseDir := filepath.Dir(vmDir)
if err := m.sudo(ctx, "chmod", "0711", fcBaseDir); err != nil {
return fmt.Errorf("chmod firecracker base dir: %w", err)
}
jailBaseDir := filepath.Dir(fcBaseDir)
if err := m.sudo(ctx, "chmod", "0711", jailBaseDir); err != nil {
return fmt.Errorf("chmod jail base dir: %w", err)
}
// Order matters: hard-link the kernel + file-backed drives BEFORE
// the self-bind below. link(2) refuses to cross mount points even
// when the underlying superblock is the same — once chrootRoot is a
// mount point, `ln /var/lib/.../kernel <chroot>/vmlinux` returns
// EXDEV.
if err := m.linkInto(ctx, chrootRoot, kernelHostPath, kernelName, uid, gid); err != nil {
return fmt.Errorf("link kernel: %w", err)
}
if strings.TrimSpace(initrdHostPath) != "" {
if err := m.linkInto(ctx, chrootRoot, initrdHostPath, initrdName, uid, gid); err != nil {
return fmt.Errorf("link initrd: %w", err)
}
}
for _, d := range drives {
if err := m.stageDrive(ctx, chrootRoot, d, uid, gid); err != nil {
return fmt.Errorf("stage drive %s: %w", d.ChrootName, err)
}
}
if wantVSock {
// The jailer creates /dev inside the chroot, but /dev/vhost-vsock must
// be pre-staged so firecracker can open it after the jailer chroots.
devDir := chrootRoot + "/dev"
if err := m.sudo(ctx, "mkdir", "-p", devDir); err != nil {
return fmt.Errorf("create chroot/dev: %w", err)
}
if err := m.chown(ctx, devDir, uid, gid); err != nil {
return fmt.Errorf("chown chroot/dev: %w", err)
}
if err := m.stageDevice(ctx, chrootRoot, "dev/vhost-vsock", "/dev/vhost-vsock", uid, gid); err != nil {
return fmt.Errorf("stage vhost-vsock: %w", err)
}
}
// Bind firecracker + the host libdirs into the chroot read-only.
// firecracker is dynamically linked (interpreter /lib64/ld-linux-*,
// libc, libgcc), and inside the chroot ENOENT on those is reported
// as "Failed to exec into Firecracker: No such file or directory" —
// the kernel's misleading ENOENT-for-missing-interpreter error.
//
// Done last so the link/mknod steps above don't have to cross the
// self-bind mount boundary (link(2) returns EXDEV at mount edges).
// Self-bind first so CleanupJailerChroot's lazy per-mount unmount sweep
// can detach from chrootRoot itself; --make-private blocks propagation
// back to the host mount namespace.
// firecracker is copied (not bind-mounted) because jailer opens the
// binary O_RDWR — apparently to seal it or rewrite something — and
// fails with EROFS on a ro-bind.
chrootFC := chrootRoot + "/" + filepath.Base(firecrackerHostPath)
if err := m.sudo(ctx, "cp", "-f", firecrackerHostPath, chrootFC); err != nil {
return fmt.Errorf("copy firecracker into chroot: %w", err)
}
if err := m.sudo(ctx, "chmod", "0755", chrootFC); err != nil {
return fmt.Errorf("chmod firecracker in chroot: %w", err)
}
if err := m.chown(ctx, chrootFC, uid, gid); err != nil {
return fmt.Errorf("chown firecracker in chroot: %w", err)
}
if err := m.sudo(ctx, "mount", "--bind", chrootRoot, chrootRoot); err != nil {
return fmt.Errorf("self-bind chroot: %w", err)
}
// Remount without nosuid: the helper unit's ReadWritePaths binding marks
// /var/lib/banger nosuid, and bind mounts inherit that flag. The jailer
// needs to exec /firecracker as UID 1000, which the kernel denies on a
// nosuid mount when NoNewPrivileges is set on the unit.
if err := m.sudo(ctx, "mount", "-o", "remount,bind,suid", chrootRoot, chrootRoot); err != nil {
return fmt.Errorf("remount chroot suid: %w", err)
}
if err := m.sudo(ctx, "mount", "--make-private", chrootRoot); err != nil {
return fmt.Errorf("make-private chroot: %w", err)
}
// Pre-create /usr with world-traversable permissions. UMask=0077 on the
// helper unit causes plain mkdir to produce 0700 dirs; UID 1000 must be
// able to traverse /usr/ to reach the dynamic linker via lib64 → usr/lib.
if err := m.sudo(ctx, "install", "-d", "-m", "0755", chrootRoot+"/usr"); err != nil {
return fmt.Errorf("create chroot/usr: %w", err)
}
// Bind real libdirs and replicate the host's compat symlinks
// (/lib64 → /usr/lib, etc) inside the chroot so firecracker's
// PT_INTERP path (/lib64/ld-linux-*) resolves to the bound libs.
for _, libDir := range []string{"/usr/lib", "/usr/lib64", "/lib", "/lib64"} {
info, err := os.Lstat(libDir)
if err != nil {
continue
}
target := chrootRoot + libDir
if info.Mode()&os.ModeSymlink != 0 {
link, err := os.Readlink(libDir)
if err != nil {
continue
}
if err := m.sudo(ctx, "ln", "-sfn", link, target); err != nil {
return fmt.Errorf("symlink %s -> %s: %w", target, link, err)
}
continue
}
if !info.IsDir() {
continue
}
if err := m.bindDir(ctx, libDir, target, true); err != nil {
return fmt.Errorf("bind %s: %w", libDir, err)
}
}
return nil
}
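// After a successful PrepareJailerChroot the tree looks roughly like this
// (entry names are illustrative; kernel and drive names come from the caller):
//
//	<chroot>/                  0700 uid:gid, self-bind mounted (private, suid)
//	<chroot>/vmlinux           hard link of the host kernel image
//	<chroot>/rootfs            hard link, or mknod'd block device for DM sources
//	<chroot>/dev/vhost-vsock   mknod'd char device (only when wantVSock)
//	<chroot>/firecracker       copy of the host binary, 0755 uid:gid
//	<chroot>/usr, /lib, ...    read-only bind mounts / symlinks of the host libdirs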
// CleanupJailerChroot tears down a chroot built by PrepareJailerChroot:
// lazy-recursive umount of every mount under (or at) chrootRoot, then a
// findmnt-guarded `rm -rf`. The guard is load-bearing: if any bind mount
// remained, `rm -rf` would descend into the bind source (e.g. /usr/lib)
// and start deleting host files. The umount runs `-l` (lazy) so an in-use
// bind point still gets detached from the namespace; the guarded check
// then catches the rare case where detachment didn't happen.
func (m *Manager) CleanupJailerChroot(ctx context.Context, chrootRoot string) error {
if strings.TrimSpace(chrootRoot) == "" {
return nil
}
// Lstat (not Stat): if chrootRoot is a symlink the umount/rm shell-outs
// below would chase it. The handler-side validateNotSymlink also catches
// this, but lifting the check inside fcproc closes the TOCTOU window
// between the handler check and our umount command.
info, err := os.Lstat(chrootRoot)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return fmt.Errorf("inspect chroot %s: %w", chrootRoot, err)
}
if info.Mode()&os.ModeSymlink != 0 {
return fmt.Errorf("refusing to clean up %q: path is a symlink", chrootRoot)
}
if !info.IsDir() {
return fmt.Errorf("refusing to clean up %q: not a directory", chrootRoot)
}
// Resolve any intermediate symlinks and require the result equals the
// input — that catches a planted `…/jail/firecracker/<vmid> → /` even
// though the leaf "/root" component is itself a real directory inside
// the redirected target. Equality + Lstat together cover both top and
// intermediate symlink shapes.
resolved, err := filepath.EvalSymlinks(chrootRoot)
if err != nil {
return fmt.Errorf("resolve chroot %s: %w", chrootRoot, err)
}
if filepath.Clean(resolved) != filepath.Clean(chrootRoot) {
return fmt.Errorf("refusing to clean up %q: resolves to %q via symlink", chrootRoot, resolved)
}
// Switch from `umount --recursive --lazy <chrootRoot>` (shell-resolved,
// follows symlinks at exec time) to direct umount2() syscalls per child
// mount with UMOUNT_NOFOLLOW. That fully closes the residual TOCTOU
// between the EvalSymlinks check above and the unmount: even if a daemon-
// uid attacker swapped a child mount's path to a symlink in the gap, the
// kernel refuses to follow it. The findmnt guard below still catches any
// mount we couldn't detach.
mounts, err := m.mountsUnder(ctx, chrootRoot)
if err != nil {
return fmt.Errorf("inspect chroot mounts: %w", err)
}
// Deepest-first so child mounts come off before parents; otherwise a
// parent unmount would EBUSY against in-use children.
sort.Slice(mounts, func(i, j int) bool {
return strings.Count(mounts[i], "/") > strings.Count(mounts[j], "/")
})
for _, mt := range mounts {
if err := m.detachMount(ctx, mt); err != nil {
return fmt.Errorf("detach %q: %w", mt, err)
}
}
if remaining, err := m.mountsUnder(ctx, chrootRoot); err != nil {
return fmt.Errorf("re-inspect chroot mounts: %w", err)
} else if len(remaining) > 0 {
return fmt.Errorf("refusing to rm -rf %q: still has %d mount(s): %v", chrootRoot, len(remaining), remaining)
}
return m.sudo(ctx, "rm", "-rf", "--", chrootRoot)
}
// detachMount tears down a single mount target with MNT_DETACH (lazy) +
// UMOUNT_NOFOLLOW (refuse symlinks). Falls back to `sudo umount --lazy`
// when not running as root, since umount2() requires CAP_SYS_ADMIN.
//
// ENOENT and EINVAL on the syscall path are treated as "already gone" —
// findmnt's snapshot can race with parallel cleanups, and a missing
// mount is the desired end state.
func (m *Manager) detachMount(ctx context.Context, target string) error {
if os.Geteuid() == 0 {
err := unix.Unmount(target, unix.MNT_DETACH|unix.UMOUNT_NOFOLLOW)
if err == nil || errors.Is(err, unix.ENOENT) || errors.Is(err, unix.EINVAL) {
return nil
}
return err
}
// Local-priv fallback: shell `umount --lazy` resolves the path through
// the kernel without UMOUNT_NOFOLLOW, but the EvalSymlinks check earlier
// already constrained the chroot tree. The dev-mode caveat in
// docs/privileges.md covers this branch's looser guarantees.
_, err := m.runner.RunSudo(ctx, "umount", "--lazy", target)
return err
}
func (m *Manager) bindFile(ctx context.Context, source, target string, readOnly bool) error {
if err := m.sudo(ctx, "install", "-D", "-m", "0644", "/dev/null", target); err != nil {
return fmt.Errorf("create bind target file: %w", err)
}
return m.bindMount(ctx, source, target, readOnly)
}
func (m *Manager) bindDir(ctx context.Context, source, target string, readOnly bool) error {
if err := m.sudo(ctx, "mkdir", "-p", target); err != nil {
return fmt.Errorf("create bind target dir: %w", err)
}
return m.bindMount(ctx, source, target, readOnly)
}
func (m *Manager) bindMount(ctx context.Context, source, target string, readOnly bool) error {
if err := m.sudo(ctx, "mount", "--bind", source, target); err != nil {
return err
}
if !readOnly {
return nil
}
// Single-step ro bind isn't honored by all kernels — the bind happens
// rw and the ro flag is silently ignored. Remount makes it stick.
return m.sudo(ctx, "mount", "-o", "remount,bind,ro", target)
}
// mountsUnder returns the list of mount targets at or under chrootRoot.
// findmnt's output is one path per line; an empty list means no leftovers.
func (m *Manager) mountsUnder(ctx context.Context, chrootRoot string) ([]string, error) {
out, err := m.runner.Run(ctx, "findmnt", "--output", "TARGET", "--list", "--noheadings")
if err != nil {
return nil, err
}
var mounts []string
prefix := chrootRoot + string(os.PathSeparator)
for _, line := range strings.Split(string(out), "\n") {
t := strings.TrimSpace(line)
if t == chrootRoot || strings.HasPrefix(t, prefix) {
mounts = append(mounts, t)
}
}
return mounts, nil
}
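// Example (paths illustrative): with chrootRoot = /var/lib/banger/jail/firecracker/abc/root,
// findmnt output lines
//
//	/var/lib/banger/jail/firecracker/abc/root
//	/var/lib/banger/jail/firecracker/abc/root/usr/lib
//	/home
//
// yield the first two targets; /home doesn't share the prefix and is dropped.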
func (m *Manager) stageDrive(ctx context.Context, chrootRoot string, d ChrootDriveSpec, uid, gid int) error {
info, err := os.Stat(d.HostPath)
if err != nil {
return err
}
if info.Mode()&os.ModeDevice != 0 {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("stat %s: cannot read device numbers", d.HostPath)
}
major := unix.Major(stat.Rdev)
minor := unix.Minor(stat.Rdev)
return m.mknodBlock(ctx, chrootRoot, d.ChrootName, major, minor, uid, gid)
}
return m.linkInto(ctx, chrootRoot, d.HostPath, d.ChrootName, uid, gid)
}
func (m *Manager) stageDevice(ctx context.Context, chrootRoot, chrootName, hostDevice string, uid, gid int) error {
info, err := os.Stat(hostDevice)
if err != nil {
return err
}
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("stat %s: cannot read device numbers", hostDevice)
}
major := unix.Major(stat.Rdev)
minor := unix.Minor(stat.Rdev)
target := chrootRoot + "/" + chrootName
if err := m.sudo(ctx, "mknod", "-m", "0660", target, "c", strconv.FormatUint(uint64(major), 10), strconv.FormatUint(uint64(minor), 10)); err != nil {
return err
}
return m.chown(ctx, target, uid, gid)
}
func (m *Manager) mknodBlock(ctx context.Context, chrootRoot, name string, major, minor uint32, uid, gid int) error {
target := chrootRoot + "/" + name
if err := m.sudo(ctx, "mknod", "-m", "0660", target, "b", strconv.FormatUint(uint64(major), 10), strconv.FormatUint(uint64(minor), 10)); err != nil {
return err
}
return m.chown(ctx, target, uid, gid)
}
func (m *Manager) linkInto(ctx context.Context, chrootRoot, source, name string, uid, gid int) error {
target := chrootRoot + "/" + name
if err := m.sudo(ctx, "ln", "-f", source, target); err != nil {
return err
}
return m.chown(ctx, target, uid, gid)
}
func (m *Manager) chown(ctx context.Context, target string, uid, gid int) error {
return m.sudo(ctx, "chown", fmt.Sprintf("%d:%d", uid, gid), target)
}
func (m *Manager) sudo(ctx context.Context, name string, args ...string) error {
if os.Geteuid() == 0 {
_, err := m.runner.Run(ctx, name, args...)
return err
}
_, err := m.runner.RunSudo(ctx, append([]string{name}, args...)...)
return err
}
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
return pollPath(ctx, path, timeout, 100*time.Millisecond, label)
}
func pollPath(ctx context.Context, path string, timeout, interval time.Duration, label string) error {
deadline := time.Now().Add(timeout)
for {
if _, err := os.Stat(path); err == nil {
return nil
} else if err != nil && !os.IsNotExist(err) {
return err
}
if time.Now().After(deadline) {
return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(interval):
}
}
}

View file

@ -0,0 +1,471 @@
package fcproc
import (
"context"
"errors"
"log/slog"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// scriptedRunner is a minimal Runner that records every call and
// plays back a pre-scripted sequence of (name, args, out, err)
// steps. Failing to match or running past the script fails the
// test. Mirrors the pattern from internal/daemon/snapshot_test.go
// but lives here because fcproc is a leaf package — it can't import
// its parent's test helpers.
type scriptedRunner struct {
t *testing.T
runs []scriptedCall
sudos []scriptedCall
}
type scriptedCall struct {
matchName string // empty for RunSudo (sudo has no distinct name arg)
matchArgs []string // nil means "don't care"
out []byte
err error
}
func (r *scriptedRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) {
r.t.Helper()
if len(r.runs) == 0 {
r.t.Fatalf("unexpected Run(%q, %v)", name, args)
}
step := r.runs[0]
r.runs = r.runs[1:]
if step.matchName != "" && step.matchName != name {
r.t.Fatalf("Run name = %q, want %q", name, step.matchName)
}
return step.out, step.err
}
func (r *scriptedRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
r.t.Helper()
if len(r.sudos) == 0 {
r.t.Fatalf("unexpected RunSudo(%v)", args)
}
step := r.sudos[0]
r.sudos = r.sudos[1:]
return step.out, step.err
}
// TestWaitForPathReturnsDeadlineExceededWhenSocketNeverAppears pins
// the timeout branch of waitForPath. If this drifts, every callsite
// that wraps it (EnsureSocketAccess on the firecracker API +
// vsock sockets) loses its bounded wait.
func TestWaitForPathReturnsDeadlineExceededWhenSocketNeverAppears(t *testing.T) {
missing := filepath.Join(t.TempDir(), "never-created.sock")
start := time.Now()
err := waitForPath(context.Background(), missing, 150*time.Millisecond, "api socket")
elapsed := time.Since(start)
if !errors.Is(err, context.DeadlineExceeded) {
t.Fatalf("err = %v, want wrapped context.DeadlineExceeded", err)
}
if !contains(err.Error(), "api socket") {
t.Fatalf("err = %v, want label 'api socket' in message", err)
}
// Timeout should fire close to the configured budget, not zero
// (tight-loop regression) and not way over (missing select
// regression). The 100ms poll tick plus the initial stat makes
// the lower bound noisy; check we at least waited a tick.
if elapsed < 90*time.Millisecond {
t.Fatalf("returned after %s; waitForPath exited before its timeout budget", elapsed)
}
}
// TestWaitForPathReturnsOnceSocketAppears pins the happy path:
// when the file materialises mid-wait, the function returns nil
// without having to walk to its deadline.
func TestWaitForPathReturnsOnceSocketAppears(t *testing.T) {
socketPath := filepath.Join(t.TempDir(), "will-appear.sock")
go func() {
time.Sleep(50 * time.Millisecond)
_ = os.WriteFile(socketPath, []byte{}, 0o600)
}()
if err := waitForPath(context.Background(), socketPath, 2*time.Second, "api socket"); err != nil {
t.Fatalf("waitForPath: %v", err)
}
}
// TestWaitForPathRespectsContextCancellation pins the ctx.Done()
// branch — a canceled request must not be blocked by the poll
// interval.
func TestWaitForPathRespectsContextCancellation(t *testing.T) {
missing := filepath.Join(t.TempDir(), "never.sock")
ctx, cancel := context.WithCancel(context.Background())
go func() {
time.Sleep(30 * time.Millisecond)
cancel()
}()
err := waitForPath(ctx, missing, 5*time.Second, "api socket")
if !errors.Is(err, context.Canceled) {
t.Fatalf("err = %v, want context.Canceled when ctx is cancelled mid-wait", err)
}
}
// TestEnsureSocketAccessChmodFailureBubbles verifies the chmod step
// fails fast before any ownership handoff. Once chown runs, the
// bounded helper no longer owns the socket and can't tighten its mode
// without CAP_FOWNER, so the order matters.
func TestEnsureSocketAccessChmodFailureBubbles(t *testing.T) {
socketPath := filepath.Join(t.TempDir(), "present.sock")
if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
t.Fatalf("WriteFile: %v", err)
}
chmodErr := errors.New("sudo chmod failed")
runner := &scriptedRunner{
t: t,
sudos: []scriptedCall{{err: chmodErr}},
}
mgr := New(runner, Config{}, slog.Default())
err := mgr.EnsureSocketAccess(context.Background(), socketPath, "api socket")
if !errors.Is(err, chmodErr) {
t.Fatalf("err = %v, want chmod error", err)
}
// chown must not have been attempted.
if len(runner.sudos) != 0 {
t.Fatalf("chown was attempted after chmod failed: %d sudo calls left", len(runner.sudos))
}
}
// TestEnsureSocketAccessChownFailureBubbles verifies the ownership
// handoff still surfaces errors after chmod succeeds.
func TestEnsureSocketAccessChownFailureBubbles(t *testing.T) {
socketPath := filepath.Join(t.TempDir(), "present.sock")
if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
t.Fatalf("WriteFile: %v", err)
}
chownErr := errors.New("sudo chown failed")
runner := &scriptedRunner{
t: t,
sudos: []scriptedCall{
{}, // chmod succeeds
{err: chownErr}, // chown fails
},
}
mgr := New(runner, Config{}, slog.Default())
err := mgr.EnsureSocketAccess(context.Background(), socketPath, "api socket")
if !errors.Is(err, chownErr) {
t.Fatalf("err = %v, want chown error", err)
}
}
// TestEnsureSocketAccessTimesOutBeforeTouchingRunner pins the
// ordering contract: if waitForPath never sees the socket, the
// sudo commands must not run. Running chmod/chown against a
// non-existent path would only add noise to the logs.
func TestEnsureSocketAccessTimesOutBeforeTouchingRunner(t *testing.T) {
missing := filepath.Join(t.TempDir(), "never.sock")
runner := &scriptedRunner{t: t} // no scripted calls — any runner invocation fails the test
mgr := New(runner, Config{}, slog.Default())
// EnsureSocketAccess's waitForPath has a hardcoded 5s timeout,
// and we can't inject a shorter one without widening the API.
// Use a short context instead — cancellation short-circuits
// waitForPath via the ctx.Done() branch.
ctx, cancel := context.WithTimeout(context.Background(), 150*time.Millisecond)
defer cancel()
err := mgr.EnsureSocketAccess(ctx, missing, "api socket")
if err == nil {
t.Fatal("EnsureSocketAccess: want error when socket never appears")
}
}
// TestEnsureSocketAccessForAsyncReturnsImmediatelyWhenNoPaths pins the
// fast-path: callers can hand the helper an empty list (e.g. when VSockPath
// is unset) and get a no-op channel back without spinning a goroutine.
func TestEnsureSocketAccessForAsyncReturnsImmediatelyWhenNoPaths(t *testing.T) {
runner := &scriptedRunner{t: t} // any runner call would fail the test
mgr := New(runner, Config{}, slog.Default())
done := mgr.EnsureSocketAccessForAsync(context.Background(), []string{"", " "}, 1000, 1000)
select {
case err := <-done:
if err != nil {
t.Fatalf("got %v, want nil for empty input", err)
}
case <-time.After(time.Second):
t.Fatal("EnsureSocketAccessForAsync did not signal completion")
}
}
// TestEnsureSocketAccessForAsyncWaitsForSocketThenChowns pins the boot-time
// race fix: while Machine.Start spins up firecracker, the helper polls for the
// socket and runs chmod + chown the moment it appears. If this drifts, the
// SDK's HTTP probe gets EACCES on a root-owned socket and Start times out.
func TestEnsureSocketAccessForAsyncWaitsForSocketThenChowns(t *testing.T) {
socketPath := filepath.Join(t.TempDir(), "delayed.sock")
go func() {
time.Sleep(50 * time.Millisecond)
_ = os.WriteFile(socketPath, []byte{}, 0o600)
}()
runner := &scriptedRunner{
t: t,
sudos: []scriptedCall{
{}, // chmod 600
{}, // chown uid:gid
},
}
mgr := New(runner, Config{}, slog.Default())
done := mgr.EnsureSocketAccessForAsync(context.Background(), []string{socketPath}, 4242, 4242)
select {
case err := <-done:
if err != nil {
t.Fatalf("EnsureSocketAccessForAsync: %v", err)
}
case <-time.After(2 * time.Second):
t.Fatal("EnsureSocketAccessForAsync did not signal completion")
}
if len(runner.sudos) != 0 {
t.Fatalf("expected both chmod and chown to run, %d sudo calls remaining", len(runner.sudos))
}
}
// recordingRunner captures every Run/RunSudo invocation's full
// argv. Used to assert that ensureSocketAccessFor's fallback path
// passes `chown -h` rather than the symlink-following plain `chown`.
type recordingRunner struct {
sudos [][]string
runs [][]string
}
func (r *recordingRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) {
r.runs = append(r.runs, append([]string{name}, args...))
return nil, nil
}
func (r *recordingRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
r.sudos = append(r.sudos, append([]string(nil), args...))
return nil, nil
}
// TestCleanupJailerChrootRejectsSymlink pins the TOCTOU-closing
// fcproc-side check: even if a daemon-uid attacker somehow bypasses
// the helper handler's validateNotSymlink (or races it), the cleanup
// itself refuses a symlinked path before any umount/rm shells.
func TestCleanupJailerChrootRejectsSymlink(t *testing.T) {
dir := t.TempDir()
target := filepath.Join(dir, "real")
if err := os.Mkdir(target, 0o700); err != nil {
t.Fatalf("mkdir target: %v", err)
}
link := filepath.Join(dir, "link")
if err := os.Symlink(target, link); err != nil {
t.Fatalf("symlink: %v", err)
}
// scriptedRunner with no scripted calls — any shell invocation
// trips r.t.Fatalf, proving rejection happened before umount/rm.
runner := &scriptedRunner{t: t}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), link); err == nil {
t.Fatal("CleanupJailerChroot(symlink) succeeded, want error")
}
}
// TestCleanupJailerChrootRejectsIntermediateSymlink covers the
// `/jail/firecracker/<vmid> → /` shape: the leaf "/root" component
// is a real directory inside the redirected target, but EvalSymlinks
// resolves to a different path so we still bail.
func TestCleanupJailerChrootRejectsIntermediateSymlink(t *testing.T) {
dir := t.TempDir()
realParent := filepath.Join(dir, "real-parent")
if err := os.MkdirAll(filepath.Join(realParent, "root"), 0o700); err != nil {
t.Fatalf("mkdir real: %v", err)
}
linkParent := filepath.Join(dir, "link-parent")
if err := os.Symlink(realParent, linkParent); err != nil {
t.Fatalf("symlink: %v", err)
}
chrootViaSymlink := filepath.Join(linkParent, "root")
runner := &scriptedRunner{t: t}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), chrootViaSymlink); err == nil {
t.Fatal("CleanupJailerChroot(symlinked-parent) succeeded, want error")
}
}
// TestCleanupJailerChrootHappyPathWithoutMounts pins the no-leak case:
// when findmnt reports zero mounts under the chroot, the cleanup
// skips straight to `sudo rm -rf` without invoking umount2 / sudo
// umount at all. Regression guard for the umount2 rewrite — if the
// new logic leaks an extra runner call here, this test will fail.
func TestCleanupJailerChrootHappyPathWithoutMounts(t *testing.T) {
dir := t.TempDir()
chroot := filepath.Join(dir, "root")
if err := os.Mkdir(chroot, 0o700); err != nil {
t.Fatalf("mkdir chroot: %v", err)
}
runner := &scriptedRunner{
t: t,
runs: []scriptedCall{
// First mountsUnder() — pre-detach. Empty stdout = no mounts.
{matchName: "findmnt", out: nil},
// Second mountsUnder() — post-detach guard. Same.
{matchName: "findmnt", out: nil},
},
// sudo rm -rf -- chroot.
sudos: []scriptedCall{{}},
}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), chroot); err != nil {
t.Fatalf("CleanupJailerChroot: %v", err)
}
if len(runner.runs) != 0 {
t.Fatalf("findmnt scripted calls left over: %d", len(runner.runs))
}
if len(runner.sudos) != 0 {
t.Fatalf("sudo scripted calls left over: %d", len(runner.sudos))
}
}
// TestCleanupJailerChrootDetachesMountsDeepestFirst pins the ordering
// contract for the umount2 rewrite: child mounts come off before
// parents, otherwise the parent unmount would race against in-use
// children. The non-root code path shells `sudo umount --lazy`, which
// the recording runner captures so we can assert order + the --lazy
// flag.
func TestCleanupJailerChrootDetachesMountsDeepestFirst(t *testing.T) {
if os.Geteuid() == 0 {
t.Skip("euid 0 takes the umount2 syscall branch; this test exercises the sudo fallback")
}
dir := t.TempDir()
chroot := filepath.Join(dir, "root")
if err := os.Mkdir(chroot, 0o700); err != nil {
t.Fatalf("mkdir chroot: %v", err)
}
parent := chroot
child := filepath.Join(chroot, "lib")
deep := filepath.Join(child, "deep")
findmntOut := []byte(strings.Join([]string{parent, child, deep}, "\n"))
runner := &mountRecordingRunner{findmntOut: findmntOut}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), chroot); err != nil {
t.Fatalf("CleanupJailerChroot: %v", err)
}
// Three umount + final rm -rf. The umount targets must be deep,
// child, parent in that order.
wantTargets := []string{deep, child, parent}
if len(runner.umountTargets) != len(wantTargets) {
t.Fatalf("umount calls = %v, want %d", runner.umountTargets, len(wantTargets))
}
for i, want := range wantTargets {
if runner.umountTargets[i] != want {
t.Fatalf("umount[%d] = %q, want %q", i, runner.umountTargets[i], want)
}
}
if !runner.lazyFlagSeen {
t.Fatalf("expected umount --lazy on the sudo branch, args = %v", runner.umountArgs)
}
if !runner.rmCalled {
t.Fatal("rm -rf was never invoked after the umount sweep")
}
}
// mountRecordingRunner stubs out findmnt + sudo for the cleanup path:
// the first findmnt call returns the canned mount list (pre-detach),
// subsequent calls return empty to simulate the kernel having dropped
// each mount as we asked. sudo umount/rm calls are captured and
// answer success.
type mountRecordingRunner struct {
findmntOut []byte
findmntCalls int
umountTargets []string
umountArgs [][]string
lazyFlagSeen bool
rmCalled bool
}
func (r *mountRecordingRunner) Run(_ context.Context, name string, _ ...string) ([]byte, error) {
if name == "findmnt" {
r.findmntCalls++
if r.findmntCalls == 1 {
return r.findmntOut, nil
}
return nil, nil
}
return nil, nil
}
func (r *mountRecordingRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
if len(args) == 0 {
return nil, nil
}
switch args[0] {
case "umount":
// Last arg is the target. Earlier args are flags.
if len(args) >= 2 {
r.umountTargets = append(r.umountTargets, args[len(args)-1])
}
r.umountArgs = append(r.umountArgs, append([]string(nil), args...))
for _, a := range args[1 : len(args)-1] {
if a == "--lazy" || a == "-l" {
r.lazyFlagSeen = true
}
}
case "rm":
r.rmCalled = true
}
return nil, nil
}
// TestEnsureSocketAccessSudoBranchUsesChownNoFollow pins the
// symlink-defence on the local-priv (non-root) path: a follow-symlink
// chown on a daemon-uid attacker-planted symlink is the same arbitrary
// file-ownership primitive we close in the root branch via
// O_PATH|O_NOFOLLOW. Test only runs as non-root (the syscall branch is
// taken when euid == 0, which CI doesn't see).
func TestEnsureSocketAccessSudoBranchUsesChownNoFollow(t *testing.T) {
if os.Geteuid() == 0 {
t.Skip("euid 0 takes the syscall branch; the sudo branch is only reachable as a regular user")
}
socketPath := filepath.Join(t.TempDir(), "present.sock")
if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
t.Fatalf("WriteFile: %v", err)
}
runner := &recordingRunner{}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.EnsureSocketAccess(context.Background(), socketPath, "api socket"); err != nil {
t.Fatalf("EnsureSocketAccess: %v", err)
}
if len(runner.sudos) != 2 {
t.Fatalf("got %d sudo calls, want 2 (chmod, chown)", len(runner.sudos))
}
chown := runner.sudos[1]
if len(chown) < 2 || chown[0] != "chown" {
t.Fatalf("second sudo call = %v, want chown", chown)
}
hasNoFollow := false
for _, arg := range chown[1:] {
if arg == "-h" {
hasNoFollow = true
break
}
}
if !hasNoFollow {
t.Fatalf("chown args = %v, missing the -h symlink-no-follow flag", chown)
}
}
func contains(s, sub string) bool {
for i := 0; i+len(sub) <= len(s); i++ {
if s[i:i+len(sub)] == sub {
return true
}
}
return false
}

View file

@ -0,0 +1,173 @@
package fcproc
import (
"errors"
"fmt"
"os"
"path/filepath"
"testing"
)
// pidfileFixture builds the on-disk shape findByJailerPidfile inspects:
// a /proc-like tree (one entry per pid with comm), an api-sock symlink
// pointing into a faux chroot, and the chroot's firecracker.pid file.
type pidfileFixture struct {
root string
proc string
runtime string
chroots string
}
func newPidfileFixture(t *testing.T) *pidfileFixture {
t.Helper()
root := t.TempDir()
f := &pidfileFixture{
root: root,
proc: filepath.Join(root, "proc"),
runtime: filepath.Join(root, "runtime"),
chroots: filepath.Join(root, "chroots"),
}
for _, dir := range []string{f.proc, f.runtime, f.chroots} {
if err := os.MkdirAll(dir, 0o755); err != nil {
t.Fatalf("mkdir %s: %v", dir, err)
}
}
prev := procDir
procDir = f.proc
t.Cleanup(func() { procDir = prev })
return f
}
// addProc writes /proc/<pid>/comm. Mirrors the real /proc shape (comm
// has a trailing newline; production code trims it with strings.TrimSpace).
func (f *pidfileFixture) addProc(t *testing.T, pid int, comm string) {
t.Helper()
pidDir := filepath.Join(f.proc, fmt.Sprint(pid))
if err := os.MkdirAll(pidDir, 0o755); err != nil {
t.Fatalf("mkdir %s: %v", pidDir, err)
}
if err := os.WriteFile(filepath.Join(pidDir, "comm"), []byte(comm+"\n"), 0o644); err != nil {
t.Fatalf("write comm: %v", err)
}
}
// buildVMSocket lays out the chroot for a VM and returns the api-sock
// path the test points findByJailerPidfile at. pidfileContent is what
// `cat <chroot>/firecracker.pid` will return — pass an empty string to
// skip writing the pidfile.
func (f *pidfileFixture) buildVMSocket(t *testing.T, vmid, pidfileContent string) (apiSock string) {
t.Helper()
chroot := filepath.Join(f.chroots, vmid, "root")
if err := os.MkdirAll(chroot, 0o755); err != nil {
t.Fatalf("mkdir chroot: %v", err)
}
socketTarget := filepath.Join(chroot, "firecracker.socket")
if err := os.WriteFile(socketTarget, nil, 0o600); err != nil {
t.Fatalf("write socket placeholder: %v", err)
}
if pidfileContent != "" {
if err := os.WriteFile(filepath.Join(chroot, "firecracker.pid"), []byte(pidfileContent), 0o600); err != nil {
t.Fatalf("write pidfile: %v", err)
}
}
apiSock = filepath.Join(f.runtime, "fc-"+vmid+".sock")
if err := os.Symlink(socketTarget, apiSock); err != nil {
t.Fatalf("symlink api sock: %v", err)
}
return apiSock
}
func TestFindByJailerPidfileHappyPath(t *testing.T) {
f := newPidfileFixture(t)
apiSock := f.buildVMSocket(t, "abc", "100\n")
f.addProc(t, 100, "firecracker")
got, err := findByJailerPidfile(apiSock)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != 100 {
t.Fatalf("pid = %d, want 100", got)
}
}
func TestFindByJailerPidfileMissingPidfile(t *testing.T) {
f := newPidfileFixture(t)
// VM exists in the chroot layout but no pidfile (e.g. VM was created
// but never started, or stopped and pidfile cleared).
apiSock := f.buildVMSocket(t, "abc", "")
_, err := findByJailerPidfile(apiSock)
if !errors.Is(err, errFirecrackerPIDNotFound) {
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
}
}
func TestFindByJailerPidfileStalePID(t *testing.T) {
f := newPidfileFixture(t)
// Pidfile points at a PID with no /proc entry — the FC died but the
// pidfile was left behind. Reconcile must treat this as "not running"
// so the rediscoverHandles path can mark the VM stopped cleanly.
apiSock := f.buildVMSocket(t, "abc", "100\n")
// Deliberately don't addProc(100, ...).
_, err := findByJailerPidfile(apiSock)
if !errors.Is(err, errFirecrackerPIDNotFound) {
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
}
}
func TestFindByJailerPidfileWrongComm(t *testing.T) {
f := newPidfileFixture(t)
// PID was recycled by the kernel and now belongs to some other
// process. The comm check is what catches this — pidfile content is
// untrusted across reboots / PID-wraparound.
apiSock := f.buildVMSocket(t, "abc", "100\n")
f.addProc(t, 100, "bash")
_, err := findByJailerPidfile(apiSock)
if !errors.Is(err, errFirecrackerPIDNotFound) {
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
}
}
func TestFindByJailerPidfileGarbageContent(t *testing.T) {
f := newPidfileFixture(t)
apiSock := f.buildVMSocket(t, "abc", "not-a-pid\n")
_, err := findByJailerPidfile(apiSock)
if !errors.Is(err, errFirecrackerPIDNotFound) {
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
}
}
func TestFindByJailerPidfileNonSymlinkApiSock(t *testing.T) {
f := newPidfileFixture(t)
// Direct (non-jailer) launches produce a regular-file api sock with
// no chroot beside it. Pidfile lookup can't help; fall through cleanly.
apiSock := filepath.Join(f.runtime, "direct-launch.sock")
if err := os.WriteFile(apiSock, nil, 0o600); err != nil {
t.Fatalf("write apiSock: %v", err)
}
_, err := findByJailerPidfile(apiSock)
if !errors.Is(err, errFirecrackerPIDNotFound) {
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
}
}
func TestFindByJailerPidfileTrimsWhitespace(t *testing.T) {
f := newPidfileFixture(t)
// Some FC versions write the pidfile with stray whitespace; the
// parser must tolerate it.
apiSock := f.buildVMSocket(t, "abc", " 100 \n\n")
f.addProc(t, 100, "firecracker")
got, err := findByJailerPidfile(apiSock)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != 100 {
t.Fatalf("pid = %d, want 100", got)
}
}

View file

@ -0,0 +1,35 @@
package daemon
import (
"context"
"io"
"os"
"time"
"banger/internal/guest"
)
// guestSSHClient is the narrow guest-SSH surface the daemon uses for
// workspace prepare / export and ad-hoc guest interactions.
type guestSSHClient interface {
Close() error
RunScript(context.Context, string, io.Writer) error
RunScriptOutput(context.Context, string) ([]byte, error)
UploadFile(context.Context, string, os.FileMode, []byte, io.Writer) error
StreamTar(context.Context, string, string, io.Writer) error
StreamTarEntries(context.Context, string, []string, string, io.Writer) error
}
func (d *Daemon) waitForGuestSSH(ctx context.Context, address string, interval time.Duration) error {
if d != nil && d.guestWaitForSSH != nil {
return d.guestWaitForSSH(ctx, address, d.config.SSHKeyPath, interval)
}
return guest.WaitForSSH(ctx, address, d.config.SSHKeyPath, d.layout.KnownHostsPath, interval)
}
func (d *Daemon) dialGuest(ctx context.Context, address string) (guestSSHClient, error) {
if d != nil && d.guestDial != nil {
return d.guestDial(ctx, address, d.config.SSHKeyPath)
}
return guest.Dial(ctx, address, d.config.SSHKeyPath, d.layout.KnownHostsPath)
}
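// guestWaitForSSH and guestDial are override hooks: when set (e.g. by a test),
// the daemon uses them instead of the real guest package. A stub wiring would
// look roughly like this — stubGuest is a hypothetical guestSSHClient
// implementation, not a type defined here:
//
//	d.guestWaitForSSH = func(ctx context.Context, addr, keyPath string, _ time.Duration) error {
//		return nil // pretend the guest's sshd is already reachable
//	}
//	d.guestDial = func(ctx context.Context, addr, keyPath string) (guestSSHClient, error) {
//		return &stubGuest{}, nil
//	}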

Some files were not shown because too many files have changed in this diff.