banger/internal/daemon/doctor_test.go
Thales Maciel 1c1ca7d6a4
doctor: pin firecracker version range, distro-aware install hint
Pre-release polish: be explicit about which firecracker versions
banger has been validated against, and give users a one-line install
suggestion when the binary is missing rather than the previous
generic "install firecracker or set firecracker_bin".

internal/firecracker/version.go (new):
  * MinSupportedVersion = "1.5.0" — the floor banger refuses to
    launch below. Bumping this is a deliberate decision, paired
    with whatever helper feature started requiring the newer
    firecracker.
  * KnownTestedVersion = "1.14.1" — what banger's smoke suite
    actually runs against today.
  * SemVer + Compare + ParseVersionOutput, table-tested. The parser
    tolerates the trailing "exiting successfully" log line that
    firecracker tacks onto --version; only the canonical
    "Firecracker vX.Y.Z" line matters.
  * QueryVersion shells `<bin> --version` through a CommandRunner-
    shaped interface; doesn't import internal/system to keep the
    firecracker package leaf-clean.

internal/daemon/doctor.go:
  * New addFirecrackerVersionCheck replaces the previous bare
    RequireExecutable preflight for firecracker. Three outcomes:
    PASS within [Min, Tested], WARN above Tested (newer firecracker
    usually works but is outside the tested window), FAIL below Min
    or when the binary is missing.
  * On missing binary, surfaces a distro-aware install command via
    parseOSReleaseIDs(/etc/os-release) →
    guessFirecrackerInstallCommand. Pinned suggestions for debian
    (apt), arch/manjaro (paru), and nixos (nix-env). Other distros
    get only the upstream Releases URL — guessing wrong sends users
    on a wild goose chase.
  * runtimeChecks no longer includes the firecracker preflight; the
    new check subsumes it.

README.md:
  * Requirements line now spells out the tested-against version
    (v1.14.1) and the supported floor (≥ v1.5.0), and points at
    `banger doctor` for the version check + install hint.

Tests: ParseVersionOutput across canonical/prerelease/garbage inputs,
SemVer.Compare across major/minor/patch boundaries, MustParseSemVer
panics on malformed inputs. Doctor-side: PASS on tested version,
FAIL below Min, WARN above Tested, FAIL with upstream URL when
missing, install-hint dispatch table covering debian/ubuntu (via
ID_LIKE)/arch/manjaro/nixos/fedora-fallback/missing-os-release.
The renamed TestDoctorReport_MissingFirecrackerFails... now asserts
against the new check name. Live `banger doctor` reports
"v1.14.1 at /usr/bin/firecracker (within tested range; min v1.5.0,
tested v1.14.1)" against the smoke host.

Smoke bare_run still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:47:42 -03:00

551 lines
20 KiB
Go

package daemon
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/firecracker"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
)
// permissiveRunner is a canned-response stand-in for
// system.CommandRunner: every invocation, whatever the command or its
// arguments, yields the same (out, err) pair. The doctor tests assert
// on the aggregated report rather than on which ip/iptables commands
// were issued, so a single fixed answer keeps the setup small.
type permissiveRunner struct {
	out []byte // bytes handed back from every call
	err error  // error handed back from every call
}

// Run ignores the context, command name and arguments and returns the
// configured response.
func (p *permissiveRunner) Run(context.Context, string, ...string) ([]byte, error) {
	return p.out, p.err
}

// RunSudo returns the same configured response as Run; its inputs have
// no influence on the result.
func (p *permissiveRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
	return p.Run(ctx, "sudo", args...)
}
// buildDoctorDaemon assembles a *Daemon for the doctorReport tests.
// It prepares, all under per-test temp dirs:
//   - a PATH directory populated with a fake executable for each tool
//     name listed below,
//   - stub firecracker and vsock-agent scripts that exit 0,
//   - a placeholder SSH key and an empty file standing in for the
//     vsock host device,
//   - a permissiveRunner whose canned output is a default route via
//     eth0 (intended to let NAT's uplink detection succeed).
//
// PATH and BANGER_VSOCK_AGENT_BIN are overridden via t.Setenv, so they
// are restored when the test ends.
func buildDoctorDaemon(t *testing.T) *Daemon {
	t.Helper()

	// Shared body for the stub binaries: a shell script that exits 0.
	stubScript := []byte("#!/bin/sh\nexit 0\n")

	toolDir := t.TempDir()
	tools := []string{
		"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep",
		"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs",
		"iptables", "sysctl", "mkfs.ext4", "mount", "umount", "cp",
	}
	for _, tool := range tools {
		writeFakeExecutable(t, filepath.Join(toolDir, tool))
	}
	t.Setenv("PATH", toolDir)

	fcPath := filepath.Join(t.TempDir(), "firecracker")
	if err := os.WriteFile(fcPath, stubScript, 0o755); err != nil {
		t.Fatalf("write firecracker: %v", err)
	}

	agentPath := filepath.Join(t.TempDir(), "banger-vsock-agent")
	if err := os.WriteFile(agentPath, stubScript, 0o755); err != nil {
		t.Fatalf("write vsock helper: %v", err)
	}
	t.Setenv("BANGER_VSOCK_AGENT_BIN", agentPath)

	keyPath := filepath.Join(t.TempDir(), "id_ed25519")
	if err := os.WriteFile(keyPath, []byte("unused"), 0o600); err != nil {
		t.Fatalf("write ssh key: %v", err)
	}

	vsockDev := filepath.Join(t.TempDir(), "vhost-vsock")
	if err := os.WriteFile(vsockDev, []byte{}, 0o644); err != nil {
		t.Fatalf("write vsock host device: %v", err)
	}

	fakeRunner := &permissiveRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}

	layout := paths.Layout{
		ConfigDir: t.TempDir(),
		StateDir:  t.TempDir(),
		DBPath:    filepath.Join(t.TempDir(), "state.db"),
	}
	cfg := model.DaemonConfig{
		FirecrackerBin:    fcPath,
		SSHKeyPath:        keyPath,
		BridgeName:        model.DefaultBridgeName,
		BridgeIP:          model.DefaultBridgeIP,
		StatsPollInterval: model.DefaultStatsPollInterval,
	}

	d := &Daemon{layout: layout, config: cfg, runner: fakeRunner}
	wireServices(d)
	d.vm.vsockHostDevice = vsockDev

	// Belt and braces: wireServices may not have handed the Daemon's
	// runner to the HostNetwork it wired up, so assign the permissive
	// runner to d.net explicitly.
	d.net.runner = fakeRunner
	return d
}
// findCheck scans report.Checks in order and returns a pointer to the
// first entry whose Name equals name, or nil when there is no match.
// It lives here as a test helper instead of being a method on Report.
func findCheck(report system.Report, name string) *system.CheckResult {
	for idx := 0; idx < len(report.Checks); idx++ {
		candidate := &report.Checks[idx]
		if candidate.Name != name {
			continue
		}
		return candidate
	}
	return nil
}
// TestDoctorReport_NonSystemModeEmitsSecurityWarn covers the branch
// taken when install.toml does not exist: the "security posture" row
// must be a warn whose details mention running outside the system
// install and point at docs/privileges.md. A pass row here would
// suggest guarantees the install isn't providing. The test calls the
// path-injectable addSecurityPostureChecksAt so the result does not
// depend on whether the host has /etc/banger/install.toml.
func TestDoctorReport_NonSystemModeEmitsSecurityWarn(t *testing.T) {
	d := buildDoctorDaemon(t)

	var rep system.Report
	// A file name inside a fresh temp dir: guaranteed not to exist.
	absentInstall := filepath.Join(t.TempDir(), "install.toml")
	d.addSecurityPostureChecksAt(context.Background(), &rep, absentInstall, t.TempDir())

	posture := findCheck(rep, "security posture")
	switch {
	case posture == nil:
		t.Fatal("security posture check missing from report")
	case posture.Status != system.CheckStatusWarn:
		t.Fatalf("security posture status = %q, want warn", posture.Status)
	}

	details := strings.Join(posture.Details, " ")
	if !strings.Contains(details, "outside the system install") {
		t.Fatalf("warn details = %q, want mention of non-system mode", details)
	}
	if !strings.Contains(details, "docs/privileges.md") {
		t.Fatalf("warn details = %q, want pointer to docs/privileges.md", details)
	}
}
// TestAddSocketPermsCheckRejectsWrongMode verifies that
// addSocketPermsCheck reports a fail, with a "mode is" detail, when
// the file's mode (0644) differs from the expected mode (0600). A
// regular file stands in for the socket.
func TestAddSocketPermsCheckRejectsWrongMode(t *testing.T) {
	socketPath := filepath.Join(t.TempDir(), "fake.sock")
	if err := os.WriteFile(socketPath, []byte{}, 0o644); err != nil {
		t.Fatalf("write fake socket: %v", err)
	}
	// os.WriteFile's permission argument is filtered through the process
	// umask: under a restrictive umask such as 077 the file would be
	// created 0600 and silently match the expected mode, making this
	// test fail for the wrong reason. Chmod is not subject to the umask,
	// so force the mismatching mode explicitly.
	if err := os.Chmod(socketPath, 0o644); err != nil {
		t.Fatalf("chmod fake socket: %v", err)
	}
	report := system.Report{}
	addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
	check := findCheck(report, "test socket")
	if check == nil {
		t.Fatal("expected test socket check")
	}
	if check.Status != system.CheckStatusFail {
		t.Fatalf("status = %q, want fail when mode is 0644 vs 0600 expected", check.Status)
	}
	joined := strings.Join(check.Details, " ")
	if !strings.Contains(joined, "mode is") {
		t.Fatalf("details = %q, want mode-mismatch message", joined)
	}
}
// TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch verifies that
// addSocketPermsCheck reports a pass when the file is created with the
// expected mode (0600) and the expected owner is the current uid. A
// regular file stands in for the socket.
func TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch(t *testing.T) {
	const wantMode = 0o600

	sock := filepath.Join(t.TempDir(), "fake.sock")
	if err := os.WriteFile(sock, []byte{}, wantMode); err != nil {
		t.Fatalf("write fake socket: %v", err)
	}

	var rep system.Report
	addSocketPermsCheck(&rep, "test socket", sock, os.Getuid(), wantMode)

	row := findCheck(rep, "test socket")
	switch {
	case row == nil:
		t.Fatal("expected test socket check")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("status = %q, want pass when mode + uid match; details = %v", row.Status, row.Details)
	}
}
// TestAddUnitHardeningCheckFlagsMissingDirective verifies that
// addUnitHardeningCheck fails, and names the absent directive in its
// details, when the unit file lacks one of the required directives.
func TestAddUnitHardeningCheckFlagsMissingDirective(t *testing.T) {
	// The unit body carries two of the three required directives;
	// NoNewPrivileges=yes is deliberately left out.
	const unitBody = "[Service]\nUser=alice\nProtectSystem=strict\n"
	required := []string{"User=alice", "NoNewPrivileges=yes", "ProtectSystem=strict"}

	unitFile := filepath.Join(t.TempDir(), "bangerd.service")
	if err := os.WriteFile(unitFile, []byte(unitBody), 0o644); err != nil {
		t.Fatalf("write unit: %v", err)
	}

	var rep system.Report
	addUnitHardeningCheck(&rep, "unit hardening", unitFile, required)

	row := findCheck(rep, "unit hardening")
	switch {
	case row == nil:
		t.Fatal("expected unit hardening check")
	case row.Status != system.CheckStatusFail:
		t.Fatalf("status = %q, want fail when NoNewPrivileges is missing", row.Status)
	}

	if details := strings.Join(row.Details, " "); !strings.Contains(details, "NoNewPrivileges=yes") {
		t.Fatalf("details = %q, want mention of the missing directive", details)
	}
}
// TestAddUnitHardeningCheckPassesWhenAllPresent verifies that
// addUnitHardeningCheck passes when every required directive appears
// in the unit file. The last required entry, "CapabilityBoundingSet=",
// has no value, while the unit supplies "CapabilityBoundingSet=CAP_CHOWN".
func TestAddUnitHardeningCheckPassesWhenAllPresent(t *testing.T) {
	const unitBody = "[Service]\nNoNewPrivileges=yes\nProtectSystem=strict\nProtectHome=yes\nCapabilityBoundingSet=CAP_CHOWN\n"
	required := []string{"NoNewPrivileges=yes", "ProtectSystem=strict", "CapabilityBoundingSet="}

	unitFile := filepath.Join(t.TempDir(), "bangerd-root.service")
	if err := os.WriteFile(unitFile, []byte(unitBody), 0o644); err != nil {
		t.Fatalf("write unit: %v", err)
	}

	var rep system.Report
	addUnitHardeningCheck(&rep, "unit hardening", unitFile, required)

	row := findCheck(rep, "unit hardening")
	switch {
	case row == nil:
		t.Fatal("expected unit hardening check")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("status = %q, want pass when every directive is present; details = %v", row.Status, row.Details)
	}
}
// TestAddExecutableOwnershipCheckRejectsSymlink verifies that
// addExecutableOwnershipCheck fails, with a detail mentioning
// "symlink", when the path it is given is a symbolic link to the
// binary instead of the binary itself.
func TestAddExecutableOwnershipCheckRejectsSymlink(t *testing.T) {
	base := t.TempDir()
	target := filepath.Join(base, "fc")
	alias := filepath.Join(base, "fc-symlink")

	if err := os.WriteFile(target, []byte{}, 0o755); err != nil {
		t.Fatalf("write fc: %v", err)
	}
	if err := os.Symlink(target, alias); err != nil {
		t.Fatalf("symlink: %v", err)
	}

	var rep system.Report
	addExecutableOwnershipCheck(&rep, "fc binary", alias)

	row := findCheck(rep, "fc binary")
	switch {
	case row == nil:
		t.Fatal("expected fc binary check")
	case row.Status != system.CheckStatusFail:
		t.Fatalf("status = %q, want fail for symlinked binary", row.Status)
	}

	if details := strings.Join(row.Details, " "); !strings.Contains(details, "symlink") {
		t.Fatalf("details = %q, want symlink rejection message", details)
	}
}
// TestAddExecutableOwnershipCheckRejectsGroupWritable verifies that
// addExecutableOwnershipCheck fails for a binary whose mode is 0775
// (group-writable). The test is skipped when running as root.
func TestAddExecutableOwnershipCheckRejectsGroupWritable(t *testing.T) {
	if os.Getuid() == 0 {
		t.Skip("test runs as root; can't construct a non-root-owned check target meaningfully")
	}
	path := filepath.Join(t.TempDir(), "fc")
	if err := os.WriteFile(path, []byte{}, 0o775); err != nil {
		t.Fatalf("write fc: %v", err)
	}
	// os.WriteFile applies the process umask to the requested mode, so
	// with the common umask 022 the file is created 0755 and is not
	// group-writable at all. Chmod bypasses the umask and guarantees the
	// file really has the group-write bit this test is about.
	if err := os.Chmod(path, 0o775); err != nil {
		t.Fatalf("chmod fc: %v", err)
	}
	report := system.Report{}
	addExecutableOwnershipCheck(&report, "fc binary", path)
	check := findCheck(report, "fc binary")
	if check == nil {
		t.Fatal("expected fc binary check")
	}
	if check.Status != system.CheckStatusFail {
		t.Fatalf("status = %q, want fail when binary is group/world writable", check.Status)
	}
}
// TestDoctorReport_SystemModeRunsAllSecurityChecks covers the branch
// taken when install.toml exists. Given a fake install.toml and a fake
// systemd directory holding empty unit files, every security row
// (services, sockets, unit hardening, firecracker ownership) must be
// present in the report. Only the row names are asserted: statuses
// vary because root-owned files are hard to fake in a test. The list
// guards against a refactor silently dropping a check. The
// non-system-mode "security posture" row must be absent.
func TestDoctorReport_SystemModeRunsAllSecurityChecks(t *testing.T) {
	const installTOML = "owner_user = \"alice\"\n" +
		"owner_uid = 1000\n" +
		"owner_gid = 1000\n" +
		"owner_home = \"/home/alice\"\n" +
		"installed_at = 2026-04-28T00:00:00Z\n"

	d := buildDoctorDaemon(t)

	installFile := filepath.Join(t.TempDir(), "install.toml")
	if err := os.WriteFile(installFile, []byte(installTOML), 0o644); err != nil {
		t.Fatalf("write install.toml: %v", err)
	}

	unitDir := t.TempDir()
	units := []string{"bangerd.service", "bangerd-root.service"}
	for _, unit := range units {
		if err := os.WriteFile(filepath.Join(unitDir, unit), []byte(""), 0o644); err != nil {
			t.Fatalf("write fake unit %s: %v", unit, err)
		}
	}

	var rep system.Report
	d.addSecurityPostureChecksAt(context.Background(), &rep, installFile, unitDir)

	expected := []string{
		"helper service",
		"owner daemon service",
		"helper socket",
		"daemon socket",
		"helper unit hardening",
		"daemon unit hardening",
		"firecracker binary ownership",
	}
	for _, want := range expected {
		if findCheck(rep, want) != nil {
			continue
		}
		t.Errorf("system-mode security check %q missing from report", want)
	}

	if unexpected := findCheck(rep, "security posture"); unexpected != nil {
		t.Error("system mode should NOT emit the non-system-mode warn")
	}
}
// TestDoctorReport_StoreErrorSurfacesAsFail verifies that a non-nil
// store error handed to doctorReport yields a failing "state store"
// row whose details include the error's message.
func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
	d := buildDoctorDaemon(t)

	storeErr := errors.New("simulated open failure")
	rep := d.doctorReport(context.Background(), storeErr, false)

	row := findCheck(rep, "state store")
	switch {
	case row == nil:
		t.Fatal("state store check missing from report")
	case row.Status != system.CheckStatusFail:
		t.Fatalf("state store status = %q, want fail (store error should surface)", row.Status)
	}

	if details := strings.Join(row.Details, " "); !strings.Contains(details, "simulated open failure") {
		t.Fatalf("state store details = %q, want the storeErr message", details)
	}
}
// TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall covers a
// fresh install, where the state DB file does not exist yet. That is
// not a failure: the first daemon start creates the file. The "state
// store" row must therefore pass and say "will be created", so a user
// running `banger doctor` before ever booting a VM doesn't see a scary
// red check.
func TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall(t *testing.T) {
	d := buildDoctorDaemon(t)

	// nil store error with the final flag set to true: the
	// "DB missing" scenario.
	rep := d.doctorReport(context.Background(), nil, true)

	row := findCheck(rep, "state store")
	switch {
	case row == nil:
		t.Fatal("state store check missing from report")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("state store status = %q, want pass for a missing DB on fresh install", row.Status)
	}

	if details := strings.Join(row.Details, " "); !strings.Contains(details, "will be created") {
		t.Fatalf("state store details = %q, want mention of 'will be created' so users know this is expected", details)
	}
}
// TestDoctorReport_StoreSuccessSurfacesAsPass verifies that with no
// store error and the missing-DB flag unset, the "state store" row is
// present and passes.
func TestDoctorReport_StoreSuccessSurfacesAsPass(t *testing.T) {
	rep := buildDoctorDaemon(t).doctorReport(context.Background(), nil, false)

	row := findCheck(rep, "state store")
	switch {
	case row == nil:
		t.Fatal("state store check missing from report")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("state store status = %q, want pass", row.Status)
	}
}
// TestDoctorReport_MissingFirecrackerFailsFirecrackerBinaryCheck
// verifies that when the configured firecracker path does not exist,
// the "firecracker binary" row fails and its details include the
// upstream releases URL.
func TestDoctorReport_MissingFirecrackerFailsFirecrackerBinaryCheck(t *testing.T) {
	d := buildDoctorDaemon(t)

	// Configure an absolute path that cannot exist: a fresh temp dir
	// has no entries. NOTE(review): buildDoctorDaemon does not place a
	// firecracker stub in the fake PATH directory, so a bare-name
	// lookup would presumably miss as well — confirm against the
	// doctor's lookup logic before relying on that.
	d.config.FirecrackerBin = filepath.Join(t.TempDir(), "does-not-exist")

	rep := d.doctorReport(context.Background(), nil, false)

	row := findCheck(rep, "firecracker binary")
	switch {
	case row == nil:
		t.Fatal("firecracker binary check missing from report")
	case row.Status != system.CheckStatusFail:
		t.Fatalf("firecracker binary status = %q, want fail when binary missing", row.Status)
	}

	if details := strings.Join(row.Details, " "); !strings.Contains(details, "firecracker-microvm/firecracker/releases") {
		t.Fatalf("missing-binary report should include the upstream URL; got %q", details)
	}
}
// TestFirecrackerInstallHintDispatchesByDistro is a table test for
// firecrackerInstallHint. Each case writes an os-release body to a
// temp file (or leaves the file absent when the body is empty) and
// checks the returned hints:
//   - every case must include the upstream releases URL;
//   - distros with a pinned suggestion must also contain the expected
//     command substring, as at least two hint lines;
//   - everything else must get exactly one line, the URL.
func TestFirecrackerInstallHintDispatchesByDistro(t *testing.T) {
	t.Parallel()

	type hintCase struct {
		name     string // subtest name
		release  string // os-release contents; "" means do not create the file
		wantSub  string // substring expected in the distro-specific hint
		wantNone bool   // true when only the upstream URL is expected
	}
	cases := []hintCase{
		{name: "debian", release: "ID=debian\nVERSION_CODENAME=bookworm\n", wantSub: "apt install firecracker"},
		{name: "ubuntu_id_like_debian", release: "ID=ubuntu\nID_LIKE=debian\n", wantSub: "apt install firecracker"},
		{name: "arch", release: "ID=arch\n", wantSub: "paru -S firecracker"},
		{name: "manjaro_via_id_like", release: "ID=manjaro\nID_LIKE=arch\n", wantSub: "paru -S firecracker"},
		{name: "nixos", release: "ID=nixos\n", wantSub: "nixos.firecracker"},
		{name: "fedora_falls_back_to_url", release: "ID=fedora\n", wantNone: true},
		{name: "missing_file", release: "", wantNone: true},
	}

	for _, tc := range cases {
		tc := tc // per-iteration copy for the parallel closure (required before Go 1.22)
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			releaseFile := filepath.Join(t.TempDir(), "os-release")
			if tc.release != "" {
				if err := os.WriteFile(releaseFile, []byte(tc.release), 0o644); err != nil {
					t.Fatalf("write os-release: %v", err)
				}
			}

			hints := firecrackerInstallHint(releaseFile)
			flat := strings.Join(hints, " ")
			if !strings.Contains(flat, "firecracker-microvm/firecracker/releases") {
				t.Fatalf("hints missing upstream URL; got %q", flat)
			}

			switch {
			case tc.wantNone:
				// Unrecognised distro: only the URL line may be present.
				if len(hints) != 1 {
					t.Fatalf("unrecognised distro got distro-specific hint(s); want only the URL line, got %v", hints)
				}
			case !strings.Contains(flat, tc.wantSub):
				t.Fatalf("hints %q do not contain expected substring %q", flat, tc.wantSub)
			case len(hints) < 2:
				t.Fatalf("expected distro hint + URL; got only %v", hints)
			}
		})
	}
}
// firecrackerVersionRunner is a command runner that really executes
// one binary — the command whose name equals bin — through the wrapped
// system.Runner, and answers every other command with a canned
// response. The doctor uses d.runner both for the firecracker version
// query and for several other checks, so this small dispatcher lets a
// test run a real stub script for the former without rewiring the
// latter.
type firecrackerVersionRunner struct {
	real   system.Runner // executes the command named by bin
	canned []byte        // output returned for every other command
	bin    string        // command name routed to real
}

// Run delegates to the real runner when name matches bin; any other
// command gets the canned output and a nil error.
func (r *firecrackerVersionRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
	if name != r.bin {
		return r.canned, nil
	}
	return r.real.Run(ctx, name, args...)
}

// RunSudo never executes anything: it always returns the canned output
// and a nil error.
func (r *firecrackerVersionRunner) RunSudo(context.Context, ...string) ([]byte, error) {
	return r.canned, nil
}
// stubFirecrackerVersion overwrites the file at d.config.FirecrackerBin
// with a shell script that prints "Firecracker v<version>", then
// replaces d.runner with a firecrackerVersionRunner that executes that
// script for real and answers every other command with a canned
// default-route line. It mutates d in place and returns nothing; d is
// then ready to be passed through doctorReport.
func stubFirecrackerVersion(t *testing.T, d *Daemon, version string) {
	t.Helper()

	fcPath := d.config.FirecrackerBin
	script := "#!/bin/sh\necho 'Firecracker v" + version + "'\n"
	if err := os.WriteFile(fcPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write firecracker stub: %v", err)
	}

	d.runner = &firecrackerVersionRunner{
		real:   system.NewRunner(),
		canned: []byte("default via 10.0.0.1 dev eth0 proto static\n"),
		bin:    fcPath,
	}
}
// TestFirecrackerVersionCheckPasses covers the happy path: when the
// configured firecracker stub reports firecracker.KnownTestedVersion,
// the "firecracker binary" row must pass.
func TestFirecrackerVersionCheckPasses(t *testing.T) {
	d := buildDoctorDaemon(t)
	stubFirecrackerVersion(t, d, firecracker.KnownTestedVersion)

	rep := d.doctorReport(context.Background(), nil, false)

	row := findCheck(rep, "firecracker binary")
	switch {
	case row == nil:
		t.Fatal("firecracker binary check missing from report")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("status = %q, want pass; details=%v", row.Status, row.Details)
	}
}
// TestFirecrackerVersionCheckFailsBelowMin covers the too-old path:
// a stub reporting 0.25.0 — intended to sit below the supported
// minimum — must make the "firecracker binary" row fail.
func TestFirecrackerVersionCheckFailsBelowMin(t *testing.T) {
	d := buildDoctorDaemon(t)
	stubFirecrackerVersion(t, d, "0.25.0")

	rep := d.doctorReport(context.Background(), nil, false)

	row := findCheck(rep, "firecracker binary")
	switch {
	case row == nil:
		t.Fatal("firecracker binary check missing from report")
	case row.Status != system.CheckStatusFail:
		t.Fatalf("status = %q, want fail for below-min version", row.Status)
	}
}
// TestFirecrackerVersionCheckWarnsAboveTested covers the
// newer-than-tested path: a stub reporting 99.0.0 — intended to sit
// above the known-tested version — must make the "firecracker binary"
// row a warn. Newer firecracker usually works, but it is outside the
// tested window.
func TestFirecrackerVersionCheckWarnsAboveTested(t *testing.T) {
	d := buildDoctorDaemon(t)
	stubFirecrackerVersion(t, d, "99.0.0")

	rep := d.doctorReport(context.Background(), nil, false)

	row := findCheck(rep, "firecracker binary")
	switch {
	case row == nil:
		t.Fatal("firecracker binary check missing from report")
	case row.Status != system.CheckStatusWarn:
		t.Fatalf("status = %q, want warn for above-tested version", row.Status)
	}
}
// TestDoctorReport_IncludesEveryDefaultCapability asserts that the
// report contains a row for each default capability check. The
// current defaults are work-disk, dns and nat. A capability added
// later should either extend this list or register its own check
// name; either way this assertion keeps the contract visible.
func TestDoctorReport_IncludesEveryDefaultCapability(t *testing.T) {
	rep := buildDoctorDaemon(t).doctorReport(context.Background(), nil, false)

	capabilityRows := []string{
		"feature /root work disk",
		"feature vm dns",
		"feature nat",
	}
	for _, want := range capabilityRows {
		if findCheck(rep, want) != nil {
			continue
		}
		t.Errorf("capability check %q missing from report", want)
	}
}
// TestDoctorReport_EmitsVMDefaultsProvenance asserts that the report
// carries a passing "vm defaults" row (an always-pass informational
// check) whose details mention vcpu, memory and disk.
func TestDoctorReport_EmitsVMDefaultsProvenance(t *testing.T) {
	rep := buildDoctorDaemon(t).doctorReport(context.Background(), nil, false)

	row := findCheck(rep, "vm defaults")
	switch {
	case row == nil:
		t.Fatal("vm defaults check missing from report")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("vm defaults status = %q, want pass (this is an always-pass informational check)", row.Status)
	}

	details := strings.Join(row.Details, "\n")
	labels := []string{"vcpu:", "memory:", "disk:"}
	for _, label := range labels {
		if strings.Contains(details, label) {
			continue
		}
		t.Errorf("vm defaults details missing %q; got:\n%s", label, details)
	}
}