package daemon import ( "context" "fmt" "os" "path/filepath" "runtime" "strings" "syscall" "time" "banger/internal/buildinfo" "banger/internal/config" "banger/internal/firecracker" "banger/internal/imagecat" "banger/internal/installmeta" "banger/internal/model" "banger/internal/paths" "banger/internal/store" "banger/internal/system" ) // systemdSystemDir is the path systemd reads enabled units from. Pulled // out as a var (not a const) so the security-posture tests can swap it // for a tempdir without faking /etc/systemd/system on the test host. var systemdSystemDir = "/etc/systemd/system" func Doctor(ctx context.Context) (system.Report, error) { userLayout, err := paths.Resolve() if err != nil { return system.Report{}, err } cfg, err := config.Load(userLayout) if err != nil { return system.Report{}, err } layout := paths.ResolveSystem() // Doctor must be read-only: running it should never mutate the // state DB (no migrations, no WAL checkpoint, no pragma writes). // Skip OpenReadOnly entirely when the DB file doesn't exist — // that's a fresh install, not an error condition. The first // daemon start will create the file. storeMissing differentiates // "no DB yet" (pass) from "DB present but unreadable" (fail) in // the report. d := &Daemon{ layout: layout, userLayout: userLayout, config: cfg, runner: system.NewRunner(), } var storeErr error storeMissing := false if _, statErr := os.Stat(layout.DBPath); statErr != nil { if os.IsNotExist(statErr) { storeMissing = true } else { storeErr = statErr } } else { db, err := store.OpenReadOnly(layout.DBPath) if err != nil { storeErr = err } else { defer db.Close() d.store = db } } wireServices(d) return d.doctorReport(ctx, storeErr, storeMissing), nil } func (d *Daemon) doctorReport(ctx context.Context, storeErr error, storeMissing bool) system.Report { report := system.Report{} addArchitectureCheck(&report) addBangerVersionCheck(&report, installmeta.DefaultPath) switch { case storeMissing: report.AddPass("state store", "will be created on first daemon start at "+d.layout.DBPath) case storeErr != nil: report.AddFail( "state store", fmt.Sprintf("open %s: %v", d.layout.DBPath, storeErr), "remove or restore the file if corrupt; otherwise check its permissions", ) default: report.AddPass("state store", "readable at "+d.layout.DBPath) } report.AddPreflight("host runtime", d.runtimeChecks(), runtimeStatus(d.config)) report.AddPreflight("core vm lifecycle", d.coreVMLifecycleChecks(), "required host tools available") report.AddPreflight("vsock guest agent", d.vsockChecks(), "vsock guest agent prerequisites available") d.addVMDefaultsCheck(&report) d.addSSHShortcutCheck(&report) d.addCapabilityDoctorChecks(ctx, &report) d.addFirecrackerVersionCheck(ctx, &report) d.addSecurityPostureChecks(ctx, &report) return report } // addFirecrackerVersionCheck verifies the configured firecracker // binary exists, is recent enough for banger's expectations // (firecracker.MinSupportedVersion), and surfaces a distro-aware // install hint if it's missing. Three outcomes: // // - present + version in [Min, Tested]: PASS. // - present + version above Tested: WARN. Newer firecracker // usually works (the API is stable within a major), but it's // outside banger's tested window. // - present + version below Min: FAIL with the upgrade hint. // - missing entirely: FAIL with a guess at the user's package // manager plus the upstream Releases URL. // // We intentionally don't use the generic RequireExecutable preflight // for this check — its static hint string can't carry the distro // dispatch. func (d *Daemon) addFirecrackerVersionCheck(ctx context.Context, report *system.Report) { binPath := strings.TrimSpace(d.config.FirecrackerBin) if binPath == "" { binPath = "firecracker" } resolved, err := system.LookupExecutable(binPath) if err != nil { details := []string{fmt.Sprintf("not found: %s", binPath)} details = append(details, firecrackerInstallHint(osReleaseSource)...) report.AddFail("firecracker binary", details...) return } parsed, err := firecracker.QueryVersion(ctx, d.runner, resolved) if err != nil { report.AddFail("firecracker binary", fmt.Sprintf("`%s --version` failed: %v", resolved, err), "reinstall firecracker; see https://github.com/firecracker-microvm/firecracker/releases") return } reported := parsed.String() min := firecracker.MustParseSemVer(firecracker.MinSupportedVersion) tested := firecracker.MustParseSemVer(firecracker.KnownTestedVersion) switch { case parsed.Compare(min) < 0: report.AddFail("firecracker binary", fmt.Sprintf("%s at %s; banger requires ≥ v%s", reported, resolved, firecracker.MinSupportedVersion), "upgrade firecracker — see https://github.com/firecracker-microvm/firecracker/releases") case parsed.Compare(tested) > 0: report.AddWarn("firecracker binary", fmt.Sprintf("%s at %s (newer than banger's tested v%s; usually works)", reported, resolved, firecracker.KnownTestedVersion)) default: report.AddPass("firecracker binary", fmt.Sprintf("%s at %s (within tested range; min v%s, tested v%s)", reported, resolved, firecracker.MinSupportedVersion, firecracker.KnownTestedVersion)) } } // osReleaseSource is the file the install-hint reads to detect the // host distro. Var rather than const so doctor tests can swap in a // fixture. var osReleaseSource = "/etc/os-release" // firecrackerInstallHint returns 1-2 detail lines describing how to // install firecracker on the current host: a one-line guess based on // /etc/os-release when the distro is recognised, plus the upstream // Releases URL as a universal fallback. Anything we can't recognise // gets only the URL — better silence than wrong instructions. func firecrackerInstallHint(osReleasePath string) []string { hints := []string{} if cmd := guessFirecrackerInstallCommand(osReleasePath); cmd != "" { hints = append(hints, "install: "+cmd) } hints = append(hints, "or download a static binary from https://github.com/firecracker-microvm/firecracker/releases") return hints } // guessFirecrackerInstallCommand reads osReleasePath and returns a // short, copy-pasteable install command for the detected distro, or // "" when no reliable mapping applies. We only suggest commands for // distros where firecracker is actually packaged — guessing wrong // here would send users on a wild goose chase. func guessFirecrackerInstallCommand(osReleasePath string) string { data, err := os.ReadFile(osReleasePath) if err != nil { return "" } id, idLike := parseOSReleaseIDs(string(data)) candidates := append([]string{id}, strings.Fields(idLike)...) for _, c := range candidates { switch c { case "debian": // Packaged in Debian since trixie / bookworm-backports. return "sudo apt install firecracker" case "arch", "manjaro", "endeavouros": // AUR; we don't assume a specific helper, but `paru` is the // common one. Users who prefer yay/makepkg/etc. will // substitute mentally. return "paru -S firecracker # or your preferred AUR helper" case "nixos": return "nix-env -iA nixos.firecracker # or add to your configuration.nix" } } return "" } // parseOSReleaseIDs extracts the ID and ID_LIKE values from an // /etc/os-release blob. Both are returned with surrounding quotes // stripped; missing keys return empty strings. We don't validate // the format beyond `KEY=value` — os-release is a simple format and // any drift would manifest as a quiet "no distro hint" rather than // a false positive. func parseOSReleaseIDs(content string) (id, idLike string) { for _, line := range strings.Split(content, "\n") { line = strings.TrimSpace(line) if rest, ok := strings.CutPrefix(line, "ID="); ok { id = strings.Trim(rest, `"`) } if rest, ok := strings.CutPrefix(line, "ID_LIKE="); ok { idLike = strings.Trim(rest, `"`) } } return id, idLike } // addSecurityPostureChecks verifies the install matches what // docs/privileges.md describes: helper + owner-daemon units active, // sockets at the expected mode/owner, unit files carrying the // hardening directives, and the firecracker binary owned by root + // non-writable. Drift between the doc and the running install would // silently weaken the trust model; surfacing it here makes the doc // load-bearing rather than aspirational. // // In non-system mode (no /etc/banger/install.toml) emits a single // warn pointing at the docs section that explains the looser dev-mode // trust model — a doctor PASS row in that mode would imply guarantees // the install isn't actually providing. func (d *Daemon) addSecurityPostureChecks(ctx context.Context, report *system.Report) { d.addSecurityPostureChecksAt(ctx, report, installmeta.DefaultPath, systemdSystemDir) } // addSecurityPostureChecksAt is the seam tests use: pass a fake // install.toml + systemd dir to exercise the system-mode branch // without writing to /etc. func (d *Daemon) addSecurityPostureChecksAt(ctx context.Context, report *system.Report, installPath, systemdDir string) { meta, err := installmeta.Load(installPath) if err != nil { report.AddWarn("security posture", "running outside the system install (no "+installPath+")", "helper SO_PEERCRED, narrow CapabilityBoundingSet, NoNewPrivileges, and ProtectSystem=strict are bypassed in this mode", "see docs/privileges.md > 'Running outside the system install'; install via `sudo banger system install --owner $USER` for the supported trust model") return } addServiceActiveCheck(ctx, d.runner, report, "helper service", installmeta.DefaultRootHelperService) addServiceActiveCheck(ctx, d.runner, report, "owner daemon service", installmeta.DefaultService) addSocketPermsCheck(report, "helper socket", installmeta.DefaultRootHelperSocketPath, meta.OwnerUID, 0o600) addSocketPermsCheck(report, "daemon socket", installmeta.DefaultSocketPath, meta.OwnerUID, 0o600) addUnitHardeningCheck(report, "helper unit hardening", filepath.Join(systemdDir, installmeta.DefaultRootHelperService), []string{ "NoNewPrivileges=yes", "ProtectSystem=strict", "ProtectHome=yes", "RestrictSUIDSGID=yes", "LockPersonality=yes", "CapabilityBoundingSet=", }) addUnitHardeningCheck(report, "daemon unit hardening", filepath.Join(systemdDir, installmeta.DefaultService), []string{ "User=" + meta.OwnerUser, "NoNewPrivileges=yes", "ProtectSystem=strict", "ProtectHome=read-only", "RestrictSUIDSGID=yes", "LockPersonality=yes", }) addExecutableOwnershipCheck(report, "firecracker binary ownership", d.config.FirecrackerBin) } // addServiceActiveCheck shells `systemctl is-active ` and surfaces // the result. is-active exits non-zero for inactive/failed states but // always prints the state on stdout, so we read the trimmed output and // ignore the exit code. Anything other than "active" is a fail with a // systemctl-restart hint. func addServiceActiveCheck(ctx context.Context, runner system.CommandRunner, report *system.Report, name, service string) { out, _ := runner.Run(ctx, "systemctl", "is-active", service) state := strings.TrimSpace(string(out)) if state == "" { state = "unknown" } if state == "active" { report.AddPass(name, fmt.Sprintf("%s is active", service)) return } report.AddFail(name, fmt.Sprintf("%s is %s, not active", service, state), fmt.Sprintf("run `sudo systemctl restart %s` and re-run `banger doctor`", service)) } // addSocketPermsCheck stat()s the socket path and compares mode + // owner against the values the install promises. Both daemon and // helper sockets are 0600 chowned to the registered owner UID; any // drift means filesystem perms aren't gating access the way the docs // describe. func addSocketPermsCheck(report *system.Report, name, path string, expectedUID int, expectedMode os.FileMode) { info, err := os.Stat(path) if err != nil { report.AddFail(name, fmt.Sprintf("%s: %v", path, err), "is the service running? `sudo systemctl status` and check the runtime dir") return } stat, ok := info.Sys().(*syscall.Stat_t) if !ok { report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path)) return } actualMode := info.Mode().Perm() var problems []string if actualMode != expectedMode { problems = append(problems, fmt.Sprintf("mode is %#o, want %#o", actualMode, expectedMode)) } if int(stat.Uid) != expectedUID { problems = append(problems, fmt.Sprintf("uid is %d, want %d", stat.Uid, expectedUID)) } if len(problems) > 0 { problems = append(problems, "restart the service so the socket gets recreated with correct perms") report.AddFail(name, fmt.Sprintf("%s: %s", path, strings.Join(problems, "; "))) return } report.AddPass(name, fmt.Sprintf("%s: mode %#o, uid %d", path, actualMode, expectedUID)) } // addUnitHardeningCheck reads the systemd unit file and confirms // every required directive is present as a literal substring. Brittle // to formatting changes (a comment-out would slip through), but // strong enough to catch the "someone hand-edited the unit and // dropped NoNewPrivileges" failure mode that motivates this check. // The directives list captures the security-relevant subset of the // renderer in commands_system.go; everything else (Description, // ExecStart, etc.) is operational and not worth pinning here. func addUnitHardeningCheck(report *system.Report, name, path string, required []string) { data, err := os.ReadFile(path) if err != nil { report.AddFail(name, fmt.Sprintf("%s: %v", path, err), "reinstall via `sudo banger system install` to refresh the unit") return } content := string(data) var missing []string for _, directive := range required { if !strings.Contains(content, directive) { missing = append(missing, directive) } } if len(missing) > 0 { report.AddFail(name, fmt.Sprintf("%s missing directives: %s", path, strings.Join(missing, ", ")), "reinstall via `sudo banger system install` to refresh the unit") return } report.AddPass(name, fmt.Sprintf("%s: %d hardening directives present", path, len(required))) } // addExecutableOwnershipCheck mirrors validateRootExecutable's runtime // check at doctor time: regular file, root-owned, executable, not // group/world writable, not a symlink. Doctor catching this once at // install time beats the helper failing every launch with a less // helpful message. func addExecutableOwnershipCheck(report *system.Report, name, path string) { if strings.TrimSpace(path) == "" { report.AddWarn(name, "no firecracker binary path configured") return } info, err := os.Lstat(path) if err != nil { report.AddFail(name, fmt.Sprintf("%s: %v", path, err)) return } if info.Mode()&os.ModeSymlink != 0 { report.AddFail(name, fmt.Sprintf("%s is a symlink", path), "the helper opens the binary with O_NOFOLLOW; resolve the symlink and update firecracker_bin in the daemon config") return } if !info.Mode().IsRegular() { report.AddFail(name, fmt.Sprintf("%s is not a regular file", path)) return } mode := info.Mode().Perm() if mode&0o111 == 0 { report.AddFail(name, fmt.Sprintf("%s mode %#o is not executable", path, mode), "chmod +x the binary") return } if mode&0o022 != 0 { report.AddFail(name, fmt.Sprintf("%s mode %#o is group/world writable", path, mode), "chmod g-w,o-w the binary so the helper accepts it") return } stat, ok := info.Sys().(*syscall.Stat_t) if !ok { report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path)) return } if stat.Uid != 0 { report.AddFail(name, fmt.Sprintf("%s is owned by uid %d, want 0", path, stat.Uid), "`sudo chown root` the firecracker binary") return } report.AddPass(name, fmt.Sprintf("%s: regular, root-owned, mode %#o", path, mode)) } // addSSHShortcutCheck surfaces a gentle warning when banger maintains // an ssh_config file but the user hasn't wired it into ~/.ssh/config. // This is intentionally a warn, not a fail — the shortcut is opt-in // convenience and `banger vm ssh` works either way. func (d *Daemon) addSSHShortcutCheck(report *system.Report) { bangerConfig := BangerSSHConfigPath(d.userLayout) if strings.TrimSpace(bangerConfig) == "" { return } if _, err := os.Stat(bangerConfig); err != nil { // No banger ssh_config rendered yet — nothing to include. return } installed, err := UserSSHIncludeInstalled() if err != nil { report.AddWarn("ssh shortcut", fmt.Sprintf("could not read ~/.ssh/config: %v", err)) return } if installed { report.AddPass("ssh shortcut", "enabled — `ssh .vm` routes through banger") return } report.AddWarn( "ssh shortcut", fmt.Sprintf("`ssh .vm` not enabled (opt-in); run `banger ssh-config --install` or add `Include %s` to ~/.ssh/config", bangerConfig), ) } // addBangerVersionCheck reports the running CLI's version + commit // alongside whatever's recorded in /etc/banger/install.toml. When // the installed copy and the running binary disagree on version or // commit, doctor warns: a stale `banger` running against a freshly- // installed daemon (or vice versa) is the most common version-skew // pitfall, and a one-line warning is friendlier than tracking down // which side is wrong from a launch failure. // // Drift detection is suppressed when EITHER side is "dev"/"unknown" // (untagged build) — those don't have a real version to compare. func addBangerVersionCheck(report *system.Report, installPath string) { cli := buildinfo.Current() cliLine := fmt.Sprintf("CLI %s (commit %s, built %s)", cli.Version, shortCommit(cli.Commit), cli.BuiltAt) meta, err := installmeta.Load(installPath) if err != nil { // Non-system mode (no install.toml). Just report what we have. report.AddPass("banger version", cliLine) return } installLine := fmt.Sprintf("install %s (commit %s, installed %s)", meta.Version, shortCommit(meta.Commit), meta.InstalledAt.Format(time.RFC3339)) if versionsDrift(cli, meta) { report.AddWarn("banger version", cliLine, installLine, "CLI and installed banger disagree; run `sudo banger system install` to refresh, or run the matching CLI binary") return } report.AddPass("banger version", cliLine, installLine+" (matches CLI)") } func versionsDrift(cli buildinfo.Info, meta installmeta.Metadata) bool { // Treat dev/unknown as "no real version on this side" — comparing // a dev build against a tagged install is the local-development // case, not a drift problem worth surfacing. if cli.Version == "dev" || strings.TrimSpace(meta.Version) == "" { return false } if cli.Version != meta.Version { return true } if cli.Commit != "unknown" && strings.TrimSpace(meta.Commit) != "" && cli.Commit != meta.Commit { return true } return false } func shortCommit(c string) string { if len(c) > 8 { return c[:8] } return c } // addArchitectureCheck surfaces a hard-fail when banger is running on // a non-amd64 host. Companion binaries are pinned to amd64 in the // Makefile, the published kernel catalog ships only x86_64 images, and // OCI import pulls linux/amd64 layers. Letting users discover this // through cryptic downstream failures is worse than saying it up front. func addArchitectureCheck(report *system.Report) { if runtime.GOARCH == "amd64" { report.AddPass("host architecture", "amd64") return } report.AddFail( "host architecture", fmt.Sprintf("running on %s; banger today only supports amd64/x86_64 hosts", runtime.GOARCH), "companion build, kernel catalog, and OCI import all assume linux/amd64", ) } // addVMDefaultsCheck surfaces the effective VM sizing that `vm run` / // `vm create` will apply when the user omits the flags. Shown as a // PASS check so it always renders, with per-field provenance // (config|auto|builtin) so users can tell what's driving each number. func (d *Daemon) addVMDefaultsCheck(report *system.Report) { host, err := system.ReadHostResources() var cpus int var memBytes int64 if err == nil { cpus = host.CPUCount memBytes = host.TotalMemoryBytes } defaults := model.ResolveVMDefaults(d.config.VMDefaults, cpus, memBytes) details := []string{ fmt.Sprintf("vcpu: %d (%s)", defaults.VCPUCount, defaults.VCPUSource), fmt.Sprintf("memory: %d MiB (%s)", defaults.MemoryMiB, defaults.MemorySource), fmt.Sprintf("disk: %s (%s)", model.FormatSizeBytes(defaults.WorkDiskSizeBytes), defaults.WorkDiskSource), "override any of these in ~/.config/banger/config.toml under [vm_defaults]", } report.AddPass("vm defaults", details...) } func (d *Daemon) runtimeChecks() *system.Preflight { checks := system.NewPreflight() // Firecracker presence + version is a separate top-level check (see // addFirecrackerVersionCheck) so the report can carry a distro-aware // install hint when the binary is missing — RequireExecutable's // static `hint` string can't do that. checks.RequireFile(d.config.SSHKeyPath, "ssh private key", `set "ssh_key_path" or let banger create its default key`) if helper, err := vsockAgentBinary(d.layout); err == nil { checks.RequireExecutable(helper, "vsock agent helper", `run 'make build' or reinstall banger`) } else { checks.Addf("%v", err) } if d.store != nil && strings.TrimSpace(d.config.DefaultImageName) != "" { name := d.config.DefaultImageName image, err := d.store.GetImageByName(context.Background(), name) if err == nil { checks.RequireFile(image.RootfsPath, "default image rootfs", `re-register or rebuild the default image`) checks.RequireFile(image.KernelPath, "default image kernel", `re-register or rebuild the default image`) if strings.TrimSpace(image.InitrdPath) != "" { checks.RequireFile(image.InitrdPath, "default image initrd", `re-register or rebuild the default image`) } } else if !defaultImageInCatalog(name) { checks.Addf("default image %q is not registered and not in the imagecat catalog", name) } // If the default image isn't local but is cataloged, vm create // will auto-pull it on first use — no error to surface. } return checks } func defaultImageInCatalog(name string) bool { catalog, err := imagecat.LoadEmbedded() if err != nil { return false } _, err = catalog.Lookup(name) return err == nil } func (d *Daemon) coreVMLifecycleChecks() *system.Preflight { checks := system.NewPreflight() d.vm.addBaseStartCommandPrereqs(checks) return checks } func (d *Daemon) vsockChecks() *system.Preflight { checks := system.NewPreflight() if helper, err := vsockAgentBinary(d.layout); err == nil { checks.RequireExecutable(helper, "vsock agent helper", `run 'make build' or reinstall banger`) } else { checks.Addf("%v", err) } checks.RequireFile(d.vm.vsockHostDevice, "vsock host device", "load the vhost_vsock kernel module on the host") return checks } func runtimeStatus(cfg model.DaemonConfig) string { if strings.TrimSpace(cfg.FirecrackerBin) == "" { return "firecracker not configured" } return "firecracker and ssh key resolved" }