The unit + integration tests can't cross machine.Start — the SDK
boundary would need a fake firecracker that reimplements the
control-plane HTTP API, and the ongoing maintenance cost of keeping
that fake honest with upstream kills the value. Instead, add a
pre-release smoke target that drives REAL Firecracker + real KVM,
captures coverage from the -cover-instrumented binaries, and
surfaces per-package deltas so regressions in the boot path don't
ship silently.
scripts/smoke.sh:
- Isolated XDG_{CONFIG,STATE,CACHE,RUNTIME} so the smoke run can't
touch real user state (state/cache persist under build/smoke/xdg
for fast reruns; runtime is mktemp'd fresh per-run because
sockets can't be reused)
- Preflight: `banger doctor` must pass; UDP :42069 must be free
(otherwise the user's real daemon is up and the smoke daemon
can't bind its DNS listener — fail with an actionable message)
- Scenario 1 — bare: `banger vm run --rm -- echo smoke-bare-ok`
exercises create → start → socket ownership chown → machine.Start
→ SDK waitForSocket race → vsock agent readiness → guest SSH
wait → exec → cleanup → delete
- Scenario 2 — workspace: creates a throwaway git repo, runs
`banger vm run --rm <repo> -- cat /root/repo/smoke-file.txt`,
verifies the tracked file reached the guest (exercises
workDisk capability PrepareHost + workspace.prepare)
- `banger daemon stop` at the end so instrumented binaries flush
GOCOVERDIR pods before the script exits
Makefile additions:
- smoke-build: builds banger/bangerd under build/smoke/bin/ with
`go build -cover`
- smoke: runs the script with GOCOVERDIR set, reports per-package
coverage via `go tool covdata percent`
- smoke-coverage-html: textfmt + go tool cover for a browsable
report
- smoke-clean: nukes build/smoke/ including the persisted XDG
state
Bonus fix uncovered during the first smoke run: doctor treated a
missing state.db as a FAIL (SQLite's SQLITE_CANTOPEN surfaced as a
misleading "out of memory" error), which red-flagged every fresh
install. Split the store check: DB file absent → PASS with a "will be
created on first daemon start" detail; DB present but unreadable →
FAIL as before. The new test
TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall pins the
behaviour.
Concrete coverage delta from the first successful smoke run
(compared to `make coverage-total`'s unit-test-only 37.8%):
  internal/firecracker        43.6% → 75.0%
  internal/daemon/workspace   33.8% → 60.8%
  internal/store              40.1% → 56.3%
  internal/guest              63.7% → 57.4%  (different mix: smoke exercises
                                              real SSH; unit tests cover more
                                              error branches)
The packages the review flagged are the ones that moved most —
which is the point.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
213 lines
7.2 KiB
Go
213 lines
7.2 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
|
|
"banger/internal/config"
|
|
"banger/internal/imagecat"
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/store"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
func Doctor(ctx context.Context) (system.Report, error) {
|
|
layout, err := paths.Resolve()
|
|
if err != nil {
|
|
return system.Report{}, err
|
|
}
|
|
cfg, err := config.Load(layout)
|
|
if err != nil {
|
|
return system.Report{}, err
|
|
}
|
|
// Doctor must be read-only: running it should never mutate the
|
|
// state DB (no migrations, no WAL checkpoint, no pragma writes).
|
|
// Skip OpenReadOnly entirely when the DB file doesn't exist —
|
|
// that's a fresh install, not an error condition. The first
|
|
// daemon start will create the file. storeMissing differentiates
|
|
// "no DB yet" (pass) from "DB present but unreadable" (fail) in
|
|
// the report.
|
|
d := &Daemon{
|
|
layout: layout,
|
|
config: cfg,
|
|
runner: system.NewRunner(),
|
|
}
|
|
var storeErr error
|
|
storeMissing := false
|
|
if _, statErr := os.Stat(layout.DBPath); statErr != nil {
|
|
if os.IsNotExist(statErr) {
|
|
storeMissing = true
|
|
} else {
|
|
storeErr = statErr
|
|
}
|
|
} else {
|
|
db, err := store.OpenReadOnly(layout.DBPath)
|
|
if err != nil {
|
|
storeErr = err
|
|
} else {
|
|
defer db.Close()
|
|
d.store = db
|
|
}
|
|
}
|
|
wireServices(d)
|
|
return d.doctorReport(ctx, storeErr, storeMissing), nil
|
|
}
|
|
|
|
func (d *Daemon) doctorReport(ctx context.Context, storeErr error, storeMissing bool) system.Report {
|
|
report := system.Report{}
|
|
|
|
addArchitectureCheck(&report)
|
|
|
|
switch {
|
|
case storeMissing:
|
|
report.AddPass("state store", "will be created on first daemon start at "+d.layout.DBPath)
|
|
case storeErr != nil:
|
|
report.AddFail(
|
|
"state store",
|
|
fmt.Sprintf("open %s: %v", d.layout.DBPath, storeErr),
|
|
"remove or restore the file if corrupt; otherwise check its permissions",
|
|
)
|
|
default:
|
|
report.AddPass("state store", "readable at "+d.layout.DBPath)
|
|
}
|
|
|
|
report.AddPreflight("host runtime", d.runtimeChecks(), runtimeStatus(d.config))
|
|
report.AddPreflight("core vm lifecycle", d.coreVMLifecycleChecks(), "required host tools available")
|
|
report.AddPreflight("vsock guest agent", d.vsockChecks(), "vsock guest agent prerequisites available")
|
|
d.addVMDefaultsCheck(&report)
|
|
d.addSSHShortcutCheck(&report)
|
|
d.addCapabilityDoctorChecks(ctx, &report)
|
|
|
|
return report
|
|
}
|
|
|
|
// addSSHShortcutCheck surfaces a gentle warning when banger maintains
|
|
// an ssh_config file but the user hasn't wired it into ~/.ssh/config.
|
|
// This is intentionally a warn, not a fail — the shortcut is opt-in
|
|
// convenience and `banger vm ssh` works either way.
|
|
func (d *Daemon) addSSHShortcutCheck(report *system.Report) {
|
|
bangerConfig := BangerSSHConfigPath(d.layout)
|
|
if strings.TrimSpace(bangerConfig) == "" {
|
|
return
|
|
}
|
|
if _, err := os.Stat(bangerConfig); err != nil {
|
|
// No banger ssh_config rendered yet — nothing to include.
|
|
return
|
|
}
|
|
installed, err := UserSSHIncludeInstalled()
|
|
if err != nil {
|
|
report.AddWarn("ssh shortcut", fmt.Sprintf("could not read ~/.ssh/config: %v", err))
|
|
return
|
|
}
|
|
if installed {
|
|
report.AddPass("ssh shortcut", "enabled — `ssh <name>.vm` routes through banger")
|
|
return
|
|
}
|
|
report.AddWarn(
|
|
"ssh shortcut",
|
|
fmt.Sprintf("`ssh <name>.vm` not enabled (opt-in); run `banger ssh-config --install` or add `Include %s` to ~/.ssh/config", bangerConfig),
|
|
)
|
|
}
|
|
|
|
// addArchitectureCheck surfaces a hard-fail when banger is running on
|
|
// a non-amd64 host. Companion binaries are pinned to amd64 in the
|
|
// Makefile, the published kernel catalog ships only x86_64 images, and
|
|
// OCI import pulls linux/amd64 layers. Letting users discover this
|
|
// through cryptic downstream failures is worse than saying it up front.
|
|
func addArchitectureCheck(report *system.Report) {
|
|
if runtime.GOARCH == "amd64" {
|
|
report.AddPass("host architecture", "amd64")
|
|
return
|
|
}
|
|
report.AddFail(
|
|
"host architecture",
|
|
fmt.Sprintf("running on %s; banger today only supports amd64/x86_64 hosts", runtime.GOARCH),
|
|
"companion build, kernel catalog, and OCI import all assume linux/amd64",
|
|
)
|
|
}
|
|
|
|
// addVMDefaultsCheck surfaces the effective VM sizing that `vm run` /
|
|
// `vm create` will apply when the user omits the flags. Shown as a
|
|
// PASS check so it always renders, with per-field provenance
|
|
// (config|auto|builtin) so users can tell what's driving each number.
|
|
func (d *Daemon) addVMDefaultsCheck(report *system.Report) {
|
|
host, err := system.ReadHostResources()
|
|
var cpus int
|
|
var memBytes int64
|
|
if err == nil {
|
|
cpus = host.CPUCount
|
|
memBytes = host.TotalMemoryBytes
|
|
}
|
|
defaults := model.ResolveVMDefaults(d.config.VMDefaults, cpus, memBytes)
|
|
details := []string{
|
|
fmt.Sprintf("vcpu: %d (%s)", defaults.VCPUCount, defaults.VCPUSource),
|
|
fmt.Sprintf("memory: %d MiB (%s)", defaults.MemoryMiB, defaults.MemorySource),
|
|
fmt.Sprintf("disk: %s (%s)", model.FormatSizeBytes(defaults.WorkDiskSizeBytes), defaults.WorkDiskSource),
|
|
"override any of these in ~/.config/banger/config.toml under [vm_defaults]",
|
|
}
|
|
report.AddPass("vm defaults", details...)
|
|
}
|
|
|
|
func (d *Daemon) runtimeChecks() *system.Preflight {
|
|
checks := system.NewPreflight()
|
|
checks.RequireExecutable(d.config.FirecrackerBin, "firecracker binary", `install firecracker or set "firecracker_bin"`)
|
|
checks.RequireFile(d.config.SSHKeyPath, "ssh private key", `set "ssh_key_path" or let banger create its default key`)
|
|
if helper, err := vsockAgentBinary(d.layout); err == nil {
|
|
checks.RequireExecutable(helper, "vsock agent helper", `run 'make build' or reinstall banger`)
|
|
} else {
|
|
checks.Addf("%v", err)
|
|
}
|
|
if d.store != nil && strings.TrimSpace(d.config.DefaultImageName) != "" {
|
|
name := d.config.DefaultImageName
|
|
image, err := d.store.GetImageByName(context.Background(), name)
|
|
if err == nil {
|
|
checks.RequireFile(image.RootfsPath, "default image rootfs", `re-register or rebuild the default image`)
|
|
checks.RequireFile(image.KernelPath, "default image kernel", `re-register or rebuild the default image`)
|
|
if strings.TrimSpace(image.InitrdPath) != "" {
|
|
checks.RequireFile(image.InitrdPath, "default image initrd", `re-register or rebuild the default image`)
|
|
}
|
|
} else if !defaultImageInCatalog(name) {
|
|
checks.Addf("default image %q is not registered and not in the imagecat catalog", name)
|
|
}
|
|
// If the default image isn't local but is cataloged, vm create
|
|
// will auto-pull it on first use — no error to surface.
|
|
}
|
|
return checks
|
|
}
|
|
|
|
func defaultImageInCatalog(name string) bool {
|
|
catalog, err := imagecat.LoadEmbedded()
|
|
if err != nil {
|
|
return false
|
|
}
|
|
_, err = catalog.Lookup(name)
|
|
return err == nil
|
|
}
|
|
|
|
func (d *Daemon) coreVMLifecycleChecks() *system.Preflight {
|
|
checks := system.NewPreflight()
|
|
d.vm.addBaseStartCommandPrereqs(checks)
|
|
return checks
|
|
}
|
|
|
|
func (d *Daemon) vsockChecks() *system.Preflight {
|
|
checks := system.NewPreflight()
|
|
if helper, err := vsockAgentBinary(d.layout); err == nil {
|
|
checks.RequireExecutable(helper, "vsock agent helper", `run 'make build' or reinstall banger`)
|
|
} else {
|
|
checks.Addf("%v", err)
|
|
}
|
|
checks.RequireFile(d.vm.vsockHostDevice, "vsock host device", "load the vhost_vsock kernel module on the host")
|
|
return checks
|
|
}
|
|
|
|
func runtimeStatus(cfg model.DaemonConfig) string {
|
|
if strings.TrimSpace(cfg.FirecrackerBin) == "" {
|
|
return "firecracker not configured"
|
|
}
|
|
return "firecracker and ssh key resolved"
|
|
}
|