The unit + integration tests can't cross the machine.Start SDK
boundary — doing so would need a fake firecracker that reimplements
the control-plane HTTP API, and the ongoing maintenance cost of
keeping that fake honest with upstream kills the value. Instead, add a
pre-release smoke target that drives REAL Firecracker + real KVM,
captures coverage from the -cover-instrumented binaries, and
surfaces per-package deltas so regressions in the boot path don't
ship silently.
scripts/smoke.sh:
- Isolated XDG_{CONFIG,STATE,CACHE,RUNTIME} so the smoke run can't
touch real user state (state/cache persist under build/smoke/xdg
for fast reruns; runtime is mktemp'd fresh per-run because
sockets can't be reused)
- Preflight: `banger doctor` must pass; UDP :42069 must be free
(otherwise the user's real daemon is up and the smoke daemon
can't bind its DNS listener — fail with an actionable message)
- Scenario 1 — bare: `banger vm run --rm -- echo smoke-bare-ok`
exercises create → start → socket ownership chown → machine.Start
→ SDK waitForSocket race → vsock agent readiness → guest SSH
wait → exec → cleanup → delete
- Scenario 2 — workspace: creates a throwaway git repo, runs
`banger vm run --rm <repo> -- cat /root/repo/smoke-file.txt`,
verifies the tracked file reached the guest (exercises
workDisk capability PrepareHost + workspace.prepare)
- `banger daemon stop` at the end so instrumented binaries flush
GOCOVERDIR pods before the script exits
Makefile additions:
- smoke-build: builds banger/bangerd under build/smoke/bin/ with
`go build -cover`
- smoke: runs the script with GOCOVERDIR set, reports per-package
coverage via `go tool covdata percent`
- smoke-coverage-html: textfmt + go tool cover for a browsable
report
- smoke-clean: nukes build/smoke/ including the persisted XDG
state
Bonus fix uncovered during the first smoke run: doctor treated a
missing state.db as a FAIL ("out of memory" from SQLite
SQLITE_CANTOPEN), which red-flagged every fresh install. Split
the store check: DB file absent → PASS with "will be created on
first daemon start" detail; DB present but unreadable → FAIL as
before. New TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall
pins the behaviour.
Concrete coverage delta from the first successful smoke run
(compared to `make coverage-total`'s unit-test-only 37.8%):
internal/firecracker 43.6% → 75.0%
internal/daemon/workspace 33.8% → 60.8%
internal/store 40.1% → 56.3%
internal/guest       63.7% → 57.4% (different mix: smoke exercises real
                     SSH; unit tests cover more error branches)
The packages the review flagged are the ones that moved most —
which is the point.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
213 lines
7.1 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// permissiveRunner is a stub system.CommandRunner: every call returns
// the same configured output and error. The doctor tests don't care
// which exact ip/iptables commands run — they only check that the
// aggregated report surfaces each feature check correctly — so one
// catch-all runner keeps the test prelude short.
type permissiveRunner struct {
	out []byte // canned stdout/stderr returned by every call
	err error  // canned error returned by every call
}
|
|
|
|
func (r *permissiveRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
|
|
return r.out, r.err
|
|
}
|
|
|
|
func (r *permissiveRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
|
|
return r.out, r.err
|
|
}
|
|
|
|
// buildDoctorDaemon stands up a Daemon the way doctorReport expects:
|
|
// fake PATH with every tool the preflights look for, fake firecracker
|
|
// + vsock companion binaries, fake vsock host device file, and a
|
|
// permissive runner that claims a default-route via eth0 so NAT's
|
|
// defaultUplink call succeeds. Returns the wired *Daemon.
|
|
func buildDoctorDaemon(t *testing.T) *Daemon {
|
|
t.Helper()
|
|
binDir := t.TempDir()
|
|
for _, name := range []string{
|
|
"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep",
|
|
"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs",
|
|
"iptables", "sysctl", "mkfs.ext4", "mount", "umount", "cp",
|
|
} {
|
|
writeFakeExecutable(t, filepath.Join(binDir, name))
|
|
}
|
|
t.Setenv("PATH", binDir)
|
|
|
|
firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
|
|
if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
|
|
t.Fatalf("write firecracker: %v", err)
|
|
}
|
|
vsockHelper := filepath.Join(t.TempDir(), "banger-vsock-agent")
|
|
if err := os.WriteFile(vsockHelper, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
|
|
t.Fatalf("write vsock helper: %v", err)
|
|
}
|
|
t.Setenv("BANGER_VSOCK_AGENT_BIN", vsockHelper)
|
|
|
|
sshKey := filepath.Join(t.TempDir(), "id_ed25519")
|
|
if err := os.WriteFile(sshKey, []byte("unused"), 0o600); err != nil {
|
|
t.Fatalf("write ssh key: %v", err)
|
|
}
|
|
|
|
vsockHostDevice := filepath.Join(t.TempDir(), "vhost-vsock")
|
|
if err := os.WriteFile(vsockHostDevice, []byte{}, 0o644); err != nil {
|
|
t.Fatalf("write vsock host device: %v", err)
|
|
}
|
|
|
|
runner := &permissiveRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
|
|
|
|
d := &Daemon{
|
|
layout: paths.Layout{
|
|
ConfigDir: t.TempDir(),
|
|
StateDir: t.TempDir(),
|
|
DBPath: filepath.Join(t.TempDir(), "state.db"),
|
|
},
|
|
config: model.DaemonConfig{
|
|
FirecrackerBin: firecrackerBin,
|
|
SSHKeyPath: sshKey,
|
|
BridgeName: model.DefaultBridgeName,
|
|
BridgeIP: model.DefaultBridgeIP,
|
|
StatsPollInterval: model.DefaultStatsPollInterval,
|
|
},
|
|
runner: runner,
|
|
}
|
|
wireServices(d)
|
|
d.vm.vsockHostDevice = vsockHostDevice
|
|
// HostNetwork defaults its own runner to the one on the struct, but
|
|
// wireServices only copies the Daemon's runner if d.net is nil
|
|
// before that call — in this test we constructed d.net implicitly,
|
|
// so belt-and-braces the permissive runner onto HostNetwork too.
|
|
d.net.runner = runner
|
|
return d
|
|
}
|
|
|
|
// findCheck returns the first CheckResult with the given name, or nil
|
|
// if no such check was emitted. The test helper rather than a method
|
|
// on Report so the field scope stays tight.
|
|
func findCheck(report system.Report, name string) *system.CheckResult {
|
|
for i := range report.Checks {
|
|
if report.Checks[i].Name == name {
|
|
return &report.Checks[i]
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), errors.New("simulated open failure"), false)
|
|
|
|
check := findCheck(report, "state store")
|
|
if check == nil {
|
|
t.Fatal("state store check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("state store status = %q, want fail (store error should surface)", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "simulated open failure") {
|
|
t.Fatalf("state store details = %q, want the storeErr message", joined)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
// Fresh install: the DB file simply doesn't exist yet. doctor must
|
|
// not treat that as a failure — nothing's broken, the first daemon
|
|
// start will create the file. The status message should say so,
|
|
// so a user running `banger doctor` before ever booting a VM
|
|
// doesn't see a scary red check.
|
|
report := d.doctorReport(context.Background(), nil, true)
|
|
|
|
check := findCheck(report, "state store")
|
|
if check == nil {
|
|
t.Fatal("state store check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("state store status = %q, want pass for a missing DB on fresh install", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "will be created") {
|
|
t.Fatalf("state store details = %q, want mention of 'will be created' so users know this is expected", joined)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_StoreSuccessSurfacesAsPass(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
|
|
check := findCheck(report, "state store")
|
|
if check == nil {
|
|
t.Fatal("state store check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("state store status = %q, want pass", check.Status)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_MissingFirecrackerFailsHostRuntime(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
d.config.FirecrackerBin = filepath.Join(t.TempDir(), "does-not-exist")
|
|
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
check := findCheck(report, "host runtime")
|
|
if check == nil {
|
|
t.Fatal("host runtime check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("host runtime status = %q, want fail when firecracker binary missing", check.Status)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_IncludesEveryDefaultCapability(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
|
|
// Every registered capability that implements doctorCapability must
|
|
// contribute a check. Pre-v0.1 the defaults are work-disk, dns, nat.
|
|
// If a capability is added later it should either extend this list
|
|
// or register its own check name — either way, the assertion makes
|
|
// the contract visible.
|
|
for _, name := range []string{
|
|
"feature /root work disk",
|
|
"feature vm dns",
|
|
"feature nat",
|
|
} {
|
|
if findCheck(report, name) == nil {
|
|
t.Errorf("capability check %q missing from report", name)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_EmitsVMDefaultsProvenance(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
|
|
check := findCheck(report, "vm defaults")
|
|
if check == nil {
|
|
t.Fatal("vm defaults check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("vm defaults status = %q, want pass (this is an always-pass informational check)", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, "\n")
|
|
for _, needle := range []string{"vcpu:", "memory:", "disk:"} {
|
|
if !strings.Contains(joined, needle) {
|
|
t.Errorf("vm defaults details missing %q; got:\n%s", needle, joined)
|
|
}
|
|
}
|
|
}
|