banger/internal/smoketest/scenarios_global_test.go
2026-05-01 19:34:44 -03:00

368 lines
13 KiB
Go

//go:build smoke
package smoketest
import (
"os/exec"
"regexp"
"strings"
"testing"
)
// testInvalidSpec is the Go port of scenario_invalid_spec. It runs
// `vm run --rm --vcpu 0 ...`, requires a non-zero exit code, and then
// checks that the `vm list --all` line count is the same as it was
// before the attempt, i.e. the rejected run left no VM row behind.
// Global-class because the assertion is on host-wide vm-list counts;
// pure-class VM creation running concurrently would race with it.
func testInvalidSpec(t *testing.T) {
	before := vmListAllCount(t)

	run := banger(t, "vm", "run", "--rm", "--vcpu", "0", "--", "echo", "unused")
	if run.rc == 0 {
		t.Fatalf("invalid spec: vm run unexpectedly succeeded with --vcpu 0\nstdout: %s\nstderr: %s",
			run.stdout, run.stderr)
	}

	if after := vmListAllCount(t); before != after {
		t.Fatalf("invalid spec leaked a VM row: pre=%d, post=%d", before, after)
	}
}
// vmListAllCount reports how many newline characters `banger vm list
// --all` printed, the Go equivalent of the bash `vm list --all | wc -l`
// idiom. Callers only compare two readings against each other, so the
// absolute value (header lines included or not) is irrelevant.
func vmListAllCount(t *testing.T) int {
	t.Helper()
	listing := mustBanger(t, "vm", "list", "--all")
	newlines := strings.Count(listing, "\n")
	return newlines
}
// testVMPrune ports scenario_vm_prune. It creates two VMs, stops one of
// them, runs `vm prune -f`, and then expects `vm show` to still succeed
// for the VM that was left alone and to fail for the stopped one.
// Global-class because it asserts on host-wide vm-list contents.
func testVMPrune(t *testing.T) {
	const (
		runningVM = "smoke-prune-running"
		stoppedVM = "smoke-prune-stopped"
	)

	// Create each VM and immediately register its cleanup, in the same
	// order as the scenario: running first, then the one to be stopped.
	for _, vm := range []string{runningVM, stoppedVM} {
		vm := vm // per-iteration copy so the closure is safe before Go 1.22
		mustBanger(t, "vm", "create", "--name", vm)
		t.Cleanup(func() { vmDelete(vm) })
	}

	mustBanger(t, "vm", "stop", stoppedVM)
	mustBanger(t, "vm", "prune", "-f")

	if show := banger(t, "vm", "show", runningVM); show.rc != 0 {
		t.Fatalf("vm prune: running VM was deleted (regression!)")
	}
	if show := banger(t, "vm", "show", stoppedVM); show.rc == 0 {
		t.Fatalf("vm prune: stopped VM survived prune")
	}
}
// guestIPRE matches a `"guest_ip": "<value>"` pair in `vm show` output
// and captures the quoted value. testNAT uses the captured address to
// find the POSTROUTING rule that belongs to a VM.
var guestIPRE = regexp.MustCompile(`"guest_ip":\s*"([^"]+)"`)

// vmGuestIP runs `vm show <name>` and returns the guest_ip value found
// in its output. The test is failed (with the full `vm show` output in
// the message) when no guest_ip field can be matched.
func vmGuestIP(t *testing.T, name string) string {
	t.Helper()
	showOut := mustBanger(t, "vm", "show", name)
	if groups := guestIPRE.FindStringSubmatch(showOut); len(groups) == 2 {
		return groups[1]
	}
	t.Fatalf("could not read guest_ip from vm show %q:\n%s", name, showOut)
	return "" // not reached: Fatalf stops the test goroutine
}
// testNAT ports scenario_nat. Verifies that `--nat` installs a per-VM
// MASQUERADE rule, that the rule survives stop/start without being
// duplicated, and that delete cleans it up. The control VM (no --nat)
// must NOT have a rule.
//
// Rules are identified by their `-s <guest_ip>/32` source match in the
// `iptables -t nat -S POSTROUTING` dump.
func testNAT(t *testing.T) {
	requireSudoIptables(t)

	mustBanger(t, "vm", "create", "--name", "smoke-nat", "--nat")
	t.Cleanup(func() { vmDelete("smoke-nat") })
	mustBanger(t, "vm", "create", "--name", "smoke-nocnat")
	t.Cleanup(func() { vmDelete("smoke-nocnat") })

	natIP := vmGuestIP(t, "smoke-nat")
	ctlIP := vmGuestIP(t, "smoke-nocnat")
	natRule := "-s " + natIP + "/32"

	postrouting := iptablesPostrouting(t)
	// The source match and the MASQUERADE target must sit on the SAME
	// rule line; checking them independently against the whole dump
	// would be satisfied by any unrelated MASQUERADE rule on the host.
	if !natMasqueradeRulePresent(postrouting, natRule) {
		t.Fatalf("NAT: --nat VM has no POSTROUTING MASQUERADE rule for %s; got:\n%s", natIP, postrouting)
	}
	if strings.Contains(postrouting, "-s "+ctlIP+"/32") {
		t.Fatalf("NAT: control VM unexpectedly has a MASQUERADE rule for %s", ctlIP)
	}

	// A stop/start cycle must leave exactly one rule for the VM: not
	// zero (rule lost) and not two (rule re-added without cleanup).
	mustBanger(t, "vm", "stop", "smoke-nat")
	mustBanger(t, "vm", "start", "smoke-nat")
	postrouting = iptablesPostrouting(t)
	count := strings.Count(postrouting, natRule)
	if count != 1 {
		t.Fatalf("NAT: MASQUERADE rule count for %s = %d after restart, want 1", natIP, count)
	}

	mustBanger(t, "vm", "delete", "smoke-nat")
	mustBanger(t, "vm", "delete", "smoke-nocnat")
	postrouting = iptablesPostrouting(t)
	if strings.Contains(postrouting, natRule) {
		t.Fatalf("NAT: delete left a MASQUERADE rule behind for %s", natIP)
	}
}

// natMasqueradeRulePresent reports whether at least one line of the
// given rule dump contains both sourceMatch (e.g. "-s 172.16.0.2/32")
// and the MASQUERADE target.
func natMasqueradeRulePresent(rules, sourceMatch string) bool {
	for _, line := range strings.Split(rules, "\n") {
		if strings.Contains(line, sourceMatch) && strings.Contains(line, "MASQUERADE") {
			return true
		}
	}
	return false
}
// iptablesPostrouting returns the stdout of
// `sudo -n iptables -t nat -S POSTROUTING` as a string. The test is
// failed if the command cannot be run or exits with an error.
func iptablesPostrouting(t *testing.T) string {
	t.Helper()
	cmd := exec.Command("sudo", "-n", "iptables", "-t", "nat", "-S", "POSTROUTING")
	rules, err := cmd.Output()
	if err != nil {
		t.Fatalf("read iptables POSTROUTING: %v", err)
	}
	return string(rules)
}
// testInvalidName ports scenario_invalid_name. Each malformed name in
// the list is passed to `vm create --name <name> --no-start`; every
// attempt must exit non-zero, and the `vm list --all` line count must
// be unchanged once all attempts have been made.
func testInvalidName(t *testing.T) {
	badNames := []string{"MyBox", "my box", "box.vm", "-box"}

	before := vmListAllCount(t)
	for _, name := range badNames {
		if banger(t, "vm", "create", "--name", name, "--no-start").rc == 0 {
			t.Fatalf("invalid name: vm create accepted %q", name)
		}
	}
	after := vmListAllCount(t)

	if after != before {
		t.Fatalf("invalid name leaked VM row(s): pre=%d, post=%d", before, after)
	}
}
// updateBaseArgs returns the `--manifest-url` / `--pubkey-file` flag
// pairs that every update scenario appends to its command line, so the
// updater is pointed at the smoke release server instead of the
// production R2 bucket. The slice is built on each call rather than
// once at package init because manifestURL and pubkeyFile are only
// populated by prepareSmokeReleases.
func updateBaseArgs() []string {
	flags := make([]string, 0, 4)
	flags = append(flags, "--manifest-url", manifestURL)
	flags = append(flags, "--pubkey-file", pubkeyFile)
	return flags
}
// testUpdateCheck ports scenario_update_check. After preparing the
// smoke releases it runs `update --check` with the smoke manifest and
// pubkey flags, requires exit code 0, and requires the combined
// stdout+stderr to contain "update available: ".
func testUpdateCheck(t *testing.T) {
	if err := prepareSmokeReleases(); err != nil {
		t.Fatalf("prepare smoke releases: %v", err)
	}

	cmdline := []string{"update", "--check"}
	cmdline = append(cmdline, updateBaseArgs()...)

	out := banger(t, cmdline...)
	if out.rc != 0 {
		t.Fatalf("update --check failed: rc=%d\nstdout: %s\nstderr: %s",
			out.rc, out.stdout, out.stderr)
	}

	combined := out.stdout + out.stderr
	wantContains(t, combined, "update available: ", "update --check stdout")
}
// testUpdateToUnknown ports scenario_update_to_unknown. Requesting
// v9.9.9, a version the smoke manifest does not carry, must exit
// non-zero with an error mentioning "not found" (case-insensitive),
// and the installed version read before and after must be identical,
// i.e. the failure happened before any host mutation.
func testUpdateToUnknown(t *testing.T) {
	if err := prepareSmokeReleases(); err != nil {
		t.Fatalf("prepare smoke releases: %v", err)
	}

	before := installedVersion(t)

	cmdline := []string{"update", "--to", "v9.9.9"}
	cmdline = append(cmdline, updateBaseArgs()...)
	out := banger(t, cmdline...)
	if out.rc == 0 {
		t.Fatalf("update --to v9.9.9: exit 0 (out: %s%s)", out.stdout, out.stderr)
	}

	// Match the error text case-insensitively across both streams.
	if lowered := strings.ToLower(out.stdout + out.stderr); !strings.Contains(lowered, "not found") {
		t.Fatalf("update --to v9.9.9: error doesn't say 'not found'; got: %s%s", out.stdout, out.stderr)
	}

	after := installedVersion(t)
	if before != after {
		t.Fatalf("update --to v9.9.9 mutated the install: %s -> %s", before, after)
	}
}
// testUpdateNoRoot ports scenario_update_no_root. `update --to
// <smokeReleaseGood>` invoked without sudo must exit non-zero, its
// combined output must mention "root" (case-insensitive), and the
// installed version must be the same before and after the attempt.
func testUpdateNoRoot(t *testing.T) {
	if err := prepareSmokeReleases(); err != nil {
		t.Fatalf("prepare smoke releases: %v", err)
	}

	before := installedVersion(t)

	cmdline := []string{"update", "--to", smokeReleaseGood}
	cmdline = append(cmdline, updateBaseArgs()...)
	out := banger(t, cmdline...)
	if out.rc == 0 {
		t.Fatalf("update without sudo: exit 0 (out: %s%s)", out.stdout, out.stderr)
	}

	// Match the error text case-insensitively across both streams.
	if lowered := strings.ToLower(out.stdout + out.stderr); !strings.Contains(lowered, "root") {
		t.Fatalf("update without sudo: error doesn't mention root; got: %s%s", out.stdout, out.stderr)
	}

	after := installedVersion(t)
	if before != after {
		t.Fatalf("update without sudo mutated the install: %s -> %s", before, after)
	}
}
// testUpdateDryRun ports scenario_update_dry_run. Under sudo,
// `update --to <smokeReleaseGood> --dry-run` must exit 0 and print a
// "dry-run:" marker, while the installed version stays what it was
// before the call: the release is fetched and verified but the binary
// must not be swapped.
func testUpdateDryRun(t *testing.T) {
	requirePasswordlessSudo(t)
	if err := prepareSmokeReleases(); err != nil {
		t.Fatalf("prepare smoke releases: %v", err)
	}

	before := installedVersion(t)

	cmdline := []string{"update", "--to", smokeReleaseGood, "--dry-run"}
	cmdline = append(cmdline, updateBaseArgs()...)
	out := sudoBanger(t, cmdline...)
	if out.rc != 0 {
		t.Fatalf("update --dry-run failed: %s%s", out.stdout, out.stderr)
	}
	wantContains(t, out.stdout+out.stderr, "dry-run:", "update --dry-run stdout")

	after := installedVersion(t)
	if before != after {
		t.Fatalf("update --dry-run swapped the binary: %s -> %s", before, after)
	}
}
// vmBootID reads /proc/sys/kernel/random/boot_id from the guest via
// `banger vm ssh <name> -- cat ...` and returns it with surrounding
// whitespace trimmed. The kernel regenerates the value on every boot,
// so an unchanged value across a daemon restart proves the firecracker
// process survived. Used by both update scenarios that assert "the VM
// stays alive".
//
// The helper is best-effort: a failed command does not fail the test
// here. Callers treat an empty return value as the failure signal; the
// underlying error is logged so that failure can be diagnosed.
func vmBootID(t *testing.T, name string) string {
	t.Helper()
	out, err := exec.Command(bangerBin, "vm", "ssh", name, "--", "cat", "/proc/sys/kernel/random/boot_id").Output()
	if err != nil {
		// Log instead of failing: the caller decides what an empty
		// boot_id means, but the cause should not be lost.
		t.Logf("vmBootID(%q): vm ssh failed: %v", name, err)
	}
	return strings.TrimSpace(string(out))
}
// installTomlVersionRE matches a line of the form `version = "<value>"`
// (anchored at the start of a line) and captures the quoted value.
var installTomlVersionRE = regexp.MustCompile(`(?m)^version\s*=\s*"([^"]+)"`)

// installedTomlVersion reads /etc/banger/install.toml's version field
// (under sudo since the dir is not always world-readable). The test is
// failed if the file cannot be read or has no version line.
//
// sudo is invoked with -n (non-interactive), matching
// iptablesPostrouting: without it a host that lacks passwordless sudo
// would block on a password prompt instead of failing fast.
func installedTomlVersion(t *testing.T) string {
	t.Helper()
	out, err := exec.Command("sudo", "-n", "cat", "/etc/banger/install.toml").Output()
	if err != nil {
		t.Fatalf("read /etc/banger/install.toml: %v", err)
	}
	m := installTomlVersionRE.FindStringSubmatch(string(out))
	if len(m) != 2 {
		t.Fatalf("install.toml: no version field in:\n%s", out)
	}
	return m[1]
}
// testUpdateKeepsVMAlive ports scenario_update_keeps_vm_alive. The
// long-running update scenario: with a VM up and reachable over SSH, it
// performs a real (non-dry-run) update to smokeReleaseGood under sudo
// and then asserts, in order, that
//   - the installed binary now reports smokeReleaseGood,
//   - that version differs from the one installed before the update,
//   - install.toml's version field also says smokeReleaseGood,
//   - the VM answers SSH again and its boot_id is unchanged, i.e. it
//     was not rebooted by the update.
func testUpdateKeepsVMAlive(t *testing.T) {
	requirePasswordlessSudo(t)
	if err := prepareSmokeReleases(); err != nil {
		t.Fatalf("prepare smoke releases: %v", err)
	}

	const name = "smoke-update"
	vmCreate(t, name)
	waitForSSH(t, name)

	// Snapshot the guest's boot_id and the installed version up front.
	bootBefore := vmBootID(t, name)
	if bootBefore == "" {
		t.Fatalf("pre-update boot_id capture failed")
	}
	verBefore := installedVersion(t)

	cmdline := []string{"update", "--to", smokeReleaseGood}
	cmdline = append(cmdline, updateBaseArgs()...)
	upd := sudoBanger(t, cmdline...)
	if upd.rc != 0 {
		t.Fatalf("update --to %s failed: %s%s", smokeReleaseGood, upd.stdout, upd.stderr)
	}

	verAfter := installedVersion(t)
	if verAfter != smokeReleaseGood {
		t.Fatalf("post-update /usr/local/bin/banger version = %s, want %s", verAfter, smokeReleaseGood)
	}
	if verBefore == verAfter {
		t.Fatalf("update did not change the binary version (pre==post=%s)", verAfter)
	}
	metaVer := installedTomlVersion(t)
	if metaVer != smokeReleaseGood {
		t.Fatalf("install.toml version = %q, want %s", metaVer, smokeReleaseGood)
	}

	// The VM must be reachable again and still on the same boot.
	waitForSSH(t, name)
	bootAfter := vmBootID(t, name)
	if bootAfter == "" {
		t.Fatalf("post-update boot_id read failed")
	}
	if bootBefore != bootAfter {
		t.Fatalf("VM rebooted during update: boot_id %s -> %s", bootBefore, bootAfter)
	}
}
// testUpdateRollbackKeepsVMAlive ports
// scenario_update_rollback_keeps_vm_alive. Rollback drill: with a VM
// up, it attempts an update to smokeReleaseBroken, the broken-bangerd
// release that passes the pre-swap migration sanity but fails as a
// service. The update must exit non-zero, the installed version must
// be back at its pre-drill value (runUpdate's rollbackAndWrap is
// expected to restore the previous binaries), and the VM must still
// answer SSH with an unchanged boot_id.
func testUpdateRollbackKeepsVMAlive(t *testing.T) {
	requirePasswordlessSudo(t)
	if err := prepareSmokeReleases(); err != nil {
		t.Fatalf("prepare smoke releases: %v", err)
	}

	verBefore := installedVersion(t)

	const name = "smoke-rollback"
	vmCreate(t, name)
	waitForSSH(t, name)
	bootBefore := vmBootID(t, name)
	if bootBefore == "" {
		t.Fatalf("pre-drill boot_id capture failed")
	}

	cmdline := []string{"update", "--to", smokeReleaseBroken}
	cmdline = append(cmdline, updateBaseArgs()...)
	upd := sudoBanger(t, cmdline...)
	if upd.rc == 0 {
		t.Fatalf("rollback drill: update returned exit 0 despite broken bangerd\nstdout: %s\nstderr: %s",
			upd.stdout, upd.stderr)
	}

	verAfter := installedVersion(t)
	if verAfter != verBefore {
		t.Fatalf("rollback drill: post-rollback version = %s, want %s", verAfter, verBefore)
	}

	// The VM must be reachable again and still on the same boot.
	waitForSSH(t, name)
	bootAfter := vmBootID(t, name)
	if bootAfter == "" {
		t.Fatalf("post-rollback boot_id read failed")
	}
	if bootBefore != bootAfter {
		t.Fatalf("VM rebooted during rollback drill: boot_id %s -> %s", bootBefore, bootAfter)
	}
}
// testDaemonAdmin ports scenario_daemon_admin. MUST be the last global
// scenario in the run order: `banger daemon stop` tears the installed
// services down, so anything after it that talks to the daemon would
// fail. The teardown path re-stops idempotently.
//
// Checks, in order: `daemon socket` prints the expected socket path;
// `bangerd --system --check-migrations` succeeds with output starting
// with "compatible:"; `daemon stop` under sudo exits 0; and afterwards
// `system status` has lines reporting both `active` and
// `helper_active` as inactive.
func testDaemonAdmin(t *testing.T) {
	if got := strings.TrimSpace(mustBanger(t, "daemon", "socket")); got != "/run/banger/bangerd.sock" {
		t.Fatalf("daemon socket: got %q, want /run/banger/bangerd.sock", got)
	}

	migCmd := exec.Command(bangerdBin, "--system", "--check-migrations")
	migOut, err := migCmd.CombinedOutput()
	if err != nil {
		t.Fatalf("bangerd --check-migrations: %v\n%s", err, migOut)
	}
	migText := strings.TrimSpace(string(migOut))
	if !strings.HasPrefix(migText, "compatible:") {
		t.Fatalf("bangerd --check-migrations: stdout missing 'compatible:' prefix; got: %s", migOut)
	}

	requirePasswordlessSudo(t)
	stop := sudoBanger(t, "daemon", "stop")
	if stop.rc != 0 {
		t.Fatalf("banger daemon stop: %s%s", stop.stdout, stop.stderr)
	}

	// The command's error is discarded here; only what it printed on
	// stdout is inspected.
	status, _ := exec.Command(bangerBin, "system", "status").Output()

	ownerInactive := regexp.MustCompile(`(?m)^active\s+inactive`)
	helperInactive := regexp.MustCompile(`(?m)^helper_active\s+inactive`)
	if !ownerInactive.Match(status) {
		t.Fatalf("owner daemon still active after daemon stop:\n%s", status)
	}
	if !helperInactive.Match(status) {
		t.Fatalf("root helper still active after daemon stop:\n%s", status)
	}
}