smoke: five more scenarios + fix exit-code propagation bug the new ones caught

Five new smoke scenarios layered on top of the existing bare + workspace vm-run tests: - exit-code propagation: `sh -c 'exit 42'` must rc=42 - workspace dry-run: --dry-run lists tracked files without a VM - workspace --include-untracked: opt-in ships files outside the git index (regression guard on the security-default flip from review 4) - concurrent vm runs: two --rm invocations in parallel both succeed (stresses per-VM locks, createVMMu reservation window, tap pool) - invalid spec rejection: --vcpu 0 must fail with no VM row left behind (the "cleanup on partial failure" path the review flagged) The exit-code scenario caught a real bug on first run: `banger vm run --rm -- sh -c 'exit 42'` returned rc=0, not 42. Root cause in internal/cli/ssh.go's sshCommandArgs: extra args were appended to the ssh argv verbatim, relying on ssh(1)'s implicit space-join to deliver the remote command. That works for single tokens (echo hello) but re-tokenises multi-word commands on the remote side: `ssh host sh -c 'exit 42'` becomes remote `sh -c exit 42`, where `42` is $0 for the already-completed `exit`, and the exit code the user asked for is lost. Fix: shell-quote every element of extra (`'sh'` `'-c'` `'exit 42'`) and join them into a single trailing argv entry. ssh's space-join then produces exactly the command the user typed on the remote shell. TestSSHCommandArgs was updated to pin the quoting; the existing TestRunVMRunCommandModePropagatesExitCode test needed a one-word quote tweak (`false` → `'false'`). Smoke run after fix passes all seven scenarios in ~2 min on warm state. cmd/banger coverage jumped to 100% (the invalid-spec scenario hits the error-reporting path that wasn't covered before). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 19:37:07 -03:00 · 2026-04-22 19:37:07 -03:00 · 672d7151e9
commit 672d7151e9
parent 5f81332b0a
3 changed files with 94 additions and 4 deletions
--- a/internal/cli/cli_test.go
+++ b/internal/cli/cli_test.go
@ -1037,7 +1037,6 @@ func TestSSHCommandArgs(t *testing.T) {
 		"-o", "PasswordAuthentication=no",
 		"-o", "KbdInteractiveAuthentication=no",
 		"root@172.16.0.2",
 		"--", "uname", "-a",
 	}
 	for _, s := range wantSubstrings {
 		found := false
@ -1052,6 +1051,15 @@ func TestSSHCommandArgs(t *testing.T) {
 		}
 	}
 	// The trailing argument is the user's command, shell-quoted and
 	// joined so ssh(1)'s space-concatenation produces the exact argv
 	// the user typed on the remote shell. Without this, multi-word
 	// args like `sh -c 'exit 42'` re-tokenise on the remote and lose
 	// exit codes.
 	if got, want := args[len(args)-1], `'--' 'uname' '-a'`; got != want {
 		t.Errorf("trailing arg = %q, want %q (ssh needs a single shell-quoted string)", got, want)
 	}
 	// Host-key verification posture: accept-new + a real path into
 	// banger state, not /dev/null.
 	joined := strings.Join(args, " ")
@ -1607,8 +1615,8 @@ func TestRunVMRunCommandModePropagatesExitCode(t *testing.T) {
 	if !errors.As(err, &exitErr) || exitErr.Code != 7 {
 		t.Fatalf("d.runVMRun error = %v, want ExitCodeError{7}", err)
 	}
-	if len(sshArgsSeen) == 0 || sshArgsSeen[len(sshArgsSeen)-1] != "false" {
+	if len(sshArgsSeen) == 0 || sshArgsSeen[len(sshArgsSeen)-1] != "'false'" {
-		t.Fatalf("sshArgsSeen = %v, want trailing command 'false'", sshArgsSeen)
+		t.Fatalf("sshArgsSeen = %v, want trailing shell-quoted command 'false'", sshArgsSeen)
 	}
 	if !strings.Contains(stderr.String(), "[vm run] running command in guest") {
 		t.Fatalf("stderr = %q, want command progress", stderr.String())
--- a/internal/cli/ssh.go
+++ b/internal/cli/ssh.go
@ -91,7 +91,20 @@ func sshCommandArgs(cfg model.DaemonConfig, guestIP string, extra []string) ([]s
 		)
 	}
 	args = append(args, "root@"+guestIP)
-	args = append(args, extra...)
+	// ssh(1) concatenates every argument after the host with spaces
 	// before sending to the remote shell. That means passing extra
 	// args raw — `ssh host sh -c 'exit 42'` — re-tokenises on the
 	// remote side to `sh -c exit 42`, where `42` is $0 for the
 	// already-completed `exit`, and the rc the user asked for is
 	// lost. Shell-quote each element and join them ourselves so the
 	// remote shell sees exactly the argv the user typed locally.
 	if len(extra) > 0 {
 		quoted := make([]string, len(extra))
 		for i, a := range extra {
 			quoted[i] = shellQuote(a)
 		}
 		args = append(args, strings.Join(quoted, " "))
 	}
 	return args, nil
 }
--- a/scripts/smoke.sh
+++ b/scripts/smoke.sh
@ -107,6 +107,75 @@ log "workspace vm run: create + start + workspace prepare + cat guest file + --r
 ws_out="$("$BANGER" vm run --rm "$repodir" -- cat /root/repo/smoke-file.txt)" || die "workspace vm run exit $?"
 grep -q 'smoke-workspace-marker' <<<"$ws_out" || die "workspace vm run didn't ship smoke-file.txt: $ws_out"
 # --- command exit-code propagation ------------------------------------
 # A non-zero exit from the guest command must surface as banger's own
 # exit code. Regressions here are hard to catch any other way — the
 # local Go tests don't cross the SSH boundary, and users expect their
 # CI scripts that wrap `banger vm run` to fail when the thing inside
 # the VM failed.
 log 'exit-code propagation: guest `sh -c "exit 42"` must produce rc=42'
 set +e
 "$BANGER" vm run --rm -- sh -c 'exit 42'
 rc=$?
 set -e
 [[ "$rc" -eq 42 ]] || die "exit-code propagation: got rc=$rc, want 42"
 # --- workspace dry-run (no VM) ----------------------------------------
 # Pure CLI-side path — no VM, no sudo, just the local git inspection
 # against d.repoInspector. Fast; catches regressions in the preview
 # output (file list shape, mode line) that the Go tests already pin
 # but that could still be broken by a client-side wiring change.
 log 'workspace dry-run: list tracked files without creating a VM'
 dry_out="$("$BANGER" vm run --dry-run "$repodir")" || die "dry-run exit $?"
 grep -q 'smoke-file.txt' <<<"$dry_out" || die "dry-run didn't list smoke-file.txt: $dry_out"
 grep -q 'mode: tracked only' <<<"$dry_out" || die "dry-run mode line missing or wrong: $dry_out"
 # --- workspace --include-untracked -----------------------------------
 # The default is tracked-only (review cycle 4). Opt-in must ship
 # untracked files too. Write one, run with --include-untracked, verify
 # it reaches the guest.
 log 'workspace --include-untracked: opt-in ships files outside the git index'
 echo 'untracked-marker' > "$repodir/smoke-untracked.txt"
 inc_out="$("$BANGER" vm run --rm --include-untracked "$repodir" -- cat /root/repo/smoke-untracked.txt)" || die "include-untracked vm run exit $?"
 grep -q 'untracked-marker' <<<"$inc_out" || die "--include-untracked didn't ship the untracked file: $inc_out"
 # Restore repo to tracked-only state for any later scenarios.
 rm -f "$repodir/smoke-untracked.txt"
 # --- concurrent vm runs -----------------------------------------------
 # Stresses per-VM lock scoping, the tap pool warm-up path, and
 # createVMMu's narrow reservation window. Two `vm run --rm` invocations
 # that actually overlap should both succeed. A regression that
 # serialises create path too aggressively would make this slow but
 # still pass; a regression that breaks tap allocation or name
 # uniqueness would fail one of them.
 log 'concurrent vm runs: two --rm invocations must both succeed'
 tmpA="$runtime_dir/concurrent-a.out"
 tmpB="$runtime_dir/concurrent-b.out"
 "$BANGER" vm run --rm -- echo smoke-concurrent-a > "$tmpA" 2>&1 &
 pidA=$!
 "$BANGER" vm run --rm -- echo smoke-concurrent-b > "$tmpB" 2>&1 &
 pidB=$!
 wait "$pidA" || die "concurrent VM A exited non-zero: $(cat "$tmpA")"
 wait "$pidB" || die "concurrent VM B exited non-zero: $(cat "$tmpB")"
 grep -q 'smoke-concurrent-a' "$tmpA" || die "concurrent VM A missing marker: $(cat "$tmpA")"
 grep -q 'smoke-concurrent-b' "$tmpB" || die "concurrent VM B missing marker: $(cat "$tmpB")"
 # --- invalid spec rejection + no artifact leak ------------------------
 # Tests the negative-path create flow: a blatantly invalid VM spec
 # must fail before any VM row is persisted. The review cycle flagged
 # "cleanup on partial failure" as under-tested; this scenario pins
 # that a rejected create doesn't leak a reservation we then have to
 # clean up by hand.
 log 'invalid spec rejection: --vcpu 0 must fail and leave no VM behind'
 pre_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
 set +e
 "$BANGER" vm run --rm --vcpu 0 -- echo unused >/dev/null 2>&1
 rc=$?
 set -e
 [[ "$rc" -ne 0 ]] || die 'invalid spec: vm run succeeded despite --vcpu 0'
 post_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
 [[ "$pre_vms" == "$post_vms" ]] || die "invalid spec leaked a VM row: pre=$pre_vms, post=$post_vms"
 # --- daemon stop (flushes coverage) -----------------------------------
 log 'stopping daemon so instrumented binaries flush coverage'
 "$BANGER" daemon stop >/dev/null 2>&1 || true