diff --git a/CHANGELOG.md b/CHANGELOG.md index e706114..7ada785 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,16 +10,6 @@ changed between versions. ## [Unreleased] -## [v0.1.10] - 2026-05-03 - -### Added - -- README now includes an animated demo GIF showing the typical - sandbox lifecycle (`vm run`, host-side `ssh demo.vm`, stop/start - with file persistence, `vm exec`, `curl http://demo.vm`). The - recording script lives at `assets/demo.tape` and is rendered with - [VHS](https://github.com/charmbracelet/vhs). - ## [v0.1.9] - 2026-05-01 ### Fixed @@ -312,8 +302,7 @@ root filesystem and network, and exits on demand. the swap rather than starting up against an incompatible store. - Linux only. amd64 only. KVM required. -[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.10...HEAD -[v0.1.10]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.10 +[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.9...HEAD [v0.1.9]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.9 [v0.1.8]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.8 [v0.1.7]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.7 diff --git a/README.md b/README.md index ab2a8e6..ce7a310 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ One-command development sandboxes on Firecracker microVMs. -![banger demo](assets/banger.gif) - Spin up a clean Linux VM with your repo and tooling preloaded, drop into ssh, and tear it down — all from one command. 
banger is built for the dev loop, not the server use case: guests are short-lived, diff --git a/assets/banger.gif b/assets/banger.gif deleted file mode 100644 index 2f88c5a..0000000 Binary files a/assets/banger.gif and /dev/null differ diff --git a/assets/demo.tape b/assets/demo.tape deleted file mode 100644 index d68741a..0000000 --- a/assets/demo.tape +++ /dev/null @@ -1,112 +0,0 @@ -# banger hero demo — VHS tape -# Render with: vhs assets/demo.tape - -Output assets/banger.gif - -Require banger -Require ssh -Require curl - -Set Shell "bash" -Set FontSize 14 -Set LineHeight 1.4 -Set Width 1200 -Set Height 720 -Set Padding 20 -Set Theme "Catppuccin Frappe" -Set TypingSpeed 66ms - -# Off-camera reset: enable bash syntax highlighting via ble.sh, prompt -# styling, drop any prior demo VM, and clear the screen. -Hide -Type "source ~/.local/share/blesh/ble.sh --noattach" -Enter -Sleep 200ms -Type "bleopt complete_auto_complete= complete_auto_history=" -Enter -Sleep 100ms -Type `export PS1="\n$PS1"` -Enter -Sleep 200ms -Type "[[ ${BLE_VERSION-} ]] && ble-attach" -Enter -Sleep 400ms -Type "ble-face -s syntax_error fg=red" -Enter -Sleep 100ms -Type "banger vm kill demo 2>/dev/null; banger vm delete demo 2>/dev/null; clear" -Enter -Sleep 500ms -Show - -Type "banger vm run --nat --name demo" -Enter -Wait+Line /demo:~#/ -Sleep 1.4s - -Type "uname -a" -Enter -Sleep 1.4s - -Type "exit" -Enter -Wait -Sleep 700ms - -Type "banger vm list" -Enter -Wait -Sleep 1.8s - -Type "ssh demo.vm" -Enter -Wait+Line /demo:~#/ -Sleep 500ms - -Type "touch foo bar baz" -Enter -Sleep 700ms - -Type "ls" -Enter -Sleep 1.4s - -Type "exit" -Enter -Sleep 700ms - -Type "banger vm stop demo" -Enter -Wait -Sleep 1s - -Type "banger vm start demo" -Enter -Wait -Sleep 1s - -Type "banger vm exec demo -- ls" -Enter -Wait -Sleep 1.4s - -Type "banger vm exec demo -- docker run -d -p 80:80 nginx" -Enter -Wait -Sleep 1.6s - -Type "banger vm ports demo" -Enter -Wait -Sleep 2s - -Type "curl http://demo.vm" -Sleep 
1.2s -Enter -Wait -Sleep 4s - -Type "banger vm kill demo && banger vm delete demo" -Enter -Wait -Sleep 3s diff --git a/internal/daemon/capabilities.go b/internal/daemon/capabilities.go index b99ba4a..89fa5e9 100644 --- a/internal/daemon/capabilities.go +++ b/internal/daemon/capabilities.go @@ -247,9 +247,6 @@ func (c workDiskCapability) PrepareHost(ctx context.Context, vm *model.VMRecord, if err := c.ws.ensureAuthorizedKeyOnWorkDisk(ctx, vm, image, prep); err != nil { return err } - if err := c.ws.ensureHushLoginOnWorkDisk(ctx, vm); err != nil { - return err - } if err := c.ws.ensureGitIdentityOnWorkDisk(ctx, vm); err != nil { return err } diff --git a/internal/daemon/tap_pool.go b/internal/daemon/tap_pool.go index d91debf..c0e5f60 100644 --- a/internal/daemon/tap_pool.go +++ b/internal/daemon/tap_pool.go @@ -6,7 +6,6 @@ import ( "strconv" "strings" "sync" - "sync/atomic" ) const tapPoolPrefix = "tap-pool-" @@ -17,16 +16,8 @@ type tapPool struct { mu sync.Mutex entries []string next int - warming bool } -// maxConcurrentTapWarmup caps the number of `priv.create_tap` RPCs the -// warmup loop runs in parallel. Each tap creation is ~4 root-helper -// shell-outs serialized within one RPC handler; running too many at -// once just contends on netlink. 8 is the production sweet spot for -// SMOKE_JOBS=8. -const maxConcurrentTapWarmup = 8 - // initializeTapPool seeds the monotonic pool index from the set of // tap names already in use by running/stopped VMs, so newly warmed // pool entries don't collide with existing ones. Callers (Daemon.Open) @@ -50,23 +41,6 @@ func (n *HostNetwork) ensureTapPool(ctx context.Context) { if n.config.TapPoolSize <= 0 { return } - - // Dedupe concurrent warmup invocations. Releases trigger a fresh - // ensureTapPool in a goroutine; without this, N parallel releases - // would each spin up their own warmup loop racing on n.tapPool.next. 
- n.tapPool.mu.Lock() - if n.tapPool.warming { - n.tapPool.mu.Unlock() - return - } - n.tapPool.warming = true - n.tapPool.mu.Unlock() - defer func() { - n.tapPool.mu.Lock() - n.tapPool.warming = false - n.tapPool.mu.Unlock() - }() - for { select { case <-ctx.Done(): @@ -77,54 +51,28 @@ func (n *HostNetwork) ensureTapPool(ctx context.Context) { } n.tapPool.mu.Lock() - deficit := n.config.TapPoolSize - len(n.tapPool.entries) - if deficit <= 0 { + if len(n.tapPool.entries) >= n.config.TapPoolSize { n.tapPool.mu.Unlock() return } - batch := deficit - if batch > maxConcurrentTapWarmup { - batch = maxConcurrentTapWarmup - } - // Reserve names up front so concurrent goroutines can't collide - // on n.tapPool.next. - names := make([]string, batch) - for i := range names { - names[i] = fmt.Sprintf("%s%d", tapPoolPrefix, n.tapPool.next) - n.tapPool.next++ - } + tapName := fmt.Sprintf("%s%d", tapPoolPrefix, n.tapPool.next) + n.tapPool.next++ n.tapPool.mu.Unlock() - var ( - wg sync.WaitGroup - progress atomic.Int32 - ) - for _, tapName := range names { - wg.Add(1) - go func(tapName string) { - defer wg.Done() - if err := n.createTap(ctx, tapName); err != nil { - if n.logger != nil { - n.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error()) - } - return - } - n.tapPool.mu.Lock() - n.tapPool.entries = append(n.tapPool.entries, tapName) - n.tapPool.mu.Unlock() - progress.Add(1) - if n.logger != nil { - n.logger.Debug("tap added to idle pool", "tap_device", tapName) - } - }(tapName) - } - wg.Wait() - - // Whole batch failed → bail rather than burn names indefinitely - // (the original sequential loop bailed on first error too). 
- if progress.Load() == 0 { + if err := n.createTap(ctx, tapName); err != nil { + if n.logger != nil { + n.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error()) + } return } + + n.tapPool.mu.Lock() + n.tapPool.entries = append(n.tapPool.entries, tapName) + n.tapPool.mu.Unlock() + + if n.logger != nil { + n.logger.Debug("tap added to idle pool", "tap_device", tapName) + } } } diff --git a/internal/daemon/vm_authsync.go b/internal/daemon/vm_authsync.go index 117014a..b4feaaa 100644 --- a/internal/daemon/vm_authsync.go +++ b/internal/daemon/vm_authsync.go @@ -86,15 +86,6 @@ func provisionAuthorizedKey(ctx context.Context, runner system.CommandRunner, im return system.WriteExt4FileOwned(ctx, runner, imagePath, "/.ssh/authorized_keys", 0o600, 0, 0, merged) } -// ensureHushLoginOnWorkDisk lands /root/.hushlogin in the guest by -// writing /.hushlogin at the root of the work disk (which mounts at -// /root inside the guest). pam_motd checks $HOME/.hushlogin and stays -// silent when it exists — combined with sshd's PrintMotd no / PrintLastLog no -// that suppresses the Debian-style banner on `banger vm run`. 
-func (s *WorkspaceService) ensureHushLoginOnWorkDisk(ctx context.Context, vm *model.VMRecord) error { - return system.WriteExt4FileOwned(ctx, s.runner, vm.Runtime.WorkDiskPath, "/.hushlogin", 0o644, 0, 0, nil) -} - func (s *WorkspaceService) ensureGitIdentityOnWorkDisk(ctx context.Context, vm *model.VMRecord) error { runner := s.runner if runner == nil { diff --git a/internal/daemon/vm_disk.go b/internal/daemon/vm_disk.go index fe5db6d..e86b8b3 100644 --- a/internal/daemon/vm_disk.go +++ b/internal/daemon/vm_disk.go @@ -50,6 +50,11 @@ func (s *VMService) patchRootOverlay(ctx context.Context, vm model.VMRecord, ima builder.WriteFile(guestnet.ConfigPath, guestnet.ConfigFile(vm.Runtime.GuestIP, s.config.BridgeIP, s.config.DefaultDNS)) builder.WriteFile(guestnet.GuestScriptPath, []byte(guestnet.BootstrapScript())) builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig) + // pam_motd reads /etc/motd + /etc/update-motd.d on Debian-family + // guests independent of sshd's PrintMotd. .hushlogin in $HOME tells + // pam_motd to stay quiet for that user — root is the only login on + // banger VMs, so a single file suffices. + builder.WriteFile("/root/.hushlogin", []byte{}) builder.DropMountTarget("/home") builder.DropMountTarget("/var") builder.AddMount(guestconfig.MountSpec{ @@ -164,11 +169,9 @@ func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, imag // Banger VMs are short-lived sandboxes. The Debian-style MOTD // ("Linux ... GNU/Linux comes with ABSOLUTELY NO WARRANTY …") and // the "Last login" line are pure noise for `vm run -- echo hi` -// style invocations. Pair this with the .hushlogin landed on the -// work disk (see ensureHushLoginOnWorkDisk) so pam_motd also stays -// silent on distros that read /etc/motd through PAM rather than -// sshd. The work disk mounts at /root, so the file has to live on -// that disk — a write to the rootfs overlay would be shadowed. +// style invocations. 
Pair this with the .hushlogin written in patchRootOverlay +// so pam_motd also stays silent on distros that read /etc/motd +// through PAM rather than sshd. func sshdGuestConfig() string { return strings.Join([]string{ "PermitRootLogin prohibit-password", diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go index ca0aad7..e759bc6 100644 --- a/internal/daemon/vm_lifecycle.go +++ b/internal/daemon/vm_lifecycle.go @@ -131,27 +131,44 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v } return vm, nil } + pid := s.vmHandles(vm.ID).PID op.stage("graceful_shutdown") - // Reach into the guest over SSH to force a sync + queue a poweroff. - // The sync is what keeps stop() from losing data: every dirty page - // the guest hasn't flushed through virtio-blk to the work disk is - // written out before this RPC returns. Once sync completes, - // root.ext4 on the host is consistent and cleanupRuntime's SIGKILL - // is safe — there is no benefit to waiting for the guest's - // poweroff.target to finish, so we skip waitForExit entirely. + // Reach into the guest over SSH to force a sync + queue a poweroff + // before falling back on FC's SendCtrlAltDel. The sync is what + // keeps stop() from losing data: every dirty page the guest hasn't + // flushed through virtio-blk to the work disk is written out + // before this RPC returns. Without it, files freshly created via + // `vm workspace prepare` can disappear across stop+start, because + // the 10-second wait_for_exit window expires (FC doesn't exit on + // SendCtrlAltDel — Debian routes ctrl-alt-del.target → reboot.target, + // not poweroff) and the fallback SIGKILL drops everything still + // in FC's userspace I/O path. // - // When SSH is unreachable (broken sshd, network down, drifted host - // key) we drop straight to SIGKILL via cleanupRuntime. 
The - // previous fallback was SendCtrlAltDel + a 10-second wait for FC - // to exit, but on Debian ctrl+alt+del routes to reboot.target, so - // FC never exits on it — the wait was always a wasted 10s. We pay - // the data-loss cost we already paid before (after the timeout - // expired the old code SIGKILLed too), but without the latency. + // `systemctl --no-block poweroff` is queued for the same reason + // SendCtrlAltDel was here originally — it's how stop() asks the + // guest to halt. That request is best-effort; FC may or may not + // exit before the SIGKILL fallback fires. Either way, sync + // already ran, so the on-host root.ext4 is consistent regardless. + // + // SendCtrlAltDel survives as a fallback for guests where SSH + // itself is unreachable (broken sshd, network down, drifted host + // key); it doesn't fix the data-loss path, but it's the existing + // last-resort signal and is at least no worse than today. if err := s.requestGuestPoweroff(ctx, vm); err != nil { if s.logger != nil { - s.logger.Warn("guest ssh poweroff failed; SIGKILL without sync", + s.logger.Warn("guest ssh poweroff failed; falling back to ctrl+alt+del", append(vmLogAttrs(vm), "error", err.Error())...) } + if fallbackErr := s.net.sendCtrlAltDel(ctx, vm.Runtime.APISockPath); fallbackErr != nil { + return model.VMRecord{}, fallbackErr + } + } + op.stage("wait_for_exit", "pid", pid) + if err := s.net.waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil { + if !errors.Is(err, errWaitForExitTimeout) { + return model.VMRecord{}, err + } + op.stage("graceful_shutdown_timeout", "pid", pid) } op.stage("cleanup_runtime") if err := s.cleanupRuntime(ctx, vm, true); err != nil { @@ -173,16 +190,16 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v // comment in stopVMLocked. Returns the dial / SSH error if the guest // is unreachable; the caller treats that as a fallback signal. // -// Bounded by a hard 2-second SSH-dial timeout. 
A reachable guest on -// the host bridge dials in single-digit milliseconds; if we haven't -// connected in 2s the guest is effectively gone, so we fail fast and -// let the caller SIGKILL rather than burning latency on a doomed dial. +// Bounded by a hard 5-second SSH-dial timeout so a half-broken guest +// doesn't extend the overall stop window past the existing +// gracefulShutdownWait. If the dial doesn't succeed in that window we +// surface an error and let the caller take the SendCtrlAltDel path. func (s *VMService) requestGuestPoweroff(ctx context.Context, vm model.VMRecord) error { guestIP := strings.TrimSpace(vm.Runtime.GuestIP) if guestIP == "" { return errors.New("guest IP unknown") } - dialCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + dialCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() address := net.JoinHostPort(guestIP, "22") client, err := guest.Dial(dialCtx, address, s.config.SSHKeyPath, s.layout.KnownHostsPath) diff --git a/internal/daemon/vm_test.go b/internal/daemon/vm_test.go index a747104..131c55f 100644 --- a/internal/daemon/vm_test.go +++ b/internal/daemon/vm_test.go @@ -1592,7 +1592,7 @@ func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) { } } -func TestStopVMSIGKILLsWhenSSHUnreachable(t *testing.T) { +func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { ctx := context.Background() db := openDaemonStore(t) apiSock := filepath.Join(t.TempDir(), "fc.sock") @@ -1606,6 +1606,12 @@ func TestStopVMSIGKILLsWhenSSHUnreachable(t *testing.T) { } }) + oldGracefulWait := gracefulShutdownWait + gracefulShutdownWait = 50 * time.Millisecond + t.Cleanup(func() { + gracefulShutdownWait = oldGracefulWait + }) + vm := testVM("stubborn", "image-stubborn", "172.16.0.23") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning @@ -1616,6 +1622,8 @@ func TestStopVMSIGKILLsWhenSSHUnreachable(t *testing.T) { scriptedRunner: &scriptedRunner{ t: t, steps: []runnerStep{ + 
sudoStep("", nil, "chmod", "600", apiSock), + sudoStep("", nil, "chown", "-h", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock), {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")}, sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)), }, diff --git a/scripts/install.sh b/scripts/install.sh index 9b8f0fd..515fd6f 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -228,7 +228,6 @@ banger $TARGET_VERSION installed. Next steps: banger doctor # confirm host readiness banger vm run # boot a sandbox - banger ssh-config --install # optional: enable 'ssh .vm' Updates land via: banger update --check