diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ada785..e706114 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,16 @@ changed between versions. ## [Unreleased] +## [v0.1.10] - 2026-05-03 + +### Added + +- README now includes an animated demo GIF showing the typical + sandbox lifecycle (`vm run`, host-side `ssh demo.vm`, stop/start + with file persistence, `vm exec`, `curl http://demo.vm`). The + recording script lives at `assets/demo.tape` and is rendered with + [VHS](https://github.com/charmbracelet/vhs). + ## [v0.1.9] - 2026-05-01 ### Fixed @@ -302,7 +312,8 @@ root filesystem and network, and exits on demand. the swap rather than starting up against an incompatible store. - Linux only. amd64 only. KVM required. -[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.9...HEAD +[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.10...HEAD +[v0.1.10]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.10 [v0.1.9]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.9 [v0.1.8]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.8 [v0.1.7]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.7 diff --git a/README.md b/README.md index ce7a310..ab2a8e6 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ One-command development sandboxes on Firecracker microVMs. +![banger demo](assets/banger.gif) + Spin up a clean Linux VM with your repo and tooling preloaded, drop into ssh, and tear it down — all from one command. banger is built for the dev loop, not the server use case: guests are short-lived, diff --git a/assets/banger.gif b/assets/banger.gif new file mode 100644 index 0000000..2f88c5a Binary files /dev/null and b/assets/banger.gif differ diff --git a/assets/demo.tape b/assets/demo.tape new file mode 100644 index 0000000..d68741a --- /dev/null +++ b/assets/demo.tape @@ -0,0 +1,112 @@ +# banger hero demo — VHS tape +# Render with: vhs assets/demo.tape + +Output assets/banger.gif + +Require banger +Require ssh +Require curl + +Set Shell "bash" +Set FontSize 14 +Set LineHeight 1.4 +Set Width 1200 +Set Height 720 +Set Padding 20 +Set Theme "Catppuccin Frappe" +Set TypingSpeed 66ms + +# Off-camera reset: enable bash syntax highlighting via ble.sh, prompt +# styling, drop any prior demo VM, and clear the screen. +Hide +Type "source ~/.local/share/blesh/ble.sh --noattach" +Enter +Sleep 200ms +Type "bleopt complete_auto_complete= complete_auto_history=" +Enter +Sleep 100ms +Type `export PS1="\n$PS1"` +Enter +Sleep 200ms +Type "[[ ${BLE_VERSION-} ]] && ble-attach" +Enter +Sleep 400ms +Type "ble-face -s syntax_error fg=red" +Enter +Sleep 100ms +Type "banger vm kill demo 2>/dev/null; banger vm delete demo 2>/dev/null; clear" +Enter +Sleep 500ms +Show + +Type "banger vm run --nat --name demo" +Enter +Wait+Line /demo:~#/ +Sleep 1.4s + +Type "uname -a" +Enter +Sleep 1.4s + +Type "exit" +Enter +Wait +Sleep 700ms + +Type "banger vm list" +Enter +Wait +Sleep 1.8s + +Type "ssh demo.vm" +Enter +Wait+Line /demo:~#/ +Sleep 500ms + +Type "touch foo bar baz" +Enter +Sleep 700ms + +Type "ls" +Enter +Sleep 1.4s + +Type "exit" +Enter +Sleep 700ms + +Type "banger vm stop demo" +Enter +Wait +Sleep 1s + +Type "banger vm start demo" +Enter +Wait +Sleep 1s + +Type "banger vm exec demo -- ls" +Enter +Wait +Sleep 1.4s + +Type "banger vm exec demo -- docker run -d -p 80:80 nginx" +Enter +Wait +Sleep 1.6s + +Type "banger vm ports demo" +Enter +Wait +Sleep 2s + +Type "curl http://demo.vm" +Sleep 1.2s +Enter +Wait +Sleep 4s + +Type "banger vm kill demo && banger vm delete demo" +Enter +Wait +Sleep 3s diff --git a/internal/daemon/capabilities.go b/internal/daemon/capabilities.go index 89fa5e9..b99ba4a 100644 --- a/internal/daemon/capabilities.go +++ b/internal/daemon/capabilities.go @@ -247,6 +247,9 @@ func (c workDiskCapability) PrepareHost(ctx context.Context, vm *model.VMRecord, if err := c.ws.ensureAuthorizedKeyOnWorkDisk(ctx, vm, image, prep); err != nil { return err } + if err := c.ws.ensureHushLoginOnWorkDisk(ctx, vm); err != nil { + return err + } if err := c.ws.ensureGitIdentityOnWorkDisk(ctx, vm); err != nil { return err } diff --git a/internal/daemon/tap_pool.go b/internal/daemon/tap_pool.go index c0e5f60..d91debf 100644 --- a/internal/daemon/tap_pool.go +++ b/internal/daemon/tap_pool.go @@ -6,6 +6,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" ) const tapPoolPrefix = "tap-pool-" @@ -16,8 +17,16 @@ type tapPool struct { mu sync.Mutex entries []string next int + warming bool } +// maxConcurrentTapWarmup caps the number of `priv.create_tap` RPCs the +// warmup loop runs in parallel. Each tap creation is ~4 root-helper +// shell-outs serialized within one RPC handler; running too many at +// once just contends on netlink. 8 is the production sweet spot for +// SMOKE_JOBS=8. +const maxConcurrentTapWarmup = 8 + // initializeTapPool seeds the monotonic pool index from the set of // tap names already in use by running/stopped VMs, so newly warmed // pool entries don't collide with existing ones. Callers (Daemon.Open) @@ -41,6 +50,23 @@ func (n *HostNetwork) ensureTapPool(ctx context.Context) { if n.config.TapPoolSize <= 0 { return } + + // Dedupe concurrent warmup invocations. Releases trigger a fresh + // ensureTapPool in a goroutine; without this, N parallel releases + // would each spin up their own warmup loop racing on n.tapPool.next. + n.tapPool.mu.Lock() + if n.tapPool.warming { + n.tapPool.mu.Unlock() + return + } + n.tapPool.warming = true + n.tapPool.mu.Unlock() + defer func() { + n.tapPool.mu.Lock() + n.tapPool.warming = false + n.tapPool.mu.Unlock() + }() + for { select { case <-ctx.Done(): @@ -51,27 +77,53 @@ func (n *HostNetwork) ensureTapPool(ctx context.Context) { } n.tapPool.mu.Lock() - if len(n.tapPool.entries) >= n.config.TapPoolSize { + deficit := n.config.TapPoolSize - len(n.tapPool.entries) + if deficit <= 0 { n.tapPool.mu.Unlock() return } - tapName := fmt.Sprintf("%s%d", tapPoolPrefix, n.tapPool.next) - n.tapPool.next++ - n.tapPool.mu.Unlock() - - if err := n.createTap(ctx, tapName); err != nil { - if n.logger != nil { - n.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error()) - } - return + batch := deficit + if batch > maxConcurrentTapWarmup { + batch = maxConcurrentTapWarmup + } + // Reserve names up front so concurrent goroutines can't collide + // on n.tapPool.next. + names := make([]string, batch) + for i := range names { + names[i] = fmt.Sprintf("%s%d", tapPoolPrefix, n.tapPool.next) + n.tapPool.next++ } - - n.tapPool.mu.Lock() - n.tapPool.entries = append(n.tapPool.entries, tapName) n.tapPool.mu.Unlock() - if n.logger != nil { - n.logger.Debug("tap added to idle pool", "tap_device", tapName) + var ( + wg sync.WaitGroup + progress atomic.Int32 + ) + for _, tapName := range names { + wg.Add(1) + go func(tapName string) { + defer wg.Done() + if err := n.createTap(ctx, tapName); err != nil { + if n.logger != nil { + n.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error()) + } + return + } + n.tapPool.mu.Lock() + n.tapPool.entries = append(n.tapPool.entries, tapName) + n.tapPool.mu.Unlock() + progress.Add(1) + if n.logger != nil { + n.logger.Debug("tap added to idle pool", "tap_device", tapName) + } + }(tapName) + } + wg.Wait() + + // Whole batch failed → bail rather than burn names indefinitely + // (the original sequential loop bailed on first error too). + if progress.Load() == 0 { + return } } } diff --git a/internal/daemon/vm_authsync.go b/internal/daemon/vm_authsync.go index b4feaaa..117014a 100644 --- a/internal/daemon/vm_authsync.go +++ b/internal/daemon/vm_authsync.go @@ -86,6 +86,15 @@ func provisionAuthorizedKey(ctx context.Context, runner system.CommandRunner, im return system.WriteExt4FileOwned(ctx, runner, imagePath, "/.ssh/authorized_keys", 0o600, 0, 0, merged) } +// ensureHushLoginOnWorkDisk lands /root/.hushlogin in the guest by +// writing /.hushlogin at the root of the work disk (which mounts at +// /root inside the guest). pam_motd checks $HOME/.hushlogin and stays +// silent when it exists — combined with sshd's PrintMotd no / PrintLastLog no +// that suppresses the Debian-style banner on `banger vm run`. +func (s *WorkspaceService) ensureHushLoginOnWorkDisk(ctx context.Context, vm *model.VMRecord) error { + return system.WriteExt4FileOwned(ctx, s.runner, vm.Runtime.WorkDiskPath, "/.hushlogin", 0o644, 0, 0, nil) +} + func (s *WorkspaceService) ensureGitIdentityOnWorkDisk(ctx context.Context, vm *model.VMRecord) error { runner := s.runner if runner == nil { diff --git a/internal/daemon/vm_disk.go b/internal/daemon/vm_disk.go index e86b8b3..fe5db6d 100644 --- a/internal/daemon/vm_disk.go +++ b/internal/daemon/vm_disk.go @@ -50,11 +50,6 @@ func (s *VMService) patchRootOverlay(ctx context.Context, vm model.VMRecord, ima builder.WriteFile(guestnet.ConfigPath, guestnet.ConfigFile(vm.Runtime.GuestIP, s.config.BridgeIP, s.config.DefaultDNS)) builder.WriteFile(guestnet.GuestScriptPath, []byte(guestnet.BootstrapScript())) builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig) - // pam_motd reads /etc/motd + /etc/update-motd.d on Debian-family - // guests independent of sshd's PrintMotd. .hushlogin in $HOME tells - // pam_motd to stay quiet for that user — root is the only login on - // banger VMs, so a single file suffices. - builder.WriteFile("/root/.hushlogin", []byte{}) builder.DropMountTarget("/home") builder.DropMountTarget("/var") builder.AddMount(guestconfig.MountSpec{ @@ -169,9 +164,11 @@ func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, imag // Banger VMs are short-lived sandboxes. The Debian-style MOTD // ("Linux ... GNU/Linux comes with ABSOLUTELY NO WARRANTY …") and // the "Last login" line are pure noise for `vm run -- echo hi` -// style invocations. Pair this with the .hushlogin written below -// so pam_motd also stays silent on distros that read /etc/motd -// through PAM rather than sshd. +// style invocations. Pair this with the .hushlogin landed on the +// work disk (see ensureHushLoginOnWorkDisk) so pam_motd also stays +// silent on distros that read /etc/motd through PAM rather than +// sshd. The work disk mounts at /root, so the file has to live on +// that disk — a write to the rootfs overlay would be shadowed. func sshdGuestConfig() string { return strings.Join([]string{ "PermitRootLogin prohibit-password", diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go index e759bc6..ca0aad7 100644 --- a/internal/daemon/vm_lifecycle.go +++ b/internal/daemon/vm_lifecycle.go @@ -131,44 +131,27 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v } return vm, nil } - pid := s.vmHandles(vm.ID).PID op.stage("graceful_shutdown") - // Reach into the guest over SSH to force a sync + queue a poweroff - // before falling back on FC's SendCtrlAltDel. The sync is what - // keeps stop() from losing data: every dirty page the guest hasn't - // flushed through virtio-blk to the work disk is written out - // before this RPC returns. Without it, files freshly created via - // `vm workspace prepare` can disappear across stop+start, because - // the 10-second wait_for_exit window expires (FC doesn't exit on - // SendCtrlAltDel — Debian routes ctrl-alt-del.target → reboot.target, - // not poweroff) and the fallback SIGKILL drops everything still - // in FC's userspace I/O path. + // Reach into the guest over SSH to force a sync + queue a poweroff. + // The sync is what keeps stop() from losing data: every dirty page + // the guest hasn't flushed through virtio-blk to the work disk is + // written out before this RPC returns. Once sync completes, + // root.ext4 on the host is consistent and cleanupRuntime's SIGKILL + // is safe — there is no benefit to waiting for the guest's + // poweroff.target to finish, so we skip waitForExit entirely. // - // `systemctl --no-block poweroff` is queued for the same reason - // SendCtrlAltDel was here originally — it's how stop() asks the - // guest to halt. That request is best-effort; FC may or may not - // exit before the SIGKILL fallback fires. Either way, sync - // already ran, so the on-host root.ext4 is consistent regardless. - // - // SendCtrlAltDel survives as a fallback for guests where SSH - // itself is unreachable (broken sshd, network down, drifted host - // key); it doesn't fix the data-loss path, but it's the existing - // last-resort signal and is at least no worse than today. + // When SSH is unreachable (broken sshd, network down, drifted host + // key) we drop straight to SIGKILL via cleanupRuntime. The + // previous fallback was SendCtrlAltDel + a 10-second wait for FC + // to exit, but on Debian ctrl+alt+del routes to reboot.target, so + // FC never exits on it — the wait was always a wasted 10s. We pay + // the data-loss cost we already paid before (after the timeout + // expired the old code SIGKILLed too), but without the latency. if err := s.requestGuestPoweroff(ctx, vm); err != nil { if s.logger != nil { - s.logger.Warn("guest ssh poweroff failed; falling back to ctrl+alt+del", + s.logger.Warn("guest ssh poweroff failed; SIGKILL without sync", append(vmLogAttrs(vm), "error", err.Error())...) } - if fallbackErr := s.net.sendCtrlAltDel(ctx, vm.Runtime.APISockPath); fallbackErr != nil { - return model.VMRecord{}, fallbackErr - } - } - op.stage("wait_for_exit", "pid", pid) - if err := s.net.waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil { - if !errors.Is(err, errWaitForExitTimeout) { - return model.VMRecord{}, err - } - op.stage("graceful_shutdown_timeout", "pid", pid) } op.stage("cleanup_runtime") if err := s.cleanupRuntime(ctx, vm, true); err != nil { @@ -190,16 +173,16 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v // comment in stopVMLocked. Returns the dial / SSH error if the guest // is unreachable; the caller treats that as a fallback signal. // -// Bounded by a hard 5-second SSH-dial timeout so a half-broken guest -// doesn't extend the overall stop window past the existing -// gracefulShutdownWait. If the dial doesn't succeed in that window we -// surface an error and let the caller take the SendCtrlAltDel path. +// Bounded by a hard 2-second SSH-dial timeout. A reachable guest on +// the host bridge dials in single-digit milliseconds; if we haven't +// connected in 2s the guest is effectively gone, so we fail fast and +// let the caller SIGKILL rather than burning latency on a doomed dial. func (s *VMService) requestGuestPoweroff(ctx context.Context, vm model.VMRecord) error { guestIP := strings.TrimSpace(vm.Runtime.GuestIP) if guestIP == "" { return errors.New("guest IP unknown") } - dialCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + dialCtx, cancel := context.WithTimeout(ctx, 2*time.Second) defer cancel() address := net.JoinHostPort(guestIP, "22") client, err := guest.Dial(dialCtx, address, s.config.SSHKeyPath, s.layout.KnownHostsPath) diff --git a/internal/daemon/vm_test.go b/internal/daemon/vm_test.go index 131c55f..a747104 100644 --- a/internal/daemon/vm_test.go +++ b/internal/daemon/vm_test.go @@ -1592,7 +1592,7 @@ func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) { } } -func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { +func TestStopVMSIGKILLsWhenSSHUnreachable(t *testing.T) { ctx := context.Background() db := openDaemonStore(t) apiSock := filepath.Join(t.TempDir(), "fc.sock") @@ -1606,12 +1606,6 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { } }) - oldGracefulWait := gracefulShutdownWait - gracefulShutdownWait = 50 * time.Millisecond - t.Cleanup(func() { - gracefulShutdownWait = oldGracefulWait - }) - vm := testVM("stubborn", "image-stubborn", "172.16.0.23") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning @@ -1622,8 +1616,6 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { scriptedRunner: &scriptedRunner{ t: t, steps: []runnerStep{ - sudoStep("", nil, "chmod", "600", apiSock), - sudoStep("", nil, "chown", "-h", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock), {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")}, sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)), }, diff --git a/scripts/install.sh b/scripts/install.sh index 515fd6f..9b8f0fd 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -228,6 +228,7 @@ banger $TARGET_VERSION installed. Next steps: banger doctor # confirm host readiness banger vm run # boot a sandbox + banger ssh-config --install # optional: enable 'ssh .vm' Updates land via: banger update --check