banger/internal/daemon/vm_disk.go
Thales Maciel a3a51e06c4
daemon: build the work disk fresh instead of cloning the seed file
Old flow on every 'banger vm run' that hit the seeded path:
CopyFilePreferClone the seed file (FICLONE attempt + io.Copy + fsync
fallback), then e2fsck -fp + resize2fs to grow the FS to the spec
size. On filesystems without reflink support that meant pushing
512+ MiB through the kernel followed by a full filesystem check
and resize, even though the seed only carries a few KB of dotfiles
— minWorkSeedBytes is 512 MiB but the actual payload is tiny.
That is the minute-long stall on the 'cloning work seed' stage
users see today.

Replace the copy with a sized fresh ext4: truncate to
WorkDiskSizeBytes, mkfs.ext4 -F -E root_owner=0:0, debugfs rdump
to extract the seed's contents, then ingest each file via the
sudoless ext4 toolkit (MkdirExt4 / WriteExt4FileOwned, root:root,
mode preserved). Sub-second regardless of seed size or requested
work-disk size; no fsck or resize needed because the FS is created
at its final size from the start.

Also drop the now-implementation-pinned
TestEnsureWorkDiskClonesSeedImageAndResizes — its premise (a
scripted e2fsck/resize2fs sequence) no longer reflects the code,
and smoke covers the new flow end to end. Stage label changed
from 'cloning work seed' to 'applying work seed' to match what
actually happens.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:42:10 -03:00

171 lines
6.6 KiB
Go

package daemon
import (
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"

	"banger/internal/guestconfig"
	"banger/internal/guestnet"
	"banger/internal/model"
	"banger/internal/roothelper"
	"banger/internal/system"
)
// workDiskPreparation reports how ensureWorkDisk produced (or found) a
// VM's work disk, so callers can tell a seeded disk from the others.
type workDiskPreparation struct {
	// ClonedFromSeed is true only when this call built the work disk
	// from the image's work seed. It stays false when the disk already
	// existed or when an empty disk was created because no seed was
	// available.
	ClonedFromSeed bool
}
// ensureSystemOverlay creates the VM's system overlay file when it is
// missing. The file is produced with `truncate -s <size>` using the
// size recorded in the VM spec; an overlay that already exists is left
// untouched. The runner's error, if any, is returned unchanged.
func (s *VMService) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
	overlayPath := vm.Runtime.SystemOverlay
	if !exists(overlayPath) {
		sizeArg := strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10)
		if _, err := s.runner.Run(ctx, "truncate", "-s", sizeArg, overlayPath); err != nil {
			return err
		}
	}
	return nil
}
// patchRootOverlay stages the per-VM configuration files and writes
// them into the rootfs overlay in one batch:
//
//   - /etc/resolv.conf, /etc/hostname and /etc/hosts
//   - the guest network config and its bootstrap script
//   - the banger sshd drop-in
//   - a regenerated /etc/fstab
//
// The caller supplies the DM device path explicitly, so the owner
// daemon can hand the privileged ext4 work to the root helper without
// rereading mutable process state. A blank dmDev is rejected up front.
func (s *VMService) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image, dmDev string) error {
	if strings.TrimSpace(dmDev) == "" {
		return fmt.Errorf("vm %q: DM device is required", vm.ID)
	}

	// Use the fstab already present on the device as the base for
	// rendering. A failed read is not fatal: the render then simply
	// starts from an empty fstab.
	var imageFSTab string
	if raw, readErr := s.privOps().ReadExt4File(ctx, dmDev, "/etc/fstab"); readErr == nil {
		imageFSTab = string(raw)
	}

	b := guestconfig.NewBuilder()
	b.WriteFile("/etc/resolv.conf", []byte(fmt.Sprintf("nameserver %s\n", s.config.DefaultDNS)))
	b.WriteFile("/etc/hostname", []byte(vm.Name+"\n"))
	b.WriteFile("/etc/hosts", []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name)))
	b.WriteFile(guestnet.ConfigPath, guestnet.ConfigFile(vm.Runtime.GuestIP, s.config.BridgeIP, s.config.DefaultDNS))
	b.WriteFile(guestnet.GuestScriptPath, []byte(guestnet.BootstrapScript()))
	b.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", []byte(sshdGuestConfig()))

	b.DropMountTarget("/home")
	b.DropMountTarget("/var")

	// /run and /tmp are both tmpfs mounts that differ only in their
	// target and permission bits.
	for _, tmpfs := range [...]struct{ target, modeOpt string }{
		{"/run", "mode=0755"},
		{"/tmp", "mode=1777"},
	} {
		b.AddMount(guestconfig.MountSpec{
			Source:  "tmpfs",
			Target:  tmpfs.target,
			FSType:  "tmpfs",
			Options: []string{"defaults", "nodev", "nosuid", tmpfs.modeOpt},
			Dump:    0,
			Pass:    0,
		})
	}

	// Capability hooks contribute before the fstab is rendered, so the
	// rendered file is produced from the builder's final state.
	s.capHooks.contributeGuest(b, vm, image)
	b.WriteFile("/etc/fstab", []byte(b.RenderFSTab(imageFSTab)))

	const (
		regularMode uint32 = 0o644
		scriptMode  uint32 = 0o755 // the bootstrap script must be executable
	)
	contents := b.Files()
	writes := make([]roothelper.Ext4Write, 0, len(contents))
	for _, p := range b.FilePaths() {
		entry := roothelper.Ext4Write{
			GuestPath: p,
			Data:      contents[p],
			Mode:      regularMode,
		}
		if p == guestnet.GuestScriptPath {
			entry.Mode = scriptMode
		}
		writes = append(writes, entry)
	}
	return s.privOps().WriteExt4Files(ctx, dmDev, writes)
}
// ensureWorkDisk makes sure the VM's work disk exists at
// vm.Runtime.WorkDiskPath.
//
//   - If the file already exists it is left alone and a zero
//     workDiskPreparation is returned.
//   - If the image has a work seed, the disk is built from it via
//     system.MaterializeWorkDisk and ClonedFromSeed is reported true.
//   - Otherwise an empty ext4 filesystem of the requested size is
//     created.
//
// On any failure the partially built file is removed (best effort) and
// the underlying error is returned unchanged, together with a zero
// workDiskPreparation.
func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image) (workDiskPreparation, error) {
	diskPath := vm.Runtime.WorkDiskPath
	if exists(diskPath) {
		return workDiskPreparation{}, nil
	}

	// The exists() check above is the only "is the disk ready?" test
	// this function performs. A build that fails part-way (for example
	// truncate succeeds, mkfs.ext4 does not) must therefore not leave a
	// file behind, or the next call would accept a sparse, unformatted
	// file as a finished work disk.
	discardPartial := func() {
		// Best effort: the build error is what the caller needs to
		// see, and the file may never have been created at all.
		_ = os.Remove(diskPath)
	}

	if exists(image.WorkSeedPath) {
		vmCreateStage(ctx, "prepare_work_disk", "applying work seed")
		// Old flow used CopyFilePreferClone + (e2fsck + resize2fs).
		// On filesystems without reflink support that meant pushing
		// every byte of a 512+ MiB seed through the kernel followed
		// by a full fsck/resize, even though the seed itself only
		// holds a few KB of dotfiles. mkfs + ingest runs in roughly
		// a second regardless of seed or work-disk size.
		if err := system.MaterializeWorkDisk(ctx, s.runner, image.WorkSeedPath, diskPath, vm.Spec.WorkDiskSizeBytes); err != nil {
			discardPartial()
			return workDiskPreparation{}, err
		}
		return workDiskPreparation{ClonedFromSeed: true}, nil
	}

	// No seed: build an empty work disk. `-E root_owner=0:0` stamps
	// inode 2 (the fs root, which becomes /root inside the guest) as
	// root:root:0755 up front. sshd's StrictModes walks that dir's
	// ownership and mode, so getting it right from mkfs means the
	// authsync step can just write authorized_keys without any
	// repair pass.
	//
	// Unlike the pre-refactor flow there is no "copy /root from the
	// base rootfs" step. The no-seed path is the degraded fallback
	// (the common case has a work-seed artifact and hits the branch
	// above). Dropping the copy eliminates 4 sudo call sites — mount
	// base ro, mount work rw, sudo cp -a, flattenNestedWorkHome —
	// at the cost of losing default distro dotfiles on no-seed VMs.
	// Users who need those should either rebuild the image with a
	// work-seed (the documented path) or land them via [[file_sync]].
	vmCreateStage(ctx, "prepare_work_disk", "creating empty work disk")
	sizeArg := strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10)
	if _, err := s.runner.Run(ctx, "truncate", "-s", sizeArg, diskPath); err != nil {
		discardPartial()
		return workDiskPreparation{}, err
	}
	if _, err := s.runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", diskPath); err != nil {
		discardPartial()
		return workDiskPreparation{}, err
	}
	return workDiskPreparation{}, nil
}
// sshdGuestConfig returns the banger-authored sshd drop-in that is
// installed at /etc/ssh/sshd_config.d/99-banger.conf in every guest.
// The result is one directive per line, each terminated by a newline.
//
// Banger VMs are single-user root sandboxes reachable only through the
// host bridge (default 172.16.0.0/24). The drop-in enables the minimum
// needed to make that usable while staying tight enough that a
// misconfigured host bridge does not immediately hand over an
// unauthenticated root shell.
//
// Rationale per directive:
//
//   - PermitRootLogin prohibit-password
//     Root is the only account in the guest. prohibit-password allows
//     pubkey login and blocks password auth at the source even if
//     some future config flips PasswordAuthentication on.
//
//   - PubkeyAuthentication yes
//     The only auth method we expect. Stated explicitly in case a
//     future Debian default or distro package flips it off.
//
//   - PasswordAuthentication no
//
//   - KbdInteractiveAuthentication no
//     Belt-and-braces: every interactive auth path is off, not just
//     the PermitRootLogin path. These are already Debian defaults,
//     but stating them here makes the drop-in document the intent.
//
//   - AuthorizedKeysFile /root/.ssh/authorized_keys
//     Pins the lookup path so the banger-written file always wins,
//     regardless of the distro default ($HOME/.ssh/authorized_keys)
//     and regardless of any per-image weirdness.
func sshdGuestConfig() string {
	directives := [...]string{
		"PermitRootLogin prohibit-password",
		"PubkeyAuthentication yes",
		"PasswordAuthentication no",
		"KbdInteractiveAuthentication no",
		"AuthorizedKeysFile /root/.ssh/authorized_keys",
	}

	var dropIn strings.Builder
	for _, directive := range directives {
		dropIn.WriteString(directive)
		dropIn.WriteByte('\n')
	}
	return dropIn.String()
}