Old flow on every 'banger vm run' that hit the seeded path: CopyFilePreferClone the seed file (FICLONE attempt + io.Copy + fsync fallback), then e2fsck -fp + resize2fs to grow the FS to the spec size. On filesystems without reflink support that meant pushing 512+ MiB through the kernel followed by a full filesystem check and resize, even though the seed only carries a few KB of dotfiles — minWorkSeedBytes is 512 MiB but the actual payload is tiny. That is the minute-long stall on the 'cloning work seed' stage users see today. Replace the copy with a sized fresh ext4: truncate to WorkDiskSizeBytes, mkfs.ext4 -F -E root_owner=0:0, debugfs rdump to extract the seed's contents, then ingest each file via the sudoless ext4 toolkit (MkdirExt4 / WriteExt4FileOwned, root:root, mode preserved). Sub-second regardless of seed size or requested work-disk size; no fsck or resize needed because the FS is created at its final size from the start. Also drop the now-implementation-pinned TestEnsureWorkDiskClonesSeedImageAndResizes — its premise (a scripted e2fsck/resize2fs sequence) no longer reflects the code, and smoke covers the new flow end to end. Stage label changed from 'cloning work seed' to 'applying work seed' to match what actually happens. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
171 lines
6.6 KiB
Go
171 lines
6.6 KiB
Go
package daemon
|
|
|
|
import (
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"

	"banger/internal/guestconfig"
	"banger/internal/guestnet"
	"banger/internal/model"
	"banger/internal/roothelper"
	"banger/internal/system"
)
|
|
|
|
// workDiskPreparation reports how ensureWorkDisk produced the VM's work
// disk.
type workDiskPreparation struct {
	// ClonedFromSeed is true only when the disk was freshly built from the
	// image's work seed. It stays false when the disk already existed or
	// was created empty.
	ClonedFromSeed bool
}
|
|
|
|
func (s *VMService) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
|
|
if exists(vm.Runtime.SystemOverlay) {
|
|
return nil
|
|
}
|
|
_, err := s.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10), vm.Runtime.SystemOverlay)
|
|
return err
|
|
}
|
|
|
|
// patchRootOverlay writes the per-VM config files (resolv.conf,
|
|
// hostname, hosts, sshd drop-in, network bootstrap, fstab) into the
|
|
// rootfs overlay. The start flow passes the DM device path explicitly so the
|
|
// owner daemon can hand the privileged ext4 work to the root helper without
|
|
// rereading mutable process state.
|
|
func (s *VMService) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image, dmDev string) error {
|
|
if strings.TrimSpace(dmDev) == "" {
|
|
return fmt.Errorf("vm %q: DM device is required", vm.ID)
|
|
}
|
|
resolv := []byte(fmt.Sprintf("nameserver %s\n", s.config.DefaultDNS))
|
|
hostname := []byte(vm.Name + "\n")
|
|
hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name))
|
|
sshdConfig := []byte(sshdGuestConfig())
|
|
fstabBytes, err := s.privOps().ReadExt4File(ctx, dmDev, "/etc/fstab")
|
|
fstab := string(fstabBytes)
|
|
if err != nil {
|
|
fstab = ""
|
|
}
|
|
builder := guestconfig.NewBuilder()
|
|
builder.WriteFile("/etc/resolv.conf", resolv)
|
|
builder.WriteFile("/etc/hostname", hostname)
|
|
builder.WriteFile("/etc/hosts", hosts)
|
|
builder.WriteFile(guestnet.ConfigPath, guestnet.ConfigFile(vm.Runtime.GuestIP, s.config.BridgeIP, s.config.DefaultDNS))
|
|
builder.WriteFile(guestnet.GuestScriptPath, []byte(guestnet.BootstrapScript()))
|
|
builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig)
|
|
builder.DropMountTarget("/home")
|
|
builder.DropMountTarget("/var")
|
|
builder.AddMount(guestconfig.MountSpec{
|
|
Source: "tmpfs",
|
|
Target: "/run",
|
|
FSType: "tmpfs",
|
|
Options: []string{"defaults", "nodev", "nosuid", "mode=0755"},
|
|
Dump: 0,
|
|
Pass: 0,
|
|
})
|
|
builder.AddMount(guestconfig.MountSpec{
|
|
Source: "tmpfs",
|
|
Target: "/tmp",
|
|
FSType: "tmpfs",
|
|
Options: []string{"defaults", "nodev", "nosuid", "mode=1777"},
|
|
Dump: 0,
|
|
Pass: 0,
|
|
})
|
|
s.capHooks.contributeGuest(builder, vm, image)
|
|
builder.WriteFile("/etc/fstab", []byte(builder.RenderFSTab(fstab)))
|
|
files := builder.Files()
|
|
writes := make([]roothelper.Ext4Write, 0, len(files))
|
|
for _, guestPath := range builder.FilePaths() {
|
|
mode := uint32(0o644)
|
|
if guestPath == guestnet.GuestScriptPath {
|
|
mode = 0o755
|
|
}
|
|
writes = append(writes, roothelper.Ext4Write{
|
|
GuestPath: guestPath,
|
|
Data: files[guestPath],
|
|
Mode: mode,
|
|
})
|
|
}
|
|
return s.privOps().WriteExt4Files(ctx, dmDev, writes)
|
|
}
|
|
|
|
func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image) (workDiskPreparation, error) {
|
|
if exists(vm.Runtime.WorkDiskPath) {
|
|
return workDiskPreparation{}, nil
|
|
}
|
|
if exists(image.WorkSeedPath) {
|
|
vmCreateStage(ctx, "prepare_work_disk", "applying work seed")
|
|
// Old flow used CopyFilePreferClone + (e2fsck + resize2fs).
|
|
// On filesystems without reflink support that meant pushing
|
|
// every byte of a 512+ MiB seed through the kernel followed
|
|
// by a full fsck/resize, even though the seed itself only
|
|
// holds a few KB of dotfiles. mkfs + ingest runs in roughly
|
|
// a second regardless of seed or work-disk size.
|
|
if err := system.MaterializeWorkDisk(ctx, s.runner, image.WorkSeedPath, vm.Runtime.WorkDiskPath, vm.Spec.WorkDiskSizeBytes); err != nil {
|
|
return workDiskPreparation{}, err
|
|
}
|
|
return workDiskPreparation{ClonedFromSeed: true}, nil
|
|
}
|
|
// No seed: build an empty work disk. `-E root_owner=0:0` stamps
|
|
// inode 2 (the fs root, which becomes /root inside the guest) as
|
|
// root:root:0755 up front. sshd's StrictModes walks that dir's
|
|
// ownership and mode, so getting it right from mkfs means the
|
|
// authsync step can just write authorized_keys without any
|
|
// repair pass.
|
|
//
|
|
// Unlike the pre-refactor flow there is no "copy /root from the
|
|
// base rootfs" step. The no-seed path is the degraded fallback
|
|
// (the common case has a work-seed artifact and hits the branch
|
|
// above). Dropping the copy eliminates 4 sudo call sites — mount
|
|
// base ro, mount work rw, sudo cp -a, flattenNestedWorkHome —
|
|
// at the cost of losing default distro dotfiles on no-seed VMs.
|
|
// Users who need those should either rebuild the image with a
|
|
// work-seed (the documented path) or land them via [[file_sync]].
|
|
vmCreateStage(ctx, "prepare_work_disk", "creating empty work disk")
|
|
if _, err := s.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
|
|
return workDiskPreparation{}, err
|
|
}
|
|
if _, err := s.runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", vm.Runtime.WorkDiskPath); err != nil {
|
|
return workDiskPreparation{}, err
|
|
}
|
|
return workDiskPreparation{}, nil
|
|
}
|
|
|
|
// sshdGuestConfig returns the text of the sshd drop-in that banger installs
// at /etc/ssh/sshd_config.d/99-banger.conf in every guest: one directive
// per line, each terminated by a newline.
//
// Banger VMs are single-user root sandboxes reachable only through the
// host bridge (default 172.16.0.0/24). The drop-in enables just enough to
// make that usable while staying tight enough that a misconfigured host
// bridge does not immediately hand over an unauthenticated root shell.
//
// Rationale per directive:
//
//   - PermitRootLogin prohibit-password
//     Root is the only account in the guest. prohibit-password permits
//     pubkey login and blocks password auth at the source, even if some
//     later config turns PasswordAuthentication on.
//
//   - PubkeyAuthentication yes
//     The only auth method expected. Stated explicitly in case a future
//     Debian default or distro package turns it off.
//
//   - PasswordAuthentication no
//
//   - KbdInteractiveAuthentication no
//     Belt-and-braces: every interactive auth path is off, not just the
//     PermitRootLogin one. These are already Debian defaults; spelling
//     them out makes the drop-in document the intent.
//
//   - AuthorizedKeysFile /root/.ssh/authorized_keys
//     Pins the lookup path so the banger-written file always wins,
//     whatever the distro default ($HOME/.ssh/authorized_keys) or
//     per-image quirks.
func sshdGuestConfig() string {
	directives := [...]string{
		"PermitRootLogin prohibit-password",
		"PubkeyAuthentication yes",
		"PasswordAuthentication no",
		"KbdInteractiveAuthentication no",
		"AuthorizedKeysFile /root/.ssh/authorized_keys",
	}

	var out strings.Builder
	for _, directive := range directives {
		out.WriteString(directive)
		out.WriteByte('\n')
	}
	return out.String()
}
|