Refactor VM lifecycle around capabilities

Make host-integrated VM features fit a standard Go extension path instead of adding more one-off branches through vm.go. This is the enabling refactor for future work like shared mounts, not the /work feature itself.

Add a daemon capability pipeline plus a structured guest-config builder, then move the existing /root work-disk mount, built-in DNS, and NAT wiring onto those hooks. Generalize Firecracker drive config at the same time so later storage features can extend machine setup without another hardcoded path.

Add banger doctor on top of the shared readiness checks, update the docs to describe the new architecture, and cover the new seams with guest-config, capability, report, CLI, and full go test verification. Also verify make build and a real ./banger doctor run on the host.
This commit is contained in:
Thales Maciel 2026-03-18 19:28:26 -03:00
parent 9e98445fa2
commit 4930d82cb9
No known key found for this signature in database
GPG key ID: 33112E6833C34679
18 changed files with 1120 additions and 105 deletions

View file

@ -12,6 +12,7 @@ import (
"banger/internal/api"
"banger/internal/firecracker"
"banger/internal/guestconfig"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
@ -223,8 +224,8 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
if err := d.patchRootOverlay(ctx, vm, image); err != nil {
return cleanupOnErr(err)
}
op.stage("work_disk", "work_disk_path", vm.Runtime.WorkDiskPath)
if err := d.ensureWorkDisk(ctx, &vm); err != nil {
op.stage("prepare_host_features")
if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
return cleanupOnErr(err)
}
op.stage("tap", "tap_device", tap)
@ -243,7 +244,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
}
op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
firecrackerCtx := context.Background()
machine, err := firecracker.NewMachine(firecrackerCtx, firecracker.MachineConfig{
machineConfig := firecracker.MachineConfig{
BinaryPath: fcPath,
VMID: vm.ID,
SocketPath: apiSock,
@ -252,13 +253,19 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
KernelImagePath: image.KernelPath,
InitrdPath: image.InitrdPath,
KernelArgs: system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
RootDrivePath: vm.Runtime.DMDev,
WorkDrivePath: vm.Runtime.WorkDiskPath,
TapDevice: tap,
VCPUCount: vm.Spec.VCPUCount,
MemoryMiB: vm.Spec.MemoryMiB,
Logger: d.logger,
})
Drives: []firecracker.DriveConfig{{
ID: "rootfs",
Path: vm.Runtime.DMDev,
ReadOnly: false,
IsRoot: true,
}},
TapDevice: tap,
VCPUCount: vm.Spec.VCPUCount,
MemoryMiB: vm.Spec.MemoryMiB,
Logger: d.logger,
}
d.contributeMachineConfig(&machineConfig, vm, image)
machine, err := firecracker.NewMachine(firecrackerCtx, machineConfig)
if err != nil {
return cleanupOnErr(err)
}
@ -272,16 +279,10 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
return cleanupOnErr(err)
}
op.stage("dns", "dns_name", vm.Runtime.DNSName)
if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
op.stage("post_start_features")
if err := d.postStartCapabilities(ctx, vm, image); err != nil {
return cleanupOnErr(err)
}
if vm.Spec.NATEnabled {
op.stage("nat")
if err := d.ensureNAT(ctx, vm, true); err != nil {
return cleanupOnErr(err)
}
}
system.TouchNow(&vm)
op.stage("persist")
if err := d.store.UpsertVM(ctx, vm); err != nil {
@ -455,12 +456,6 @@ func (d *Daemon) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm
if err := d.cleanupRuntime(ctx, vm, false); err != nil {
return model.VMRecord{}, err
}
if vm.Spec.NATEnabled {
op.debugStage("disable_nat")
_ = d.ensureNAT(ctx, vm, false)
}
op.debugStage("remove_dns", "dns_name", vm.Runtime.DNSName)
_ = d.removeDNS(ctx, vm.Runtime.DNSName)
op.stage("delete_store_record")
if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
return model.VMRecord{}, err
@ -538,10 +533,10 @@ func (d *Daemon) setVMLocked(ctx context.Context, current model.VMRecord, params
if params.NATEnabled != nil {
op.stage("update_nat", "nat_enabled", *params.NATEnabled)
vm.Spec.NATEnabled = *params.NATEnabled
if running {
if err := d.ensureNAT(ctx, vm, *params.NATEnabled); err != nil {
return model.VMRecord{}, err
}
}
if running {
if err := d.applyCapabilityConfigChanges(ctx, current, vm); err != nil {
return model.VMRecord{}, err
}
}
system.TouchNow(&vm)
@ -684,14 +679,34 @@ func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image
if err != nil {
fstab = ""
}
newFSTab := system.UpdateFSTab(fstab)
for guestPath, data := range map[string][]byte{
"/etc/resolv.conf": resolv,
"/etc/hostname": hostname,
"/etc/hosts": hosts,
"/etc/fstab": []byte(newFSTab),
"/etc/ssh/sshd_config.d/99-banger.conf": sshdConfig,
} {
builder := guestconfig.NewBuilder()
builder.WriteFile("/etc/resolv.conf", resolv)
builder.WriteFile("/etc/hostname", hostname)
builder.WriteFile("/etc/hosts", hosts)
builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig)
builder.DropMountTarget("/home")
builder.DropMountTarget("/var")
builder.AddMount(guestconfig.MountSpec{
Source: "tmpfs",
Target: "/run",
FSType: "tmpfs",
Options: []string{"defaults", "nodev", "nosuid", "mode=0755"},
Dump: 0,
Pass: 0,
})
builder.AddMount(guestconfig.MountSpec{
Source: "tmpfs",
Target: "/tmp",
FSType: "tmpfs",
Options: []string{"defaults", "nodev", "nosuid", "mode=1777"},
Dump: 0,
Pass: 0,
})
d.contributeGuestConfig(builder, vm, image)
builder.WriteFile("/etc/fstab", []byte(builder.RenderFSTab(fstab)))
files := builder.Files()
for _, guestPath := range builder.FilePaths() {
data := files[guestPath]
if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil {
return err
}
@ -878,14 +893,11 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
DMName: vm.Runtime.DMName,
DMDev: vm.Runtime.DMDev,
})
if vm.Spec.NATEnabled {
_ = d.ensureNAT(ctx, vm, false)
}
_ = d.removeDNS(ctx, vm.Runtime.DNSName)
featureErr := d.cleanupCapabilityState(ctx, vm)
if !preserveDisks && vm.Runtime.VMDir != "" {
return errors.Join(snapshotErr, os.RemoveAll(vm.Runtime.VMDir))
return errors.Join(snapshotErr, featureErr, os.RemoveAll(vm.Runtime.VMDir))
}
return snapshotErr
return errors.Join(snapshotErr, featureErr)
}
func clearRuntimeHandles(vm *model.VMRecord) {