Separates what a VM IS (durable intent + identity + deterministic
derived paths — `VMRuntime`) from what is CURRENTLY TRUE about it
(firecracker PID, tap device, loop devices, dm-snapshot target — new
`VMHandles`). The durable state lives in the SQLite `vms` row; the
transient state lives in an in-memory cache on the daemon plus a
per-VM `handles.json` scratch file inside VMDir, rebuilt at startup
from OS inspection. Nothing kernel-level rides the SQLite schema
anymore.
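As a rough sketch (field names taken from the Shape list below; the field
types and exact declaration are assumptions, not the actual definition):

package model

// VMHandles is the transient, OS-observable state of a running VM: held in
// the daemon's in-memory cache, mirrored to the per-VM handles.json scratch
// file, and never written to SQLite. Field types here are assumptions.
type VMHandles struct {
	PID       int
	TapDevice string
	BaseLoop  string
	COWLoop   string
	DMName    string
	DMDev     string
}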
Why:
Persisting ephemeral process handles to SQLite forced reconcile to
treat "running with a stale PID" as a first-class case and mix it
with real state transitions. The schema described what we last
observed, not what the VM is. Every time the observation model
shifted (tap pool, DM naming, pgrep fallback) the reconcile logic
grew a new branch. Splitting lets each layer own what it's good at:
durable records describe intent, in-memory cache + scratch file
describe momentary reality.
Shape:
- `model.VMHandles` = PID, TapDevice, BaseLoop, COWLoop, DMName,
DMDev. Never in SQLite.
- `VMRuntime` keeps: State, GuestIP, APISockPath, VSockPath,
VSockCID, LogPath, MetricsPath, DNSName, VMDir, SystemOverlay,
WorkDiskPath, LastError. All durable or deterministic.
- `handleCache` on `*Daemon` — mutex-guarded map + scratch-file
plumbing (`writeHandlesFile` / `readHandlesFile` /
`rediscoverHandles`). See `internal/daemon/vm_handles.go`.
- `d.vmAlive(vm)` replaces the 20+ inline
  `vm.State==Running && ProcessRunning(vm.Runtime.PID, apiSock)`
  checks scattered across the daemon. Single source of truth for
  liveness (sketched just after this list).
- Startup reconcile: per running VM, load the scratch file, pgrep
the api sock, either keep (cache seeded from scratch) or demote
to stopped (scratch handles passed to cleanupRuntime first so DM
/ loops / tap actually get torn down).
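Minimal sketch of the daemon-side plumbing, as a package-daemon fragment. The
`handles` field name matches the Daemon field docs note below; the method
signatures and the `model.VMStateRunning` constant are assumptions, and the
real code lives in `internal/daemon/vm_handles.go`:

// handleCache is the mutex-guarded in-memory map of transient handles (sketch).
type handleCache struct {
	mu      sync.Mutex
	entries map[string]model.VMHandles // keyed by VM ID
}

// vmHandles returns the cached transient state for a VM; zero value if unknown.
func (d *Daemon) vmHandles(id string) model.VMHandles {
	d.handles.mu.Lock()
	defer d.handles.mu.Unlock()
	return d.handles.entries[id]
}

// vmAlive is the single liveness check; assumes ProcessRunning keeps its
// existing (pid, apiSock) meaning from the inline expression quoted above.
func (d *Daemon) vmAlive(vm model.VMRecord) bool {
	if vm.State != model.VMStateRunning {
		return false
	}
	return ProcessRunning(d.vmHandles(vm.ID).PID, vm.Runtime.APISockPath)
}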
Verification:
- `go test ./...` green.
- Live: `banger vm run --name handles-test -- cat /etc/hostname`
starts; `handles.json` appears in VMDir with the expected PID,
tap, loops, DM.
- `kill -9 $(pgrep bangerd)` while the VM is running, re-invoke the
CLI, daemon auto-starts, reconcile recognises the VM as alive,
`banger vm ssh` still connects, `banger vm delete` cleans up.
Tests added:
- vm_handles_test.go: scratch-file roundtrip (sketched below),
  missing/corrupt file behaviour, cache concurrency, and
  rediscoverHandles preferring pgrep over scratch while still
  returning scratch contents even when the process is dead (so
  cleanup can tear down kernel state).
- vm_test.go: reconcile test rewritten to exercise the new flow
(write scratch → reconcile reads it → verifies process is gone →
issues dmsetup/losetup teardown).
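The roundtrip case, sketched under the assumption that writeHandlesFile and
readHandlesFile take a VM directory and a model.VMHandles value (the real
signatures may differ); the field values are made up:

func TestHandlesFileRoundtrip(t *testing.T) {
	dir := t.TempDir()
	want := model.VMHandles{
		PID:       4242,
		TapDevice: "tap-handles0",
		BaseLoop:  "/dev/loop10",
		COWLoop:   "/dev/loop11",
		DMName:    "vm-handles-test",
		DMDev:     "/dev/mapper/vm-handles-test",
	}
	if err := writeHandlesFile(dir, want); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}
	got, err := readHandlesFile(dir)
	if err != nil {
		t.Fatalf("readHandlesFile: %v", err)
	}
	if got != want {
		t.Fatalf("roundtrip mismatch: got %+v, want %+v", got, want)
	}
}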
ARCHITECTURE.md updated; `handles` added to Daemon field docs.
package daemon

import (
	"context"
	"errors"
	"net"
	"os"
	"strings"

	"banger/internal/firecracker"
	"banger/internal/guestconfig"
	"banger/internal/model"
	"banger/internal/system"
	"banger/internal/vmdns"
)

// vmCapability is the minimal contract every capability satisfies; optional
// behaviour is declared by also implementing the hook interfaces below.
type vmCapability interface {
	Name() string
}

// startPreflightCapability contributes checks to the VM start preflight.
type startPreflightCapability interface {
	AddStartPreflight(context.Context, *Daemon, *system.Preflight, model.VMRecord, model.Image)
}

// guestConfigCapability contributes to the guest configuration (mounts, etc.).
type guestConfigCapability interface {
	ContributeGuest(*guestconfig.Builder, model.VMRecord, model.Image)
}

// machineConfigCapability contributes to the Firecracker machine config.
type machineConfigCapability interface {
	ContributeMachine(*firecracker.MachineConfig, model.VMRecord, model.Image)
}

// prepareHostCapability prepares host-side state before the VM starts.
type prepareHostCapability interface {
	PrepareHost(context.Context, *Daemon, *model.VMRecord, model.Image) error
}

// postStartCapability runs after the VM has booted.
type postStartCapability interface {
	PostStart(context.Context, *Daemon, model.VMRecord, model.Image) error
}

// cleanupCapability tears down capability state when a VM is stopped or deleted.
type cleanupCapability interface {
	Cleanup(context.Context, *Daemon, model.VMRecord) error
}

// configChangeCapability reacts to spec changes on an existing VM.
type configChangeCapability interface {
	ApplyConfigChange(context.Context, *Daemon, model.VMRecord, model.VMRecord) error
}

// doctorCapability adds checks to the daemon's doctor report.
type doctorCapability interface {
	AddDoctorChecks(context.Context, *Daemon, *system.Report)
}

// registeredCapabilities returns d.vmCaps when set, otherwise the built-in
// work-disk, dns, and nat capabilities.
func (d *Daemon) registeredCapabilities() []vmCapability {
	if len(d.vmCaps) > 0 {
		return d.vmCaps
	}
	return []vmCapability{
		workDiskCapability{},
		dnsCapability{},
		natCapability{},
	}
}

func (d *Daemon) addCapabilityStartPrereqs(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image) {
	for _, capability := range d.registeredCapabilities() {
		if hook, ok := capability.(startPreflightCapability); ok {
			hook.AddStartPreflight(ctx, d, checks, vm, image)
		}
	}
}

func (d *Daemon) contributeGuestConfig(builder *guestconfig.Builder, vm model.VMRecord, image model.Image) {
	for _, capability := range d.registeredCapabilities() {
		if hook, ok := capability.(guestConfigCapability); ok {
			hook.ContributeGuest(builder, vm, image)
		}
	}
}

func (d *Daemon) contributeMachineConfig(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image) {
	for _, capability := range d.registeredCapabilities() {
		if hook, ok := capability.(machineConfigCapability); ok {
			hook.ContributeMachine(cfg, vm, image)
		}
	}
}

// prepareCapabilityHosts runs PrepareHost for each capability; on failure it
// unwinds the capabilities that already prepared before returning the error.
func (d *Daemon) prepareCapabilityHosts(ctx context.Context, vm *model.VMRecord, image model.Image) error {
	prepared := make([]vmCapability, 0, len(d.registeredCapabilities()))
	for _, capability := range d.registeredCapabilities() {
		hook, ok := capability.(prepareHostCapability)
		if !ok {
			continue
		}
		if err := hook.PrepareHost(ctx, d, vm, image); err != nil {
			d.cleanupPreparedCapabilities(context.Background(), vm, prepared)
			return err
		}
		prepared = append(prepared, capability)
	}
	return nil
}

// postStartCapabilities runs PostStart hooks, emitting create-stage progress
// for the dns and nat capabilities.
func (d *Daemon) postStartCapabilities(ctx context.Context, vm model.VMRecord, image model.Image) error {
	for _, capability := range d.registeredCapabilities() {
		switch capability.Name() {
		case "dns":
			vmCreateStage(ctx, "apply_dns", "publishing vm dns record")
		case "nat":
			if vm.Spec.NATEnabled {
				vmCreateStage(ctx, "apply_nat", "configuring nat")
			}
		}
		if hook, ok := capability.(postStartCapability); ok {
			if err := hook.PostStart(ctx, d, vm, image); err != nil {
				return err
			}
		}
	}
	return nil
}

func (d *Daemon) cleanupCapabilityState(ctx context.Context, vm model.VMRecord) error {
	return d.cleanupPreparedCapabilities(ctx, &vm, d.registeredCapabilities())
}

// cleanupPreparedCapabilities tears capabilities down in reverse order,
// collecting every error rather than stopping at the first.
func (d *Daemon) cleanupPreparedCapabilities(ctx context.Context, vm *model.VMRecord, capabilities []vmCapability) error {
	var err error
	for index := len(capabilities) - 1; index >= 0; index-- {
		hook, ok := capabilities[index].(cleanupCapability)
		if !ok {
			continue
		}
		err = joinErr(err, hook.Cleanup(ctx, d, *vm))
	}
	return err
}

func (d *Daemon) applyCapabilityConfigChanges(ctx context.Context, before, after model.VMRecord) error {
	for _, capability := range d.registeredCapabilities() {
		if hook, ok := capability.(configChangeCapability); ok {
			if err := hook.ApplyConfigChange(ctx, d, before, after); err != nil {
				return err
			}
		}
	}
	return nil
}

func (d *Daemon) addCapabilityDoctorChecks(ctx context.Context, report *system.Report) {
	for _, capability := range d.registeredCapabilities() {
		if hook, ok := capability.(doctorCapability); ok {
			hook.AddDoctorChecks(ctx, d, report)
		}
	}
}

// workDiskCapability provisions and attaches the per-VM /root work disk.
type workDiskCapability struct{}

func (workDiskCapability) Name() string { return "work-disk" }

func (workDiskCapability) AddStartPreflight(_ context.Context, _ *Daemon, checks *system.Preflight, vm model.VMRecord, image model.Image) {
	if exists(vm.Runtime.WorkDiskPath) {
		return
	}
	imageSeed := ""
	if image.RootfsPath != "" {
		imageSeed = image.WorkSeedPath
	}
	if exists(imageSeed) {
		// Seeded path: resize tooling is only needed when the requested disk
		// is larger than the seed image.
		if info, err := os.Stat(imageSeed); err == nil && vm.Spec.WorkDiskSizeBytes > info.Size() {
			checks.RequireCommand("e2fsck", toolHint("e2fsck"))
			checks.RequireCommand("resize2fs", toolHint("resize2fs"))
		}
		return
	}
	// Fallback path: building the work disk from scratch needs mkfs and mount tooling.
	for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
		checks.RequireCommand(command, toolHint(command))
	}
}

func (workDiskCapability) ContributeGuest(builder *guestconfig.Builder, _ model.VMRecord, _ model.Image) {
	builder.AddMount(guestconfig.MountSpec{
		Source:  "/dev/vdb",
		Target:  "/root",
		FSType:  "ext4",
		Options: []string{"defaults"},
		Dump:    0,
		Pass:    2,
	})
}

func (workDiskCapability) ContributeMachine(cfg *firecracker.MachineConfig, vm model.VMRecord, _ model.Image) {
	cfg.Drives = append(cfg.Drives, firecracker.DriveConfig{
		ID:       "work",
		Path:     vm.Runtime.WorkDiskPath,
		ReadOnly: false,
	})
}

func (workDiskCapability) PrepareHost(ctx context.Context, d *Daemon, vm *model.VMRecord, image model.Image) error {
	prep, err := d.ensureWorkDisk(ctx, vm, image)
	if err != nil {
		return err
	}
	if err := d.ensureAuthorizedKeyOnWorkDisk(ctx, vm, image, prep); err != nil {
		return err
	}
	if err := d.ensureGitIdentityOnWorkDisk(ctx, vm); err != nil {
		return err
	}
	return d.runFileSync(ctx, vm)
}

func (workDiskCapability) AddDoctorChecks(_ context.Context, d *Daemon, report *system.Report) {
	if d.store != nil && strings.TrimSpace(d.config.DefaultImageName) != "" {
		if image, err := d.store.GetImageByName(context.Background(), d.config.DefaultImageName); err == nil && strings.TrimSpace(image.WorkSeedPath) != "" && exists(image.WorkSeedPath) {
			checks := system.NewPreflight()
			checks.RequireFile(image.WorkSeedPath, "default image work-seed", `rebuild the default image to regenerate the /root seed`)
			report.AddPreflight("feature /root work disk", checks, "seeded /root work disk artifact available")
			return
		}
	}
	checks := system.NewPreflight()
	for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
		checks.RequireCommand(command, toolHint(command))
	}
	report.AddPreflight("feature /root work disk", checks, "fallback /root work disk tooling available")
	report.AddWarn("feature /root work disk", "default image has no work-seed artifact; new VM creates will be slower until the image is rebuilt")
}

// dnsCapability publishes and removes the VM's DNS record.
type dnsCapability struct{}

func (dnsCapability) Name() string { return "dns" }

func (dnsCapability) PostStart(ctx context.Context, d *Daemon, vm model.VMRecord, _ model.Image) error {
	return d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP)
}

func (dnsCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
	return d.removeDNS(ctx, vm.Runtime.DNSName)
}

func (dnsCapability) AddDoctorChecks(_ context.Context, _ *Daemon, report *system.Report) {
	conn, err := net.ListenPacket("udp", vmdns.DefaultListenAddr)
	if err != nil {
		if strings.Contains(strings.ToLower(err.Error()), "address already in use") {
			report.AddWarn("feature vm dns", "listener address "+vmdns.DefaultListenAddr+" is already in use")
			return
		}
		report.AddFail("feature vm dns", "cannot bind "+vmdns.DefaultListenAddr+": "+err.Error())
		return
	}
	_ = conn.Close()
	report.AddPass("feature vm dns", "listener can bind "+vmdns.DefaultListenAddr)
}

// natCapability manages host NAT rules for VMs with NAT enabled.
type natCapability struct{}

func (natCapability) Name() string { return "nat" }

func (natCapability) AddStartPreflight(ctx context.Context, d *Daemon, checks *system.Preflight, vm model.VMRecord, _ model.Image) {
	if !vm.Spec.NATEnabled {
		return
	}
	d.addNATPrereqs(ctx, checks)
}

func (natCapability) PostStart(ctx context.Context, d *Daemon, vm model.VMRecord, _ model.Image) error {
	if !vm.Spec.NATEnabled {
		return nil
	}
	return d.ensureNAT(ctx, vm, true)
}

func (natCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
	if !vm.Spec.NATEnabled {
		return nil
	}
	// Teardown requires the guest IP and the tap device from the transient
	// handles; skip if either is missing.
	tap := d.vmHandles(vm.ID).TapDevice
	if strings.TrimSpace(vm.Runtime.GuestIP) == "" || strings.TrimSpace(tap) == "" {
		if d.logger != nil {
			d.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", tap)...)
		}
		return nil
	}
	return d.ensureNAT(ctx, vm, false)
}

func (natCapability) ApplyConfigChange(ctx context.Context, d *Daemon, before, after model.VMRecord) error {
	if before.Spec.NATEnabled == after.Spec.NATEnabled {
		return nil
	}
	if !d.vmAlive(after) {
		return nil
	}
	return d.ensureNAT(ctx, after, after.Spec.NATEnabled)
}

func (natCapability) AddDoctorChecks(ctx context.Context, d *Daemon, report *system.Report) {
	checks := system.NewPreflight()
	checks.RequireCommand("ip", toolHint("ip"))
	d.addNATPrereqs(ctx, checks)
	if len(checks.Problems()) > 0 {
		report.Add(system.CheckStatusFail, "feature nat", checks.Problems()...)
		return
	}
	uplink, err := d.defaultUplink(ctx)
	if err != nil {
		report.AddFail("feature nat", err.Error())
		return
	}
	report.AddPass("feature nat", "iptables/sysctl available, uplink "+uplink)
}

// joinErr accumulates errors with errors.Join, tolerating nils on either side.
func joinErr(current, next error) error {
	return errors.Join(current, next)
}
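For orientation, adding a capability stays additive: implement Name() plus
whichever optional hook interfaces apply, then include the value in the
capability set (via d.vmCaps or the fallback slice in
registeredCapabilities). The capability below is hypothetical and only
illustrates the wiring; the interfaces it implements are the ones defined
above.

// metricsCapability is a hypothetical capability used only to illustrate the
// hook pattern; it is not part of the codebase.
type metricsCapability struct{}

func (metricsCapability) Name() string { return "metrics" }

// PostStart satisfies postStartCapability and runs once the VM has booted.
func (metricsCapability) PostStart(_ context.Context, _ *Daemon, _ model.VMRecord, _ model.Image) error {
	// A real capability would start its per-VM work here.
	return nil
}

// Cleanup satisfies cleanupCapability; the daemon calls hooks in reverse order.
func (metricsCapability) Cleanup(_ context.Context, _ *Daemon, _ model.VMRecord) error {
	return nil
}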