Multi-VM delete exposed two separate regressions: NAT teardown was still running after stopped VMs had already dropped their tap metadata, and the store was relying on one-off SQLite pragmas instead of configuring every pooled connection. Skip NAT cleanup when the runtime no longer has the network handles needed to identify rules, and move the SQLite profile into the DSN so WAL, busy timeouts, foreign keys, and the other connection-scoped settings apply consistently across the pool. Keep the write mutex in place for concurrent mutations, and update the daemon/store tests to use valid image fixtures now that foreign key enforcement is real. Validated with go test ./... and make build.
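The SQLite half of the change lives in the store package rather than in the capability file below, so here is a minimal sketch of what a DSN-carried connection profile can look like, assuming the github.com/mattn/go-sqlite3 driver; the Store type, Open helper, and option values are illustrative, not the project's actual code.

package store

import (
    "database/sql"
    "sync"

    _ "github.com/mattn/go-sqlite3" // assumed driver; registers the "sqlite3" name
)

// Store is a hypothetical wrapper; the write mutex mirrors the one the
// change keeps in place for serialising concurrent mutations.
type Store struct {
    db *sql.DB
    mu sync.Mutex
}

// Open puts the connection-scoped settings on the DSN so WAL, the busy
// timeout, and foreign key enforcement apply to every pooled connection,
// not just the one that happened to run a one-off PRAGMA.
func Open(path string) (*Store, error) {
    dsn := "file:" + path + "?_journal_mode=WAL&_busy_timeout=5000&_foreign_keys=on"
    db, err := sql.Open("sqlite3", dsn)
    if err != nil {
        return nil, err
    }
    return &Store{db: db}, nil
}

With the profile on the DSN, every connection the pool opens starts from the same settings, which is what makes the foreign key enforcement the updated tests rely on consistent.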

package daemon

import (
    "context"
    "errors"
    "net"
    "strings"

    "banger/internal/firecracker"
    "banger/internal/guestconfig"
    "banger/internal/model"
    "banger/internal/system"
    "banger/internal/vmdns"
)

// vmCapability is the base contract every capability satisfies; the optional
// hook interfaces below are discovered at call sites via type assertions.
type vmCapability interface {
    Name() string
}

type startPreflightCapability interface {
    AddStartPreflight(context.Context, *Daemon, *system.Preflight, model.VMRecord, model.Image)
}

type guestConfigCapability interface {
    ContributeGuest(*guestconfig.Builder, model.VMRecord, model.Image)
}

type machineConfigCapability interface {
    ContributeMachine(*firecracker.MachineConfig, model.VMRecord, model.Image)
}

type prepareHostCapability interface {
    PrepareHost(context.Context, *Daemon, *model.VMRecord, model.Image) error
}

type postStartCapability interface {
    PostStart(context.Context, *Daemon, model.VMRecord, model.Image) error
}

type cleanupCapability interface {
    Cleanup(context.Context, *Daemon, model.VMRecord) error
}

type configChangeCapability interface {
    ApplyConfigChange(context.Context, *Daemon, model.VMRecord, model.VMRecord) error
}

type doctorCapability interface {
    AddDoctorChecks(context.Context, *Daemon, *system.Report)
}

// registeredCapabilities returns the explicitly configured capability set when
// one is present, otherwise the built-in defaults.
func (d *Daemon) registeredCapabilities() []vmCapability {
    if len(d.vmCaps) > 0 {
        return d.vmCaps
    }
    return []vmCapability{
        workDiskCapability{},
        dnsCapability{},
        natCapability{},
    }
}

func (d *Daemon) addCapabilityStartPrereqs(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image) {
    for _, capability := range d.registeredCapabilities() {
        if hook, ok := capability.(startPreflightCapability); ok {
            hook.AddStartPreflight(ctx, d, checks, vm, image)
        }
    }
}

func (d *Daemon) contributeGuestConfig(builder *guestconfig.Builder, vm model.VMRecord, image model.Image) {
    for _, capability := range d.registeredCapabilities() {
        if hook, ok := capability.(guestConfigCapability); ok {
            hook.ContributeGuest(builder, vm, image)
        }
    }
}

func (d *Daemon) contributeMachineConfig(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image) {
    for _, capability := range d.registeredCapabilities() {
        if hook, ok := capability.(machineConfigCapability); ok {
            hook.ContributeMachine(cfg, vm, image)
        }
    }
}

// prepareCapabilityHosts runs PrepareHost hooks in registration order; if one
// fails, the capabilities prepared so far are cleaned up before returning.
func (d *Daemon) prepareCapabilityHosts(ctx context.Context, vm *model.VMRecord, image model.Image) error {
    prepared := make([]vmCapability, 0, len(d.registeredCapabilities()))
    for _, capability := range d.registeredCapabilities() {
        hook, ok := capability.(prepareHostCapability)
        if !ok {
            continue
        }
        if err := hook.PrepareHost(ctx, d, vm, image); err != nil {
            d.cleanupPreparedCapabilities(context.Background(), vm, prepared)
            return err
        }
        prepared = append(prepared, capability)
    }
    return nil
}

func (d *Daemon) postStartCapabilities(ctx context.Context, vm model.VMRecord, image model.Image) error {
    for _, capability := range d.registeredCapabilities() {
        if hook, ok := capability.(postStartCapability); ok {
            if err := hook.PostStart(ctx, d, vm, image); err != nil {
                return err
            }
        }
    }
    return nil
}

func (d *Daemon) cleanupCapabilityState(ctx context.Context, vm model.VMRecord) error {
    return d.cleanupPreparedCapabilities(ctx, &vm, d.registeredCapabilities())
}

// cleanupPreparedCapabilities runs Cleanup hooks in reverse registration order
// and joins any errors rather than stopping at the first one.
func (d *Daemon) cleanupPreparedCapabilities(ctx context.Context, vm *model.VMRecord, capabilities []vmCapability) error {
    var err error
    for index := len(capabilities) - 1; index >= 0; index-- {
        hook, ok := capabilities[index].(cleanupCapability)
        if !ok {
            continue
        }
        err = joinErr(err, hook.Cleanup(ctx, d, *vm))
    }
    return err
}

func (d *Daemon) applyCapabilityConfigChanges(ctx context.Context, before, after model.VMRecord) error {
    for _, capability := range d.registeredCapabilities() {
        if hook, ok := capability.(configChangeCapability); ok {
            if err := hook.ApplyConfigChange(ctx, d, before, after); err != nil {
                return err
            }
        }
    }
    return nil
}

func (d *Daemon) addCapabilityDoctorChecks(ctx context.Context, report *system.Report) {
    for _, capability := range d.registeredCapabilities() {
        if hook, ok := capability.(doctorCapability); ok {
            hook.AddDoctorChecks(ctx, d, report)
        }
    }
}

type workDiskCapability struct{}

func (workDiskCapability) Name() string { return "work-disk" }

func (workDiskCapability) AddStartPreflight(_ context.Context, _ *Daemon, checks *system.Preflight, vm model.VMRecord, _ model.Image) {
    if exists(vm.Runtime.WorkDiskPath) {
        return
    }
    for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
        checks.RequireCommand(command, toolHint(command))
    }
}

func (workDiskCapability) ContributeGuest(builder *guestconfig.Builder, _ model.VMRecord, _ model.Image) {
    builder.AddMount(guestconfig.MountSpec{
        Source:  "/dev/vdb",
        Target:  "/root",
        FSType:  "ext4",
        Options: []string{"defaults"},
        Dump:    0,
        Pass:    2,
    })
}

func (workDiskCapability) ContributeMachine(cfg *firecracker.MachineConfig, vm model.VMRecord, _ model.Image) {
    cfg.Drives = append(cfg.Drives, firecracker.DriveConfig{
        ID:       "work",
        Path:     vm.Runtime.WorkDiskPath,
        ReadOnly: false,
    })
}

func (workDiskCapability) PrepareHost(ctx context.Context, d *Daemon, vm *model.VMRecord, _ model.Image) error {
    return d.ensureWorkDisk(ctx, vm)
}

func (workDiskCapability) AddDoctorChecks(_ context.Context, _ *Daemon, report *system.Report) {
    checks := system.NewPreflight()
    for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
        checks.RequireCommand(command, toolHint(command))
    }
    report.AddPreflight("feature /root work disk", checks, "guest /root work disk tooling available")
}

type dnsCapability struct{}

func (dnsCapability) Name() string { return "dns" }

func (dnsCapability) PostStart(ctx context.Context, d *Daemon, vm model.VMRecord, _ model.Image) error {
    return d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP)
}

func (dnsCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
    return d.removeDNS(ctx, vm.Runtime.DNSName)
}

func (dnsCapability) AddDoctorChecks(_ context.Context, _ *Daemon, report *system.Report) {
    conn, err := net.ListenPacket("udp", vmdns.DefaultListenAddr)
    if err != nil {
        if strings.Contains(strings.ToLower(err.Error()), "address already in use") {
            report.AddWarn("feature vm dns", "listener address "+vmdns.DefaultListenAddr+" is already in use")
            return
        }
        report.AddFail("feature vm dns", "cannot bind "+vmdns.DefaultListenAddr+": "+err.Error())
        return
    }
    _ = conn.Close()
    report.AddPass("feature vm dns", "listener can bind "+vmdns.DefaultListenAddr)
}

type natCapability struct{}

func (natCapability) Name() string { return "nat" }

func (natCapability) AddStartPreflight(ctx context.Context, d *Daemon, checks *system.Preflight, vm model.VMRecord, _ model.Image) {
    if !vm.Spec.NATEnabled {
        return
    }
    d.addNATPrereqs(ctx, checks)
}

func (natCapability) PostStart(ctx context.Context, d *Daemon, vm model.VMRecord, _ model.Image) error {
    if !vm.Spec.NATEnabled {
        return nil
    }
    return d.ensureNAT(ctx, vm, true)
}

func (natCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) error {
    if !vm.Spec.NATEnabled {
        return nil
    }
    // A stopped VM may have already dropped its tap metadata; without the
    // guest IP and tap device there is nothing left to identify the NAT
    // rules by, so skip teardown instead of failing.
    if strings.TrimSpace(vm.Runtime.GuestIP) == "" || strings.TrimSpace(vm.Runtime.TapDevice) == "" {
        if d.logger != nil {
            d.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", vm.Runtime.TapDevice)...)
        }
        return nil
    }
    return d.ensureNAT(ctx, vm, false)
}

func (natCapability) ApplyConfigChange(ctx context.Context, d *Daemon, before, after model.VMRecord) error {
    if before.Spec.NATEnabled == after.Spec.NATEnabled {
        return nil
    }
    if after.State != model.VMStateRunning || !system.ProcessRunning(after.Runtime.PID, after.Runtime.APISockPath) {
        return nil
    }
    return d.ensureNAT(ctx, after, after.Spec.NATEnabled)
}

func (natCapability) AddDoctorChecks(ctx context.Context, d *Daemon, report *system.Report) {
    checks := system.NewPreflight()
    checks.RequireCommand("ip", toolHint("ip"))
    d.addNATPrereqs(ctx, checks)
    if len(checks.Problems()) > 0 {
        report.Add(system.CheckStatusFail, "feature nat", checks.Problems()...)
        return
    }
    uplink, err := d.defaultUplink(ctx)
    if err != nil {
        report.AddFail("feature nat", err.Error())
        return
    }
    report.AddPass("feature nat", "iptables/sysctl available, uplink "+uplink)
}

func joinErr(current, next error) error {
    return errors.Join(current, next)
}