banger/internal/daemon/vm.go
Thales Maciel 362009d747
daemon split (1/5): extract *HostNetwork service
First phase of splitting the daemon god-struct into focused services
with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface
pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap /
createTap), bridge + socket dir setup, firecracker process primitives
(find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM
snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle
+ routing setup, and the vsock-agent readiness probe. That's 7 files
whose receivers flipped from *Daemon to *HostNetwork, plus a new
host_network.go that declares the struct, its hostNetworkDeps, and
the factored firecracker + DNS helpers that used to live in vm.go.

Daemon gives up the tapPool and vmDNS fields entirely; they're now
HostNetwork's business. Construction goes through newHostNetwork in
Daemon.Open with an explicit dependency bag (runner, logger, config,
layout, closing). A lazy-init hostNet() helper on Daemon supports
test literals that don't wire net explicitly — production always
populates it eagerly.

Signature tightenings where the old receiver reached into VM-service
state:
 - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable).
   Callers resolve tap from the handle cache themselves.
 - initializeTapPool(ctx) → initializeTapPool(usedTaps []string).
   Daemon.Open enumerates VMs, collects taps from handles, hands the
   slice in.

rebuildDNS stays on *Daemon as the orchestrator — it filters by
vm-alive (a VMService concern; handles will move to VMService in
phase 4), then calls HostNetwork.replaceDNS with the already-filtered
map.

Capability hooks continue to take *Daemon; they now use it as a
facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned
CapabilityHost interface extraction is orthogonal, left for later.

Tests: dns_routing_test.go + fastpath_test.go + nat_test.go +
snapshot_test.go + open_close_test.go were touched to construct
HostNetwork literals where they exercise its methods directly, or
route through d.hostNet() where they exercise the Daemon entry
points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:11:46 -03:00

140 lines
4.1 KiB
Go

package daemon
import (
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"time"
"banger/internal/daemon/fcproc"
"banger/internal/model"
"banger/internal/namegen"
"banger/internal/system"
)
// Cross-service constants. Kept in vm.go because both lifecycle
// (VMService) and networking (HostNetwork) reference them; moving
// them to either owner would read as a layering violation.
//
// NOTE(review): the durations are declared with var rather than const —
// presumably so tests can shorten them; confirm before converting.
var (
// errWaitForExitTimeout re-exports fcproc.ErrWaitForExitTimeout under a
// package-local name.
errWaitForExitTimeout = fcproc.ErrWaitForExitTimeout
// gracefulShutdownWait is a 10s budget. Judging by the name it bounds a
// graceful guest shutdown; the call site is not in this file — TODO confirm.
gracefulShutdownWait = 10 * time.Second
// vsockReadyWait (30s) and vsockReadyPoll (200ms) presumably are the
// overall deadline and the polling interval of the vsock-agent readiness
// probe — verify against the caller.
vsockReadyWait = 30 * time.Second
vsockReadyPoll = 200 * time.Millisecond
)
// rebuildDNS republishes the DNS record set from the VMs that are
// currently alive.
//
// It is a method on *Daemon rather than HostNetwork because liveness is
// decided by d.vmAlive, which HostNetwork should not have to know about.
// The daemon lists VMs from the store, keeps the ones that are alive and
// have a non-blank guest IP, and passes the resulting name→IP map to
// HostNetwork.replaceDNS.
//
// When d.net is nil the call is a no-op and returns nil. A store listing
// failure is returned unchanged.
func (d *Daemon) rebuildDNS(ctx context.Context) error {
	if d.net == nil {
		return nil
	}

	all, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}

	published := make(map[string]string)
	for _, rec := range all {
		// Publish only running VMs that actually have a guest address.
		if d.vmAlive(rec) && strings.TrimSpace(rec.Runtime.GuestIP) != "" {
			published[vmDNSRecordName(rec.Name)] = rec.Runtime.GuestIP
		}
	}
	return d.hostNet().replaceDNS(published)
}
// vmDNSRecordName maps a VM name to its DNS record name: the name with
// surrounding whitespace removed, lower-cased, and suffixed with ".vm".
//
// It exists so callers in this package do not each need to import vmdns
// for a single helper; it is meant to match vmdns.RecordName, so keep the
// two in sync.
func vmDNSRecordName(name string) string {
	host := strings.TrimSpace(name)
	return strings.ToLower(host) + ".vm"
}
// cleanupRuntime tears down the host-side state of a VM, in this order:
// the firecracker process, the DM snapshot, capability state, the tap
// device, the API and vsock socket files, the cached handles and — unless
// preserveDisks is set — the VM directory.
//
// It stays on *Daemon for now because it reads the handle cache
// (VMService-owned) and capability state (still on Daemon); phase 4 is
// expected to move it to VMService.
//
// If the process is still running and does not exit within 30 seconds
// of the kill request, the wait error is returned immediately and none of
// the later teardown steps run. Otherwise the snapshot, capability, tap
// and (when applicable) directory-removal errors are combined with
// errors.Join; socket-file removal failures are ignored.
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
	if d.logger != nil {
		attrs := append(vmLogAttrs(vm), "preserve_disks", preserveDisks)
		d.logger.Debug("cleanup runtime", attrs...)
	}

	handles := d.vmHandles(vm.ID)
	apiSock := vm.Runtime.APISockPath

	// Start from the cached PID, but prefer whatever process is found
	// for the API socket when that lookup succeeds.
	pid := handles.PID
	if apiSock != "" {
		if found, err := d.hostNet().findFirecrackerPID(ctx, apiSock); err == nil && found > 0 {
			pid = found
		}
	}

	if pid > 0 && system.ProcessRunning(pid, apiSock) {
		// Kill errors are deliberately dropped: waitForExit below is what
		// decides whether the process is actually gone.
		_ = d.hostNet().killVMProcess(ctx, pid)
		if err := d.hostNet().waitForExit(ctx, pid, apiSock, 30*time.Second); err != nil {
			return err
		}
	}

	snapshot := dmSnapshotHandles{
		BaseLoop: handles.BaseLoop,
		COWLoop:  handles.COWLoop,
		DMName:   handles.DMName,
		DMDev:    handles.DMDev,
	}
	errs := []error{d.hostNet().cleanupDMSnapshot(ctx, snapshot)}
	errs = append(errs, d.cleanupCapabilityState(ctx, vm))

	var tapErr error
	if handles.TapDevice != "" {
		tapErr = d.hostNet().releaseTap(ctx, handles.TapDevice)
	}
	errs = append(errs, tapErr)

	// Best-effort removal of the socket files; failures are ignored.
	for _, sock := range []string{apiSock, vm.Runtime.VSockPath} {
		if sock != "" {
			_ = os.Remove(sock)
		}
	}

	// The handles are only meaningful while the kernel objects exist;
	// dropping them here keeps the cache in sync with reality even when
	// the caller forgets to call clearVMHandles explicitly.
	d.clearVMHandles(vm)

	if !preserveDisks && vm.Runtime.VMDir != "" {
		errs = append(errs, os.RemoveAll(vm.Runtime.VMDir))
	}
	return errors.Join(errs...)
}
// generateName returns a name for a new VM. It uses namegen.Generate and,
// if that yields a blank string after trimming, falls back to "vm-"
// followed by the current Unix timestamp in seconds.
//
// ctx is accepted but unused, and the returned error is always nil.
func (d *Daemon) generateName(ctx context.Context) (string, error) {
	_ = ctx

	name := strings.TrimSpace(namegen.Generate())
	if name == "" {
		name = "vm-" + strconv.FormatInt(time.Now().Unix(), 10)
	}
	return name, nil
}
// bridgePrefix returns the first three dot-separated components of
// bridgeIP joined by dots (e.g. "172.16.0.1" → "172.16.0"). Inputs with
// fewer than three components are returned unchanged. No validation of
// the components is performed.
func bridgePrefix(bridgeIP string) string {
	// Only the first three fields matter, so cap the split at four pieces.
	octets := strings.SplitN(bridgeIP, ".", 4)
	if len(octets) < 3 {
		return bridgeIP
	}
	return strings.Join(octets[:3], ".")
}
// optionalIntOrDefault dereferences value when it is set and returns
// fallback when value is nil.
func optionalIntOrDefault(value *int, fallback int) int {
	if value == nil {
		return fallback
	}
	return *value
}
// validateOptionalPositiveSetting checks an optional integer setting.
// A nil value means "not set" and is accepted; a set value must be
// strictly greater than zero, otherwise an error naming label is returned.
func validateOptionalPositiveSetting(label string, value *int) error {
	if value != nil && *value <= 0 {
		return fmt.Errorf("%s must be a positive integer", label)
	}
	return nil
}