daemon split (1/5): extract *HostNetwork service
First phase of splitting the daemon god-struct into focused services with explicit ownership. HostNetwork now owns everything host-networking: the TAP interface pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap / createTap), bridge + socket dir setup, firecracker process primitives (find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle + routing setup, and the vsock-agent readiness probe. That's 7 files whose receivers flipped from *Daemon to *HostNetwork, plus a new host_network.go that declares the struct, its hostNetworkDeps, and the factored firecracker + DNS helpers that used to live in vm.go. Daemon gives up the tapPool and vmDNS fields entirely; they're now HostNetwork's business. Construction goes through newHostNetwork in Daemon.Open with an explicit dependency bag (runner, logger, config, layout, closing). A lazy-init hostNet() helper on Daemon supports test literals that don't wire net explicitly — production always populates it eagerly. Signature tightenings where the old receiver reached into VM-service state: - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable). Callers resolve tap from the handle cache themselves. - initializeTapPool(ctx) → initializeTapPool(usedTaps []string). Daemon.Open enumerates VMs, collects taps from handles, hands the slice in. rebuildDNS stays on *Daemon as the orchestrator — it filters by vm-alive (a VMService concern handles will move to in phase 4) then calls HostNetwork.replaceDNS with the already-filtered map. Capability hooks continue to take *Daemon; they now use it as a facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned CapabilityHost interface extraction is orthogonal, left for later. 
Tests: dns_routing_test.go + fastpath_test.go + nat_test.go + snapshot_test.go + open_close_test.go were touched to construct HostNetwork literals where they exercise its methods directly, or route through d.hostNet() where they exercise the Daemon entry points. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
eba9a553bf
commit
362009d747
18 changed files with 461 additions and 326 deletions
|
|
@ -4,23 +4,20 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"banger/internal/daemon/fcproc"
|
||||
"banger/internal/firecracker"
|
||||
"banger/internal/model"
|
||||
"banger/internal/namegen"
|
||||
"banger/internal/system"
|
||||
"banger/internal/vmdns"
|
||||
"banger/internal/vsockagent"
|
||||
)
|
||||
|
||||
// Cross-service constants. Kept in vm.go because both lifecycle
|
||||
// (VMService) and networking (HostNetwork) reference them; moving
|
||||
// them to either owner would read as a layering violation.
|
||||
var (
|
||||
errWaitForExitTimeout = fcproc.ErrWaitForExitTimeout
|
||||
gracefulShutdownWait = 10 * time.Second
|
||||
|
|
@ -28,59 +25,43 @@ var (
|
|||
vsockReadyPoll = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
// fc builds a fresh fcproc.Manager from the Daemon's current runner, config,
|
||||
// and layout. Manager is stateless beyond those handles, so constructing per
|
||||
// call keeps tests that build Daemon literals working without extra wiring.
|
||||
func (d *Daemon) fc() *fcproc.Manager {
|
||||
return fcproc.New(d.runner, fcproc.Config{
|
||||
FirecrackerBin: d.config.FirecrackerBin,
|
||||
BridgeName: d.config.BridgeName,
|
||||
BridgeIP: d.config.BridgeIP,
|
||||
CIDR: d.config.CIDR,
|
||||
RuntimeDir: d.layout.RuntimeDir,
|
||||
}, d.logger)
|
||||
// rebuildDNS enumerates live VMs and republishes the DNS record set.
|
||||
// Lives on *Daemon (not HostNetwork) because "alive" is a VMService
|
||||
// concern that HostNetwork shouldn't need to reach into. Daemon
|
||||
// orchestrates: VM list from the store, alive filter, hand the
|
||||
// resulting map to HostNetwork.replaceDNS.
|
||||
func (d *Daemon) rebuildDNS(ctx context.Context) error {
|
||||
if d.net == nil {
|
||||
return nil
|
||||
}
|
||||
vms, err := d.store.ListVMs(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records := make(map[string]string)
|
||||
for _, vm := range vms {
|
||||
if !d.vmAlive(vm) {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(vm.Runtime.GuestIP) == "" {
|
||||
continue
|
||||
}
|
||||
records[vmDNSRecordName(vm.Name)] = vm.Runtime.GuestIP
|
||||
}
|
||||
return d.hostNet().replaceDNS(records)
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureBridge(ctx context.Context) error {
|
||||
return d.fc().EnsureBridge(ctx)
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureSocketDir() error {
|
||||
return d.fc().EnsureSocketDir()
|
||||
}
|
||||
|
||||
func (d *Daemon) createTap(ctx context.Context, tap string) error {
|
||||
return d.fc().CreateTap(ctx, tap)
|
||||
}
|
||||
|
||||
func (d *Daemon) firecrackerBinary() (string, error) {
|
||||
return d.fc().ResolveBinary()
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
|
||||
return d.fc().EnsureSocketAccess(ctx, socketPath, label)
|
||||
}
|
||||
|
||||
func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
|
||||
return d.fc().FindPID(ctx, apiSock)
|
||||
}
|
||||
|
||||
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
|
||||
return d.fc().ResolvePID(ctx, machine, apiSock)
|
||||
}
|
||||
|
||||
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
|
||||
return d.fc().SendCtrlAltDel(ctx, vm.Runtime.APISockPath)
|
||||
}
|
||||
|
||||
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
|
||||
return d.fc().WaitForExit(ctx, pid, apiSock, timeout)
|
||||
}
|
||||
|
||||
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
|
||||
return d.fc().Kill(ctx, pid)
|
||||
// vmDNSRecordName is a small indirection so the dns-record-name
// helper is not directly pulled into every file that used to import
// vmdns for this one call. Equivalent to vmdns.RecordName: the VM
// name, trimmed and lower-cased, under the ".vm" zone.
func vmDNSRecordName(name string) string {
	trimmed := strings.TrimSpace(name)
	return strings.ToLower(trimmed) + ".vm"
}
|
||||
|
||||
// cleanupRuntime tears down the host-side state for a VM: firecracker
|
||||
// process, DM snapshot, capabilities, tap, sockets. Stays on *Daemon
|
||||
// for now because it reaches into handles (VMService-owned) and
|
||||
// capabilities (still on Daemon). Phase 4 will move it to VMService.
|
||||
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
|
||||
if d.logger != nil {
|
||||
d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...)
|
||||
|
|
@ -88,17 +69,17 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
|
|||
h := d.vmHandles(vm.ID)
|
||||
cleanupPID := h.PID
|
||||
if vm.Runtime.APISockPath != "" {
|
||||
if pid, err := d.findFirecrackerPID(ctx, vm.Runtime.APISockPath); err == nil && pid > 0 {
|
||||
if pid, err := d.hostNet().findFirecrackerPID(ctx, vm.Runtime.APISockPath); err == nil && pid > 0 {
|
||||
cleanupPID = pid
|
||||
}
|
||||
}
|
||||
if cleanupPID > 0 && system.ProcessRunning(cleanupPID, vm.Runtime.APISockPath) {
|
||||
_ = d.killVMProcess(ctx, cleanupPID)
|
||||
if err := d.waitForExit(ctx, cleanupPID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
|
||||
_ = d.hostNet().killVMProcess(ctx, cleanupPID)
|
||||
if err := d.hostNet().waitForExit(ctx, cleanupPID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{
|
||||
snapshotErr := d.hostNet().cleanupDMSnapshot(ctx, dmSnapshotHandles{
|
||||
BaseLoop: h.BaseLoop,
|
||||
COWLoop: h.COWLoop,
|
||||
DMName: h.DMName,
|
||||
|
|
@ -107,7 +88,7 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
|
|||
featureErr := d.cleanupCapabilityState(ctx, vm)
|
||||
var tapErr error
|
||||
if h.TapDevice != "" {
|
||||
tapErr = d.releaseTap(ctx, h.TapDevice)
|
||||
tapErr = d.hostNet().releaseTap(ctx, h.TapDevice)
|
||||
}
|
||||
if vm.Runtime.APISockPath != "" {
|
||||
_ = os.Remove(vm.Runtime.APISockPath)
|
||||
|
|
@ -125,92 +106,6 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
|
|||
return errors.Join(snapshotErr, featureErr, tapErr)
|
||||
}
|
||||
|
||||
func defaultVSockPath(runtimeDir, vmID string) string {
|
||||
return filepath.Join(runtimeDir, "fc-"+system.ShortID(vmID)+".vsock")
|
||||
}
|
||||
|
||||
// defaultVSockCID derives a guest vsock context ID from the guest's
// IPv4 address: 10000 plus the last octet. Returns an error when the
// input does not parse as an IPv4 address.
func defaultVSockCID(guestIP string) (uint32, error) {
	addr := net.ParseIP(strings.TrimSpace(guestIP))
	v4 := addr.To4()
	if v4 == nil {
		return 0, fmt.Errorf("guest IP is not IPv4: %q", guestIP)
	}
	lastOctet := uint32(v4[3])
	return 10000 + lastOctet, nil
}
|
||||
|
||||
func waitForGuestVSockAgent(ctx context.Context, logger *slog.Logger, socketPath string, timeout time.Duration) error {
|
||||
if strings.TrimSpace(socketPath) == "" {
|
||||
return errors.New("vsock path is required")
|
||||
}
|
||||
|
||||
waitCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
ticker := time.NewTicker(vsockReadyPoll)
|
||||
defer ticker.Stop()
|
||||
|
||||
var lastErr error
|
||||
for {
|
||||
pingCtx, pingCancel := context.WithTimeout(waitCtx, 3*time.Second)
|
||||
err := vsockagent.Health(pingCtx, logger, socketPath)
|
||||
pingCancel()
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
lastErr = err
|
||||
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
if lastErr != nil {
|
||||
return fmt.Errorf("guest vsock agent not ready: %w", lastErr)
|
||||
}
|
||||
return errors.New("guest vsock agent not ready before timeout")
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
|
||||
if d.vmDNS == nil {
|
||||
return nil
|
||||
}
|
||||
if err := d.vmDNS.Set(vmdns.RecordName(vmName), guestIP); err != nil {
|
||||
return err
|
||||
}
|
||||
d.ensureVMDNSResolverRouting(ctx)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error {
|
||||
if dnsName == "" {
|
||||
return nil
|
||||
}
|
||||
if d.vmDNS == nil {
|
||||
return nil
|
||||
}
|
||||
return d.vmDNS.Remove(dnsName)
|
||||
}
|
||||
|
||||
func (d *Daemon) rebuildDNS(ctx context.Context) error {
|
||||
if d.vmDNS == nil {
|
||||
return nil
|
||||
}
|
||||
vms, err := d.store.ListVMs(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records := make(map[string]string)
|
||||
for _, vm := range vms {
|
||||
if !d.vmAlive(vm) {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(vm.Runtime.GuestIP) == "" {
|
||||
continue
|
||||
}
|
||||
records[vmdns.RecordName(vm.Name)] = vm.Runtime.GuestIP
|
||||
}
|
||||
return d.vmDNS.Replace(records)
|
||||
}
|
||||
|
||||
func (d *Daemon) generateName(ctx context.Context) (string, error) {
|
||||
_ = ctx
|
||||
if name := strings.TrimSpace(namegen.Generate()); name != "" {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue