banger/internal/daemon/host_network.go
Thales Maciel 362009d747
daemon split (1/5): extract *HostNetwork service
First phase of splitting the daemon god-struct into focused services
with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface
pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap /
createTap), bridge + socket dir setup, firecracker process primitives
(find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM
snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle
+ routing setup, and the vsock-agent readiness probe. That's 7 files
whose receivers flipped from *Daemon to *HostNetwork, plus a new
host_network.go that declares the struct, its hostNetworkDeps, and
the factored firecracker + DNS helpers that used to live in vm.go.

Daemon gives up the tapPool and vmDNS fields entirely; they're now
HostNetwork's business. Construction goes through newHostNetwork in
Daemon.Open with an explicit dependency bag (runner, logger, config,
layout, closing). A lazy-init hostNet() helper on Daemon supports
test literals that don't wire net explicitly — production always
populates it eagerly.

Signature tightenings where the old receiver reached into VM-service
state:
 - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable).
   Callers resolve tap from the handle cache themselves.
 - initializeTapPool(ctx) → initializeTapPool(usedTaps []string).
   Daemon.Open enumerates VMs, collects taps from handles, hands the
   slice in.

rebuildDNS stays on *Daemon as the orchestrator — it filters by
vm-alive (a VMService concern that handles will move to in phase 4) then
calls HostNetwork.replaceDNS with the already-filtered map.

Capability hooks continue to take *Daemon; they now use it as a
facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned
CapabilityHost interface extraction is orthogonal, left for later.

Tests: dns_routing_test.go + fastpath_test.go + nat_test.go +
snapshot_test.go + open_close_test.go were touched to construct
HostNetwork literals where they exercise its methods directly, or
route through d.hostNet() where they exercise the Daemon entry
points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:11:46 -03:00

242 lines
6.7 KiB
Go

package daemon
import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"path/filepath"
"strings"
"time"
"banger/internal/daemon/fcproc"
"banger/internal/firecracker"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
"banger/internal/vmdns"
"banger/internal/vsockagent"
)
// HostNetwork owns the daemon's side of host networking: the TAP
// interface pool, the bridge, per-VM tap/NAT/DNS wiring, and the
// firecracker-process primitives (bridge setup, socket access,
// pgrep-based PID resolution, ctrl-alt-del, wait/kill) plus DM
// snapshot helpers. The Daemon holds one *HostNetwork and routes
// lifecycle calls through it instead of reaching into host-state
// directly.
//
// Fields stay unexported so peer services (VMService, etc.) access
// HostNetwork only through consumer-defined interfaces, not by
// fishing around in its struct. Construction goes through
// newHostNetwork with an explicit dependency bag so the wiring is
// auditable.
type HostNetwork struct {
// runner is handed to fcproc.New each time fc() builds a manager.
runner system.CommandRunner
// logger is passed to vmdns.New, fcproc.New, and vsockagent.Health.
// It may be nil: startVMDNS checks for nil before logging.
logger *slog.Logger
// config supplies FirecrackerBin, BridgeName, BridgeIP, and CIDR to
// the fcproc.Config built in fc().
config model.DaemonConfig
// layout supplies RuntimeDir to the fcproc.Config built in fc().
layout paths.Layout
// closing is copied in from hostNetworkDeps and is not read anywhere
// in this file.
// NOTE(review): presumably the daemon's shutdown signal — confirm
// against Daemon.Open and the other HostNetwork files.
closing chan struct{}
// tapPool holds the TAP interface pool state; the tapPool type is
// declared outside this file.
tapPool tapPool
// vmDNS is the guest DNS server created by startVMDNS and cleared by
// stopVMDNS. While it is nil, setDNS, removeDNS, and replaceDNS are
// no-ops.
vmDNS *vmdns.Server
}
// hostNetworkDeps is the explicit wiring bag newHostNetwork expects.
// Keeping the deps in a dedicated struct rather than positional args
// makes the construction site in Daemon.Open read like a declaration.
//
// newHostNetwork copies every field verbatim into the same-named
// field of the HostNetwork it returns; nothing is validated or
// defaulted along the way.
type hostNetworkDeps struct {
// runner becomes HostNetwork.runner.
runner system.CommandRunner
// logger becomes HostNetwork.logger.
logger *slog.Logger
// config becomes HostNetwork.config.
config model.DaemonConfig
// layout becomes HostNetwork.layout.
layout paths.Layout
// closing becomes HostNetwork.closing.
closing chan struct{}
}
// newHostNetwork builds a HostNetwork from the supplied dependency
// bag. Each dependency is copied across unchanged; tapPool and vmDNS
// are left at their zero values.
func newHostNetwork(deps hostNetworkDeps) *HostNetwork {
	hn := new(HostNetwork)
	hn.runner = deps.runner
	hn.logger = deps.logger
	hn.config = deps.config
	hn.layout = deps.layout
	hn.closing = deps.closing
	return hn
}
// hostNet returns the Daemon's HostNetwork service. When d.net has
// not been set, it is constructed on first use from the Daemon's
// runner, logger, config, layout, and closing fields, then cached in
// d.net so later calls return the same instance.
//
// The lazy path exists so tests that build `&Daemon{...}` literals
// without wiring a HostNetwork keep working; Daemon.Open populates
// d.net eagerly. No locking is performed here.
func (d *Daemon) hostNet() *HostNetwork {
	if d.net == nil {
		deps := hostNetworkDeps{
			runner:  d.runner,
			logger:  d.logger,
			config:  d.config,
			layout:  d.layout,
			closing: d.closing,
		}
		d.net = newHostNetwork(deps)
	}
	return d.net
}
// --- DNS server lifecycle -------------------------------------------

// startVMDNS creates a vmdns server for addr and stores it on the
// HostNetwork. A failure from vmdns.New is returned as-is and leaves
// n.vmDNS untouched. On success the server's address is logged when a
// logger is configured.
func (n *HostNetwork) startVMDNS(addr string) error {
	srv, err := vmdns.New(addr, n.logger)
	if err != nil {
		return err
	}
	n.vmDNS = srv
	if log := n.logger; log != nil {
		log.Info("vm dns serving", "dns_addr", srv.Addr())
	}
	return nil
}
// stopVMDNS closes the DNS server, if one is set, and clears n.vmDNS.
// It returns whatever Close reports; when no server is set it returns
// nil without doing anything.
func (n *HostNetwork) stopVMDNS() error {
	server := n.vmDNS
	if server == nil {
		return nil
	}
	closeErr := server.Close()
	n.vmDNS = nil
	return closeErr
}
// setDNS stores guestIP under the record name derived from vmName
// (via vmdns.RecordName) and then calls ensureVMDNSResolverRouting.
// It is a no-op returning nil when no DNS server is set. If the
// record cannot be stored, that error is returned and the routing
// step is skipped.
func (n *HostNetwork) setDNS(ctx context.Context, vmName, guestIP string) error {
	server := n.vmDNS
	if server == nil {
		return nil
	}
	record := vmdns.RecordName(vmName)
	if setErr := server.Set(record, guestIP); setErr != nil {
		return setErr
	}
	n.ensureVMDNSResolverRouting(ctx)
	return nil
}
// removeDNS deletes the record named dnsName from the DNS server.
// An empty dnsName, or the absence of a DNS server, makes it a no-op
// that returns nil. Note that dnsName is used as given; it is not
// passed through vmdns.RecordName here.
func (n *HostNetwork) removeDNS(dnsName string) error {
	switch {
	case dnsName == "":
		return nil
	case n.vmDNS == nil:
		return nil
	}
	return n.vmDNS.Remove(dnsName)
}
// replaceDNS swaps the DNS server's entire record set for records.
// Filtering (e.g. by vm-alive in Daemon.rebuildDNS) is the caller's
// job; the map is forwarded untouched. Without a DNS server this is
// a no-op returning nil.
func (n *HostNetwork) replaceDNS(records map[string]string) error {
	if server := n.vmDNS; server != nil {
		return server.Replace(records)
	}
	return nil
}
// --- Firecracker process helpers ------------------------------------

// fc returns a new fcproc.Manager assembled from the HostNetwork's
// current runner, config, layout, and logger. A manager is built on
// every call rather than cached, so HostNetwork literals constructed
// in tests need no additional wiring.
func (n *HostNetwork) fc() *fcproc.Manager {
	cfg := fcproc.Config{
		FirecrackerBin: n.config.FirecrackerBin,
		BridgeName:     n.config.BridgeName,
		BridgeIP:       n.config.BridgeIP,
		CIDR:           n.config.CIDR,
		RuntimeDir:     n.layout.RuntimeDir,
	}
	return fcproc.New(n.runner, cfg, n.logger)
}
// ensureBridge forwards to fcproc.Manager.EnsureBridge on a manager
// freshly built by fc().
func (n *HostNetwork) ensureBridge(ctx context.Context) error {
	manager := n.fc()
	return manager.EnsureBridge(ctx)
}
// ensureSocketDir forwards to fcproc.Manager.EnsureSocketDir on a
// manager freshly built by fc().
func (n *HostNetwork) ensureSocketDir() error {
	manager := n.fc()
	return manager.EnsureSocketDir()
}
// createTap forwards to fcproc.Manager.CreateTap for the named tap
// interface, using a manager freshly built by fc().
func (n *HostNetwork) createTap(ctx context.Context, tap string) error {
	manager := n.fc()
	return manager.CreateTap(ctx, tap)
}
// firecrackerBinary forwards to fcproc.Manager.ResolveBinary on a
// manager freshly built by fc() and returns its result unchanged.
func (n *HostNetwork) firecrackerBinary() (string, error) {
	manager := n.fc()
	return manager.ResolveBinary()
}
// ensureSocketAccess forwards socketPath and label to
// fcproc.Manager.EnsureSocketAccess on a manager freshly built by
// fc().
func (n *HostNetwork) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
	manager := n.fc()
	return manager.EnsureSocketAccess(ctx, socketPath, label)
}
// findFirecrackerPID forwards to fcproc.Manager.FindPID for the given
// API socket, using a manager freshly built by fc().
func (n *HostNetwork) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	manager := n.fc()
	return manager.FindPID(ctx, apiSock)
}
// resolveFirecrackerPID forwards machine and apiSock to
// fcproc.Manager.ResolvePID on a manager freshly built by fc() and
// returns the PID it reports.
func (n *HostNetwork) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
	manager := n.fc()
	return manager.ResolvePID(ctx, machine, apiSock)
}
// sendCtrlAltDel forwards to fcproc.Manager.SendCtrlAltDel for the
// given API socket path, using a manager freshly built by fc().
func (n *HostNetwork) sendCtrlAltDel(ctx context.Context, apiSockPath string) error {
	manager := n.fc()
	return manager.SendCtrlAltDel(ctx, apiSockPath)
}
// waitForExit forwards pid, apiSock, and timeout to
// fcproc.Manager.WaitForExit on a manager freshly built by fc().
func (n *HostNetwork) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
	manager := n.fc()
	return manager.WaitForExit(ctx, pid, apiSock, timeout)
}
// killVMProcess forwards pid to fcproc.Manager.Kill on a manager
// freshly built by fc().
func (n *HostNetwork) killVMProcess(ctx context.Context, pid int) error {
	manager := n.fc()
	return manager.Kill(ctx, pid)
}
// waitForGuestVSockAgent repeatedly probes the guest vsock agent via
// vsockagent.Health until a probe succeeds, timeout elapses, or ctx
// is cancelled. It lives on HostNetwork because it is fundamentally
// about waiting for a vsock socket the firecracker process is
// serving on; no daemon state is needed.
//
// A blank socketPath is rejected up front. The first probe is issued
// immediately; subsequent probes are paced by a ticker firing every
// vsockReadyPoll. When the wait ends without a successful probe, the
// most recent probe error is returned wrapped in
// "guest vsock agent not ready".
func (n *HostNetwork) waitForGuestVSockAgent(ctx context.Context, socketPath string, timeout time.Duration) error {
	if strings.TrimSpace(socketPath) == "" {
		return errors.New("vsock path is required")
	}

	waitCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	ticker := time.NewTicker(vsockReadyPoll)
	defer ticker.Stop()

	for {
		// Each probe gets its own 3s cap, derived from waitCtx so it can
		// never outlive the overall deadline.
		pingCtx, pingCancel := context.WithTimeout(waitCtx, 3*time.Second)
		err := vsockagent.Health(pingCtx, n.logger, socketPath)
		pingCancel()
		if err == nil {
			return nil
		}

		// err is always non-nil at this point, so the deadline path can
		// report it directly; no separate "no error seen" fallback is
		// reachable.
		select {
		case <-waitCtx.Done():
			return fmt.Errorf("guest vsock agent not ready: %w", err)
		case <-ticker.C:
		}
	}
}
// --- Utilities used across networking ------------------------------

// defaultVSockPath returns the vsock socket path for vmID inside
// runtimeDir: a file named "fc-<short id>.vsock", where the short id
// comes from system.ShortID.
func defaultVSockPath(runtimeDir, vmID string) string {
	socketName := "fc-" + system.ShortID(vmID) + ".vsock"
	return filepath.Join(runtimeDir, socketName)
}
// defaultVSockCID derives a vsock context ID from the guest's IPv4
// address by adding the address's final octet to 10000. Whitespace
// surrounding guestIP is ignored when parsing. An error is returned
// (with a zero CID) when guestIP does not parse to an IPv4 address;
// the error message quotes guestIP exactly as it was passed in.
func defaultVSockCID(guestIP string) (uint32, error) {
	const cidBase uint32 = 10000

	trimmed := strings.TrimSpace(guestIP)
	if v4 := net.ParseIP(trimmed).To4(); v4 != nil {
		lastOctet := v4[len(v4)-1]
		return cidBase + uint32(lastOctet), nil
	}
	return 0, fmt.Errorf("guest IP is not IPv4: %q", guestIP)
}