daemon split (1/5): extract *HostNetwork service

First phase of splitting the daemon god-struct into focused services
with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface
pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap /
createTap), bridge + socket dir setup, firecracker process primitives
(find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM
snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle
+ routing setup, and the vsock-agent readiness probe. That's 7 files
whose receivers flipped from *Daemon to *HostNetwork, plus a new
host_network.go that declares the struct, its hostNetworkDeps, and
the factored firecracker + DNS helpers that used to live in vm.go.

Daemon gives up the tapPool and vmDNS fields entirely; they're now
HostNetwork's business. Construction goes through newHostNetwork in
Daemon.Open with an explicit dependency bag (runner, logger, config,
layout, closing). A lazy-init hostNet() helper on Daemon supports
test literals that don't wire net explicitly — production always
populates it eagerly.

Signature tightenings where the old receiver reached into VM-service
state:
 - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable).
   Callers resolve tap from the handle cache themselves.
 - initializeTapPool(ctx) → initializeTapPool(usedTaps []string).
   Daemon.Open enumerates VMs, collects taps from handles, hands the
   slice in.

rebuildDNS stays on *Daemon as the orchestrator — it filters by
vm-alive (a VMService concern that the handles will move to in phase
4) then calls HostNetwork.replaceDNS with the already-filtered map.

Capability hooks continue to take *Daemon; they now use it as a
facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned
CapabilityHost interface extraction is orthogonal, left for later.

Tests: dns_routing_test.go + fastpath_test.go + nat_test.go +
snapshot_test.go + open_close_test.go were touched to construct
HostNetwork literals where they exercise its methods directly, or
route through d.hostNet() where they exercise the Daemon entry
points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-20 20:11:46 -03:00
parent eba9a553bf
commit 362009d747
No known key found for this signature in database
GPG key ID: 33112E6833C34679
18 changed files with 461 additions and 326 deletions

View file

@ -56,11 +56,11 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
}
d.clearVMHandles(vm)
op.stage("bridge")
if err := d.ensureBridge(ctx); err != nil {
if err := d.hostNet().ensureBridge(ctx); err != nil {
return model.VMRecord{}, err
}
op.stage("socket_dir")
if err := d.ensureSocketDir(); err != nil {
if err := d.hostNet().ensureSocketDir(); err != nil {
return model.VMRecord{}, err
}
@ -92,7 +92,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
op.stage("dm_snapshot", "dm_name", dmName)
vmCreateStage(ctx, "prepare_rootfs", "creating root filesystem snapshot")
snapHandles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
snapHandles, err := d.hostNet().createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
if err != nil {
return model.VMRecord{}, err
}
@ -138,7 +138,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
return cleanupOnErr(err)
}
op.stage("tap")
tap, err := d.acquireTap(ctx, tapName)
tap, err := d.hostNet().acquireTap(ctx, tapName)
if err != nil {
return cleanupOnErr(err)
}
@ -150,7 +150,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
}
op.stage("firecracker_binary")
fcPath, err := d.firecrackerBinary()
fcPath, err := d.hostNet().firecrackerBinary()
if err != nil {
return cleanupOnErr(err)
}
@ -200,23 +200,23 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
// Use a fresh context: the request ctx may already be cancelled (client
// disconnect), but we still need the PID so cleanupRuntime can kill the
// Firecracker process that was spawned before the failure.
live.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock)
live.PID = d.hostNet().resolveFirecrackerPID(context.Background(), machine, apiSock)
d.setVMHandles(vm, live)
return cleanupOnErr(err)
}
live.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock)
live.PID = d.hostNet().resolveFirecrackerPID(context.Background(), machine, apiSock)
d.setVMHandles(vm, live)
op.debugStage("firecracker_started", "pid", live.PID)
op.stage("socket_access", "api_socket", apiSock)
if err := d.ensureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
if err := d.hostNet().ensureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
return cleanupOnErr(err)
}
op.stage("vsock_access", "vsock_path", vm.Runtime.VSockPath, "vsock_cid", vm.Runtime.VSockCID)
if err := d.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
if err := d.hostNet().ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
return cleanupOnErr(err)
}
vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
if err := waitForGuestVSockAgent(ctx, d.logger, vm.Runtime.VSockPath, vsockReadyWait); err != nil {
if err := d.hostNet().waitForGuestVSockAgent(ctx, vm.Runtime.VSockPath, vsockReadyWait); err != nil {
return cleanupOnErr(err)
}
op.stage("post_start_features")
@ -264,11 +264,11 @@ func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm m
}
pid := d.vmHandles(vm.ID).PID
op.stage("graceful_shutdown")
if err := d.sendCtrlAltDel(ctx, vm); err != nil {
if err := d.hostNet().sendCtrlAltDel(ctx, vm.Runtime.APISockPath); err != nil {
return model.VMRecord{}, err
}
op.stage("wait_for_exit", "pid", pid)
if err := d.waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
if err := d.hostNet().waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
if !errors.Is(err, errWaitForExitTimeout) {
return model.VMRecord{}, err
}
@ -328,7 +328,7 @@ func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signa
return model.VMRecord{}, err
}
op.stage("wait_for_exit", "pid", pid)
if err := d.waitForExit(ctx, pid, vm.Runtime.APISockPath, 30*time.Second); err != nil {
if err := d.hostNet().waitForExit(ctx, pid, vm.Runtime.APISockPath, 30*time.Second); err != nil {
if !errors.Is(err, errWaitForExitTimeout) {
return model.VMRecord{}, err
}
@ -395,7 +395,7 @@ func (d *Daemon) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm
if d.vmAlive(vm) {
pid := d.vmHandles(vm.ID).PID
op.stage("kill_running_vm", "pid", pid)
_ = d.killVMProcess(ctx, pid)
_ = d.hostNet().killVMProcess(ctx, pid)
}
op.stage("cleanup_runtime")
if err := d.cleanupRuntime(ctx, vm, false); err != nil {