daemon split (1/5): extract *HostNetwork service

First phase of splitting the daemon god-struct into focused services
with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface
pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap /
createTap), bridge + socket dir setup, firecracker process primitives
(find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM
snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle
+ routing setup, and the vsock-agent readiness probe. That's 7 files
whose receivers flipped from *Daemon to *HostNetwork, plus a new
host_network.go that declares the struct, its hostNetworkDeps, and
the factored firecracker + DNS helpers that used to live in vm.go.

Daemon gives up the tapPool and vmDNS fields entirely; they're now
HostNetwork's business. Construction goes through newHostNetwork in
Daemon.Open with an explicit dependency bag (runner, logger, config,
layout, closing). A lazy-init hostNet() helper on Daemon supports
test literals that don't wire net explicitly — production always
populates it eagerly.

Signature tightenings where the old receiver reached into VM-service
state:
 - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable).
   Callers resolve tap from the handle cache themselves.
 - initializeTapPool(ctx) → initializeTapPool(usedTaps []string).
   Daemon.Open enumerates VMs, collects taps from handles, hands the
   slice in.

rebuildDNS stays on *Daemon as the orchestrator — it filters by
vm-alive (a VMService concern; handles will move there in phase 4),
then calls HostNetwork.replaceDNS with the already-filtered map.

Capability hooks continue to take *Daemon; they now use it as a
facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned
CapabilityHost interface extraction is orthogonal, left for later.

Tests: dns_routing_test.go + fastpath_test.go + nat_test.go +
snapshot_test.go + open_close_test.go were touched to construct
HostNetwork literals where they exercise its methods directly, or
route through d.hostNet() where they exercise the Daemon entry
points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-20 20:11:46 -03:00
parent eba9a553bf
commit 362009d747
No known key found for this signature in database
GPG key ID: 33112E6833C34679
18 changed files with 461 additions and 326 deletions

View file

@ -18,98 +18,97 @@ type tapPool struct {
next int
}
func (d *Daemon) initializeTapPool(ctx context.Context) error {
if d.config.TapPoolSize <= 0 || d.store == nil {
return nil
}
vms, err := d.store.ListVMs(ctx)
if err != nil {
return err
// initializeTapPool seeds the monotonic pool index from the set of
// tap names already in use by running/stopped VMs, so newly warmed
// pool entries don't collide with existing ones. Callers (Daemon.Open)
// enumerate used taps from the handle cache and pass them in.
func (n *HostNetwork) initializeTapPool(usedTaps []string) {
if n.config.TapPoolSize <= 0 {
return
}
next := 0
for _, vm := range vms {
if index, ok := parseTapPoolIndex(d.vmHandles(vm.ID).TapDevice); ok && index >= next {
for _, tapName := range usedTaps {
if index, ok := parseTapPoolIndex(tapName); ok && index >= next {
next = index + 1
}
}
d.tapPool.mu.Lock()
d.tapPool.next = next
d.tapPool.mu.Unlock()
return nil
n.tapPool.mu.Lock()
n.tapPool.next = next
n.tapPool.mu.Unlock()
}
func (d *Daemon) ensureTapPool(ctx context.Context) {
if d.config.TapPoolSize <= 0 {
func (n *HostNetwork) ensureTapPool(ctx context.Context) {
if n.config.TapPoolSize <= 0 {
return
}
for {
select {
case <-ctx.Done():
return
case <-d.closing:
case <-n.closing:
return
default:
}
d.tapPool.mu.Lock()
if len(d.tapPool.entries) >= d.config.TapPoolSize {
d.tapPool.mu.Unlock()
n.tapPool.mu.Lock()
if len(n.tapPool.entries) >= n.config.TapPoolSize {
n.tapPool.mu.Unlock()
return
}
tapName := fmt.Sprintf("%s%d", tapPoolPrefix, d.tapPool.next)
d.tapPool.next++
d.tapPool.mu.Unlock()
tapName := fmt.Sprintf("%s%d", tapPoolPrefix, n.tapPool.next)
n.tapPool.next++
n.tapPool.mu.Unlock()
if err := d.createTap(ctx, tapName); err != nil {
if d.logger != nil {
d.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error())
if err := n.createTap(ctx, tapName); err != nil {
if n.logger != nil {
n.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error())
}
return
}
d.tapPool.mu.Lock()
d.tapPool.entries = append(d.tapPool.entries, tapName)
d.tapPool.mu.Unlock()
n.tapPool.mu.Lock()
n.tapPool.entries = append(n.tapPool.entries, tapName)
n.tapPool.mu.Unlock()
if d.logger != nil {
d.logger.Debug("tap added to idle pool", "tap_device", tapName)
if n.logger != nil {
n.logger.Debug("tap added to idle pool", "tap_device", tapName)
}
}
}
func (d *Daemon) acquireTap(ctx context.Context, fallbackName string) (string, error) {
d.tapPool.mu.Lock()
if n := len(d.tapPool.entries); n > 0 {
tapName := d.tapPool.entries[n-1]
d.tapPool.entries = d.tapPool.entries[:n-1]
d.tapPool.mu.Unlock()
func (n *HostNetwork) acquireTap(ctx context.Context, fallbackName string) (string, error) {
n.tapPool.mu.Lock()
if count := len(n.tapPool.entries); count > 0 {
tapName := n.tapPool.entries[count-1]
n.tapPool.entries = n.tapPool.entries[:count-1]
n.tapPool.mu.Unlock()
return tapName, nil
}
d.tapPool.mu.Unlock()
n.tapPool.mu.Unlock()
if err := d.createTap(ctx, fallbackName); err != nil {
if err := n.createTap(ctx, fallbackName); err != nil {
return "", err
}
return fallbackName, nil
}
func (d *Daemon) releaseTap(ctx context.Context, tapName string) error {
func (n *HostNetwork) releaseTap(ctx context.Context, tapName string) error {
tapName = strings.TrimSpace(tapName)
if tapName == "" {
return nil
}
if isTapPoolName(tapName) {
d.tapPool.mu.Lock()
if len(d.tapPool.entries) < d.config.TapPoolSize {
d.tapPool.entries = append(d.tapPool.entries, tapName)
d.tapPool.mu.Unlock()
n.tapPool.mu.Lock()
if len(n.tapPool.entries) < n.config.TapPoolSize {
n.tapPool.entries = append(n.tapPool.entries, tapName)
n.tapPool.mu.Unlock()
return nil
}
d.tapPool.mu.Unlock()
n.tapPool.mu.Unlock()
}
_, err := d.runner.RunSudo(ctx, "ip", "link", "del", tapName)
_, err := n.runner.RunSudo(ctx, "ip", "link", "del", tapName)
if err == nil {
go d.ensureTapPool(context.Background())
go n.ensureTapPool(context.Background())
}
return err
}