banger/internal/daemon/vm_stats.go
Thales Maciel 362009d747
daemon split (1/5): extract *HostNetwork service
First phase of splitting the daemon god-struct into focused services
with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface
pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap /
createTap), bridge + socket dir setup, firecracker process primitives
(find/resolve/kill/wait/ensureSocketAccess/sendCtrlAltDel), DM
snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle
+ routing setup, and the vsock-agent readiness probe. That's 7 files
whose receivers flipped from *Daemon to *HostNetwork, plus a new
host_network.go that declares the struct, its hostNetworkDeps, and
the factored firecracker + DNS helpers that used to live in vm.go.
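
A rough sketch of the shape (every type below is a stand-in so the
sketch compiles; only hostNetworkDeps, HostNetwork, newHostNetwork, and
the five dependency names come from this change):

package daemon

import (
	"context"
	"log/slog"
)

type (
	commandRunner interface {
		Run(ctx context.Context, name string, args ...string) error
	}
	Config    struct{}
	Layout    struct{}
	tapPool   struct{ free []string }
	dnsServer struct{}
)

type hostNetworkDeps struct {
	runner  commandRunner
	logger  *slog.Logger
	config  *Config
	layout  *Layout
	closing <-chan struct{} // assumed to be some kind of shutdown signal
}

type HostNetwork struct {
	deps    hostNetworkDeps
	tapPool tapPool   // owned here now, no longer a Daemon field
	vmDNS   dnsServer // same
}

func newHostNetwork(deps hostNetworkDeps) *HostNetwork {
	return &HostNetwork{deps: deps}
}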

Daemon gives up the tapPool and vmDNS fields entirely; they're now
HostNetwork's business. Construction goes through newHostNetwork in
Daemon.Open with an explicit dependency bag (runner, logger, config,
layout, closing). A lazy-init hostNet() helper on Daemon supports
test literals that don't wire net explicitly — production always
populates it eagerly.
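
Continuing the sketch, the lazy accessor plausibly looks like this (the
Daemon field names and the sync.Once guard are guesses; a plain nil
check may be what the real code does):

import "sync"

type Daemon struct {
	logger  *slog.Logger
	net     *HostNetwork // production: set eagerly in Daemon.Open
	netOnce sync.Once
}

// hostNet returns the HostNetwork service, building a default one the
// first time a test-constructed Daemon literal asks for it.
func (d *Daemon) hostNet() *HostNetwork {
	d.netOnce.Do(func() {
		if d.net == nil {
			d.net = newHostNetwork(hostNetworkDeps{logger: d.logger})
		}
	})
	return d.net
}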

Signature tightenings where the old receiver reached into VM-service
state (caller-side sketch after the list):
 - ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable).
   Callers resolve tap from the handle cache themselves.
 - initializeTapPool(ctx) → initializeTapPool(usedTaps []string).
   Daemon.Open enumerates VMs, collects taps from handles, hands the
   slice in.
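
Caller-side, the pool seeding plausibly looks like this, reusing the
package's real store and handle cache (vmHandles and a TapName field
are guesses at the handle-cache shape; seedTapPool is an invented name
for what Daemon.Open does inline):

func (d *Daemon) seedTapPool(ctx context.Context) error {
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	usedTaps := make([]string, 0, len(vms))
	for _, vm := range vms {
		if tap := d.vmHandles(vm.ID).TapName; tap != "" {
			usedTaps = append(usedTaps, tap)
		}
	}
	return d.hostNet().initializeTapPool(usedTaps)
}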

rebuildDNS stays on *Daemon as the orchestrator: it filters by
vm-aliveness (a VMService concern that the handles will move into in
phase 4), then calls HostNetwork.replaceDNS with the already-filtered
map.
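
In sketch form (the map's key/value types and the GuestIP field are
assumptions; vmAlive, ListVMs, and replaceDNS are the names this change
talks about):

func (d *Daemon) rebuildDNS(ctx context.Context) error {
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	alive := make(map[string]string, len(vms)) // name -> guest IP, assumed
	for _, vm := range vms {
		if d.vmAlive(vm) {
			alive[vm.Name] = vm.Runtime.GuestIP
		}
	}
	return d.hostNet().replaceDNS(ctx, alive)
}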

Capability hooks continue to take *Daemon; they now use it as a
facade to reach services (d.net.ensureNAT, d.hostNet().*). Planned
CapabilityHost interface extraction is orthogonal, left for later.
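
For example (hook name and signature invented for illustration; TapName
again a guessed handle field):

func enableGuestNAT(ctx context.Context, d *Daemon, vm model.VMRecord) error {
	tap := d.vmHandles(vm.ID).TapName
	return d.hostNet().ensureNAT(ctx, vm.Runtime.GuestIP, tap, true)
}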

Tests: dns_routing_test.go + fastpath_test.go + nat_test.go +
snapshot_test.go + open_close_test.go were touched to construct
HostNetwork literals where they exercise its methods directly, or
route through d.hostNet() where they exercise the Daemon entry
points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:11:46 -03:00

package daemon

import (
	"context"
	"errors"
	"strings"
	"time"

	"banger/internal/api"
	"banger/internal/model"
	"banger/internal/system"
	"banger/internal/vsockagent"
)

// GetVMStats refreshes and returns the stats for a single VM, resolved
// by ID or name, under the per-VM lock.
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
	vm, err := d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.getVMStatsLocked(ctx, vm)
	})
	if err != nil {
		return model.VMRecord{}, model.VMStats{}, err
	}
	return vm, vm.Stats, nil
}

// HealthVM reports whether a VM's vsock agent answers a health probe.
// A VM that is not alive is reported unhealthy rather than as an error.
func (d *Daemon) HealthVM(ctx context.Context, idOrName string) (result api.VMHealthResult, err error) {
	_, err = d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		result.Name = vm.Name
		if !d.vmAlive(vm) {
			result.Healthy = false
			return vm, nil
		}
		if strings.TrimSpace(vm.Runtime.VSockPath) == "" {
			return model.VMRecord{}, errors.New("vm has no vsock path")
		}
		if vm.Runtime.VSockCID == 0 {
			return model.VMRecord{}, errors.New("vm has no vsock cid")
		}
		if err := d.hostNet().ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
			return model.VMRecord{}, err
		}
		pingCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
		defer cancel()
		if err := vsockagent.Health(pingCtx, d.logger, vm.Runtime.VSockPath); err != nil {
			return model.VMRecord{}, err
		}
		result.Healthy = true
		return vm, nil
	})
	return result, err
}

// PingVM is a thin alias over HealthVM that reports liveness only.
func (d *Daemon) PingVM(ctx context.Context, idOrName string) (result api.VMPingResult, err error) {
	health, err := d.HealthVM(ctx, idOrName)
	if err != nil {
		return api.VMPingResult{}, err
	}
	return api.VMPingResult{Name: health.Name, Alive: health.Healthy}, nil
}

// getVMStatsLocked refreshes stats best-effort: a collection or
// persistence failure leaves the record's previous stats in place and
// is not surfaced to the caller.
func (d *Daemon) getVMStatsLocked(ctx context.Context, vm model.VMRecord) (model.VMRecord, error) {
	stats, err := d.collectStats(ctx, vm)
	if err == nil {
		vm.Stats = stats
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
		if d.logger != nil {
			d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
		}
	}
	return vm, nil
}

// pollStats walks every VM and refreshes stats for the live ones.
// Collection failures are logged at debug level and skipped; store
// errors abort the poll.
func (d *Daemon) pollStats(ctx context.Context) error {
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	for _, vm := range vms {
		if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
			if !d.vmAlive(vm) {
				return nil
			}
			stats, err := d.collectStats(ctx, vm)
			if err != nil {
				if d.logger != nil {
					d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
				}
				return nil
			}
			vm.Stats = stats
			vm.UpdatedAt = model.Now()
			return d.store.UpsertVM(ctx, vm)
		}); err != nil {
			return err
		}
	}
	return nil
}

// stopStaleVMs shuts down VMs that have not been touched within
// AutoStopStaleAfter: a graceful Ctrl-Alt-Del, a bounded wait for exit,
// then runtime cleanup and a state flip to stopped.
func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
	if d.config.AutoStopStaleAfter <= 0 {
		return nil
	}
	op := d.beginOperation("vm.stop_stale")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done()
	}()
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	now := model.Now()
	for _, vm := range vms {
		if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
			if !d.vmAlive(vm) {
				return nil
			}
			if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
				return nil
			}
			op.stage("stopping_vm", vmLogAttrs(vm)...)
			// Best-effort graceful shutdown: errors are ignored and
			// cleanup proceeds regardless.
			_ = d.hostNet().sendCtrlAltDel(ctx, vm.Runtime.APISockPath)
			_ = d.hostNet().waitForExit(ctx, d.vmHandles(vm.ID).PID, vm.Runtime.APISockPath, 10*time.Second)
			_ = d.cleanupRuntime(ctx, vm, true)
			vm.State = model.VMStateStopped
			vm.Runtime.State = model.VMStateStopped
			d.clearVMHandles(vm)
			vm.UpdatedAt = model.Now()
			return d.store.UpsertVM(ctx, vm)
		}); err != nil {
			return err
		}
	}
	return nil
}

// collectStats gathers disk, metrics-file, and (when the VM is alive)
// process-level numbers. Process stat failures are ignored so the disk
// figures still come back.
func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
	stats := model.VMStats{
		CollectedAt:        model.Now(),
		SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
		WorkDiskBytes:      system.AllocatedBytes(vm.Runtime.WorkDiskPath),
		MetricsRaw:         system.ParseMetricsFile(vm.Runtime.MetricsPath),
	}
	if d.vmAlive(vm) {
		if ps, err := system.ReadProcessStats(ctx, d.vmHandles(vm.ID).PID); err == nil {
			stats.CPUPercent = ps.CPUPercent
			stats.RSSBytes = ps.RSSBytes
			stats.VSZBytes = ps.VSZBytes
		}
	}
	return stats, nil
}