daemon split (4/5): extract *VMService service

Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.

Daemon keeps thin forwarders only for FindVM / TouchVM (dispatch
surface) and is otherwise out of VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
\`&Daemon{store: db, runner: r}\` still get a functional service
without spelling one out.

Three small cleanups along the way:

  * preflight helpers (validateStartPrereqs / addBaseStartPrereqs
    / addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
    move with the VM methods that call them.
  * cleanupRuntime / rebuildDNS move to *VMService, with
    HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
    killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
    reached through s.net instead of the hostNet() facade.
  * vsockAgentBinary becomes a package-level function so both
    *Daemon (doctor) and *VMService (preflight) call one entry
    point instead of each owning a forwarder method.

WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.

Pure code motion: build + unit tests green, lint clean. No RPC
surface or lock-ordering changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-20 20:57:05 -03:00
parent c0d456e734
commit 466a7c30c4
No known key found for this signature in database
GPG key ID: 33112E6833C34679
23 changed files with 655 additions and 463 deletions

View file

@ -12,9 +12,9 @@ import (
"banger/internal/vsockagent"
)
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
vm, err := d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return d.getVMStatsLocked(ctx, vm)
func (s *VMService) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
vm, err := s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return s.getVMStatsLocked(ctx, vm)
})
if err != nil {
return model.VMRecord{}, model.VMStats{}, err
@ -22,10 +22,10 @@ func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecor
return vm, vm.Stats, nil
}
func (d *Daemon) HealthVM(ctx context.Context, idOrName string) (result api.VMHealthResult, err error) {
_, err = d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
func (s *VMService) HealthVM(ctx context.Context, idOrName string) (result api.VMHealthResult, err error) {
_, err = s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
result.Name = vm.Name
if !d.vmAlive(vm) {
if !s.vmAlive(vm) {
result.Healthy = false
return vm, nil
}
@ -35,12 +35,12 @@ func (d *Daemon) HealthVM(ctx context.Context, idOrName string) (result api.VMHe
if vm.Runtime.VSockCID == 0 {
return model.VMRecord{}, errors.New("vm has no vsock cid")
}
if err := d.hostNet().ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
if err := s.net.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
return model.VMRecord{}, err
}
pingCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
defer cancel()
if err := vsockagent.Health(pingCtx, d.logger, vm.Runtime.VSockPath); err != nil {
if err := vsockagent.Health(pingCtx, s.logger, vm.Runtime.VSockPath); err != nil {
return model.VMRecord{}, err
}
result.Healthy = true
@ -49,47 +49,47 @@ func (d *Daemon) HealthVM(ctx context.Context, idOrName string) (result api.VMHe
return result, err
}
func (d *Daemon) PingVM(ctx context.Context, idOrName string) (result api.VMPingResult, err error) {
health, err := d.HealthVM(ctx, idOrName)
func (s *VMService) PingVM(ctx context.Context, idOrName string) (result api.VMPingResult, err error) {
health, err := s.HealthVM(ctx, idOrName)
if err != nil {
return api.VMPingResult{}, err
}
return api.VMPingResult{Name: health.Name, Alive: health.Healthy}, nil
}
func (d *Daemon) getVMStatsLocked(ctx context.Context, vm model.VMRecord) (model.VMRecord, error) {
stats, err := d.collectStats(ctx, vm)
func (s *VMService) getVMStatsLocked(ctx context.Context, vm model.VMRecord) (model.VMRecord, error) {
stats, err := s.collectStats(ctx, vm)
if err == nil {
vm.Stats = stats
vm.UpdatedAt = model.Now()
_ = d.store.UpsertVM(ctx, vm)
if d.logger != nil {
d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
_ = s.store.UpsertVM(ctx, vm)
if s.logger != nil {
s.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
}
}
return vm, nil
}
func (d *Daemon) pollStats(ctx context.Context) error {
vms, err := d.store.ListVMs(ctx)
func (s *VMService) pollStats(ctx context.Context) error {
vms, err := s.store.ListVMs(ctx)
if err != nil {
return err
}
for _, vm := range vms {
if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if !d.vmAlive(vm) {
if err := s.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if !s.vmAlive(vm) {
return nil
}
stats, err := d.collectStats(ctx, vm)
stats, err := s.collectStats(ctx, vm)
if err != nil {
if d.logger != nil {
d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
if s.logger != nil {
s.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
}
return nil
}
vm.Stats = stats
vm.UpdatedAt = model.Now()
return d.store.UpsertVM(ctx, vm)
return s.store.UpsertVM(ctx, vm)
}); err != nil {
return err
}
@ -97,11 +97,11 @@ func (d *Daemon) pollStats(ctx context.Context) error {
return nil
}
func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
if d.config.AutoStopStaleAfter <= 0 {
func (s *VMService) stopStaleVMs(ctx context.Context) (err error) {
if s.config.AutoStopStaleAfter <= 0 {
return nil
}
op := d.beginOperation("vm.stop_stale")
op := s.beginOperation("vm.stop_stale")
defer func() {
if err != nil {
op.fail(err)
@ -109,28 +109,28 @@ func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
}
op.done()
}()
vms, err := d.store.ListVMs(ctx)
vms, err := s.store.ListVMs(ctx)
if err != nil {
return err
}
now := model.Now()
for _, vm := range vms {
if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if !d.vmAlive(vm) {
if err := s.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if !s.vmAlive(vm) {
return nil
}
if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
if now.Sub(vm.LastTouchedAt) < s.config.AutoStopStaleAfter {
return nil
}
op.stage("stopping_vm", vmLogAttrs(vm)...)
_ = d.hostNet().sendCtrlAltDel(ctx, vm.Runtime.APISockPath)
_ = d.hostNet().waitForExit(ctx, d.vmHandles(vm.ID).PID, vm.Runtime.APISockPath, 10*time.Second)
_ = d.cleanupRuntime(ctx, vm, true)
_ = s.net.sendCtrlAltDel(ctx, vm.Runtime.APISockPath)
_ = s.net.waitForExit(ctx, s.vmHandles(vm.ID).PID, vm.Runtime.APISockPath, 10*time.Second)
_ = s.cleanupRuntime(ctx, vm, true)
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
d.clearVMHandles(vm)
s.clearVMHandles(vm)
vm.UpdatedAt = model.Now()
return d.store.UpsertVM(ctx, vm)
return s.store.UpsertVM(ctx, vm)
}); err != nil {
return err
}
@ -138,15 +138,15 @@ func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
return nil
}
func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
func (s *VMService) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
stats := model.VMStats{
CollectedAt: model.Now(),
SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
WorkDiskBytes: system.AllocatedBytes(vm.Runtime.WorkDiskPath),
MetricsRaw: system.ParseMetricsFile(vm.Runtime.MetricsPath),
}
if d.vmAlive(vm) {
if ps, err := system.ReadProcessStats(ctx, d.vmHandles(vm.ID).PID); err == nil {
if s.vmAlive(vm) {
if ps, err := system.ReadProcessStats(ctx, s.vmHandles(vm.ID).PID); err == nil {
stats.CPUPercent = ps.CPUPercent
stats.RSSBytes = ps.RSSBytes
stats.VSZBytes = ps.VSZBytes