daemon split (4/5): extract *VMService
Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.
Daemon keeps thin forwarders only for FindVM / TouchVM (dispatch
surface) and is otherwise out of VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
`&Daemon{store: db, runner: r}` still get a functional service
without spelling one out.
Three small cleanups along the way:
* preflight helpers (validateStartPrereqs / addBaseStartPrereqs
/ addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
move with the VM methods that call them.
* cleanupRuntime / rebuildDNS move to *VMService, with
HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
reached through s.net instead of the hostNet() facade.
* vsockAgentBinary becomes a package-level function so both
*Daemon (doctor) and *VMService (preflight) call one entry
point instead of each owning a forwarder method; rough shape
sketched after this list.
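The shape of that last change, as a hedged sketch (the binDir
parameter, the binary name, and the body are placeholders — the real
function presumably resolves against paths.Layout; assumes
"path/filepath" is imported):

    // before: a *Daemon method, which would have forced a forwarder
    // method onto *VMService to reach it:
    //
    //     func (d *Daemon) vsockAgentBinary() string { ... }
    //
    // after: one package-level entry point that doctor (*Daemon) and
    // preflight (*VMService) both call directly.
    func vsockAgentBinary(binDir string) string {
        return filepath.Join(binDir, "vsock-agent") // placeholder name
    }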
WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.
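A minimal, self-contained toy of the cycle and the fix (toy types
standing in for Daemon / VMService / WorkspaceService; the real
dependency is the FindVM lookup shown in the diff below):

    package main

    import "fmt"

    type vmService struct {
        ws   *wsService
        find func(ref string) string
    }

    type wsService struct {
        findVM func(ref string) string
    }

    type daemon struct {
        vm *vmService
        ws *wsService
    }

    func (d *daemon) vmSvc() *vmService {
        if d.vm != nil {
            return d.vm
        }
        d.vm = &vmService{
            ws:   d.wsSvc(), // VMService takes WorkspaceService as a peer
            find: func(ref string) string { return "vm:" + ref },
        }
        return d.vm
    }

    func (d *daemon) wsSvc() *wsService {
        if d.ws != nil {
            return d.ws
        }
        d.ws = &wsService{
            // BAD: an eager method value evaluates d.vmSvc() right
            // here, before d.ws is assigned, so vmSvc() -> wsSvc()
            // -> vmSvc() recurses forever:
            //     findVM: d.vmSvc().find,
            // GOOD: the closure defers the vmSvc() lookup to call
            // time, when both services already exist.
            findVM: func(ref string) string { return d.vmSvc().find(ref) },
        }
        return d.ws
    }

    func main() {
        d := &daemon{}
        fmt.Println(d.wsSvc().findVM("web-1")) // vm:web-1
    }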
Otherwise pure code motion: build + unit tests green, lint clean.
No RPC-surface or lock-ordering changes.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent c0d456e734
commit 466a7c30c4
23 changed files with 655 additions and 463 deletions
internal/daemon/vm_service.go (new file, 256 lines)
@@ -0,0 +1,256 @@
package daemon

import (
    "context"
    "database/sql"
    "errors"
    "fmt"
    "log/slog"
    "strings"
    "sync"
    "time"

    "banger/internal/daemon/opstate"
    "banger/internal/firecracker"
    "banger/internal/guestconfig"
    "banger/internal/model"
    "banger/internal/paths"
    "banger/internal/store"
    "banger/internal/system"
)

// VMService owns VM lifecycle — create / start / stop / restart /
// kill / delete / set — plus the handle cache, create-operation
// registry, stats polling, disk provisioning, ports query, and the
// SSH-client test seams.
//
// It holds pointers to its peer services (HostNetwork, ImageService,
// WorkspaceService) because VM lifecycle really does orchestrate
// across them (start needs bridge + tap + firecracker + auth sync +
// boot). Defining narrow function-typed interfaces for every peer
// method VMService calls would balloon the diff for no real win —
// services remain unexported within the package so nothing outside
// the daemon can see them.
//
// Capability invocation still runs through Daemon because the hook
// interfaces take *Daemon directly. VMService calls back via the
// capHooks seam rather than holding a *Daemon pointer, to keep the
// dependency graph acyclic.
type VMService struct {
    runner system.CommandRunner
    logger *slog.Logger
    config model.DaemonConfig
    layout paths.Layout
    store  *store.Store

    // vmLocks is the per-VM mutex set. Held across entire lifecycle
    // ops (start, stop, delete, set) — not just the validation window.
    // Workspace.prepare intentionally splits off onto its own lock
    // scope; see WorkspaceService.
    vmLocks    vmLockSet
    createVMMu sync.Mutex
    createOps  opstate.Registry[*vmCreateOperationState]

    // handles caches per-VM transient kernel/process state (PID, tap,
    // loop devices, DM name/device). Rebuildable at daemon startup
    // from a per-VM handles.json scratch file plus OS inspection.
    handles *handleCache

    // Peer services. VMService orchestrates across all three during
    // start/stop/delete; pointer fields keep call sites direct without
    // promoting the peer API to package-level interfaces.
    net *HostNetwork
    img *ImageService
    ws  *WorkspaceService

    // Test seams.
    guestWaitForSSH func(context.Context, string, string, time.Duration) error
    guestDial       func(context.Context, string, string) (guestSSHClient, error)

    // Capability hook dispatch. Capabilities themselves live on
    // *Daemon (their interface takes *Daemon as receiver); VMService
    // invokes them via these seams so it doesn't need a *Daemon
    // pointer.
    capHooks capabilityHooks

    beginOperation func(name string, attrs ...any) *operationLog
}

// capabilityHooks bundles the capability-dispatch entry points that
// VMService needs. Populated by Daemon.buildCapabilityHooks() at
// service construction; stubbable in tests that don't care about
// capability side effects.
type capabilityHooks struct {
    addStartPrereqs    func(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image)
    contributeGuest    func(builder *guestconfig.Builder, vm model.VMRecord, image model.Image)
    contributeMachine  func(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image)
    prepareHosts       func(ctx context.Context, vm *model.VMRecord, image model.Image) error
    postStart          func(ctx context.Context, vm model.VMRecord, image model.Image) error
    cleanupState       func(ctx context.Context, vm model.VMRecord) error
    applyConfigChanges func(ctx context.Context, before, after model.VMRecord) error
}

type vmServiceDeps struct {
    runner          system.CommandRunner
    logger          *slog.Logger
    config          model.DaemonConfig
    layout          paths.Layout
    store           *store.Store
    net             *HostNetwork
    img             *ImageService
    ws              *WorkspaceService
    guestWaitForSSH func(context.Context, string, string, time.Duration) error
    guestDial       func(context.Context, string, string) (guestSSHClient, error)
    capHooks        capabilityHooks
    beginOperation  func(name string, attrs ...any) *operationLog
}

func newVMService(deps vmServiceDeps) *VMService {
    return &VMService{
        runner:          deps.runner,
        logger:          deps.logger,
        config:          deps.config,
        layout:          deps.layout,
        store:           deps.store,
        net:             deps.net,
        img:             deps.img,
        ws:              deps.ws,
        guestWaitForSSH: deps.guestWaitForSSH,
        guestDial:       deps.guestDial,
        capHooks:        deps.capHooks,
        beginOperation:  deps.beginOperation,
        handles:         newHandleCache(),
    }
}

// vmSvc is Daemon's lazy-init getter. Mirrors hostNet() / imageSvc() /
// workspaceSvc() so test literals like `&Daemon{store: db, runner: r}`
// still get a functional VMService without spelling one out.
func (d *Daemon) vmSvc() *VMService {
    if d.vm != nil {
        return d.vm
    }
    d.vm = newVMService(vmServiceDeps{
        runner:          d.runner,
        logger:          d.logger,
        config:          d.config,
        layout:          d.layout,
        store:           d.store,
        net:             d.hostNet(),
        img:             d.imageSvc(),
        ws:              d.workspaceSvc(),
        guestWaitForSSH: d.guestWaitForSSH,
        guestDial:       d.guestDial,
        capHooks:        d.buildCapabilityHooks(),
        beginOperation:  d.beginOperation,
    })
    return d.vm
}

// buildCapabilityHooks adapts Daemon's existing capability-dispatch
// methods into the capabilityHooks bag VMService takes. Keeps the
// registry + capability types on *Daemon while letting VMService call
// into them through explicit function seams.
func (d *Daemon) buildCapabilityHooks() capabilityHooks {
    return capabilityHooks{
        addStartPrereqs:    d.addCapabilityStartPrereqs,
        contributeGuest:    d.contributeGuestConfig,
        contributeMachine:  d.contributeMachineConfig,
        prepareHosts:       d.prepareCapabilityHosts,
        postStart:          d.postStartCapabilities,
        cleanupState:       d.cleanupCapabilityState,
        applyConfigChanges: d.applyCapabilityConfigChanges,
    }
}

// FindVM resolves an ID-or-name against the store with the historical
// precedence: exact-ID / exact-name first, then unambiguous prefix
// match. Returns an error when no match is found or when a prefix
// matches more than one record.
func (s *VMService) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
    if idOrName == "" {
        return model.VMRecord{}, errors.New("vm id or name is required")
    }
    if vm, err := s.store.GetVM(ctx, idOrName); err == nil {
        return vm, nil
    }
    vms, err := s.store.ListVMs(ctx)
    if err != nil {
        return model.VMRecord{}, err
    }
    matchCount := 0
    var match model.VMRecord
    for _, vm := range vms {
        if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
            match = vm
            matchCount++
        }
    }
    if matchCount == 1 {
        return match, nil
    }
    if matchCount > 1 {
        return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
    }
    return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
}

// TouchVM bumps a VM's updated-at timestamp under the per-VM lock.
func (s *VMService) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
    return s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
        system.TouchNow(&vm)
        if err := s.store.UpsertVM(ctx, vm); err != nil {
            return model.VMRecord{}, err
        }
        return vm, nil
    })
}

// withVMLockByRef resolves idOrName then serialises fn under the
// per-VM lock. Every mutating VM operation funnels through here.
func (s *VMService) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
    vm, err := s.FindVM(ctx, idOrName)
    if err != nil {
        return model.VMRecord{}, err
    }
    return s.withVMLockByID(ctx, vm.ID, fn)
}

// withVMLockByID locks on the stable VM ID (so a rename mid-flight
// doesn't drop the lock) and re-reads the record under the lock so
// fn sees the committed state.
func (s *VMService) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
    if strings.TrimSpace(id) == "" {
        return model.VMRecord{}, errors.New("vm id is required")
    }
    unlock := s.lockVMID(id)
    defer unlock()

    vm, err := s.store.GetVMByID(ctx, id)
    if err != nil {
        if errors.Is(err, sql.ErrNoRows) {
            return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
        }
        return model.VMRecord{}, err
    }
    return fn(vm)
}

// withVMLockByIDErr is the error-only variant of withVMLockByID for
// callers that don't need the returned record.
func (s *VMService) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
    _, err := s.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
        if err := fn(vm); err != nil {
            return model.VMRecord{}, err
        }
        return vm, nil
    })
    return err
}

// lockVMID exposes the per-VM mutex for callers that need to hold it
// outside the usual withVMLockByRef/withVMLockByID helpers
// (workspace prepare, for example).
func (s *VMService) lockVMID(id string) func() {
    return s.vmLocks.lock(id)
}