daemon split (4/5): extract *VMService
Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.
Daemon keeps thin forwarders only for FindVM / TouchVM (dispatch
surface) and is otherwise out of VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
`&Daemon{store: db, runner: r}` still get a functional service
without spelling one out.
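For reference, a minimal sketch of that accessor, assuming the same
nil-check-and-construct pattern as the other lazy getters; the real
constructor wires more dependencies (locks, handle cache, workspace
peer) and its field names may differ:

    func (d *Daemon) vmSvc() *VMService {
    	if d.vm == nil {
    		// Sketch only: field names are assumptions, and the real
    		// accessor may also guard against concurrent first use.
    		d.vm = &VMService{
    			store:  d.store,
    			runner: d.runner,
    			logger: d.logger,
    			layout: d.layout,
    			net:    d.net,
    		}
    	}
    	return d.vm
    }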
Three small cleanups along the way:
* preflight helpers (validateStartPrereqs / addBaseStartPrereqs
/ addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
move with the VM methods that call them.
* cleanupRuntime / rebuildDNS move to *VMService, with
HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
reached through s.net instead of the hostNet() facade.
* vsockAgentBinary becomes a package-level function so both
*Daemon (doctor) and *VMService (preflight) call one entry
point instead of each owning a forwarder method (sketch below).
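Roughly the shape of that last cleanup; the signature and the path
lookup are assumptions, not the real code:

    // Package-level: any caller that holds the layout uses the same
    // entry point, so *Daemon and *VMService need no forwarder methods.
    func vsockAgentBinary(layout paths.Layout) string {
    	return filepath.Join(layout.Root, "bin", "vsock-agent") // assumed location
    }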
WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.
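Concretely, the wiring changes from an eager method value to a
deferred closure; the deps struct and field name below are
hypothetical stand-ins for the real ones:

    deps := workspaceServiceDeps{
    	// Old (eager): `FindVM: d.vmSvc().FindVM` forces vmSvc() to run
    	// while workspaceSvc() is still constructing (the recursion above).
    	// New (closure): the vmSvc() lookup happens only at call time.
    	FindVM: func(ctx context.Context, idOrName string) (model.VMRecord, error) {
    		return d.vmSvc().FindVM(ctx, idOrName)
    	},
    }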
Pure code motion: build + unit tests green, lint clean. No RPC
surface or lock-ordering changes.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent c0d456e734
commit 466a7c30c4
23 changed files with 655 additions and 463 deletions
@@ -3,21 +3,18 @@ package daemon
import (
	"bufio"
	"context"
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"os"
	"strings"
	"sync"
	"time"

	"banger/internal/api"
	"banger/internal/buildinfo"
	"banger/internal/config"
	"banger/internal/daemon/opstate"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/rpc"
@@ -26,31 +23,23 @@ import (
	"banger/internal/vmdns"
)

// Daemon is the composition root: shared infrastructure (store,
// runner, logger, layout, config) plus pointers to the four focused
// services that own behavior. Open wires the services; the dispatch
// loop forwards RPCs to them. No lifecycle / image / workspace /
// networking behavior lives on *Daemon itself — it's wiring.
type Daemon struct {
	layout paths.Layout
	config model.DaemonConfig
	store *store.Store
	runner system.CommandRunner
	logger *slog.Logger
	createVMMu sync.Mutex
	createOps opstate.Registry[*vmCreateOperationState]
	vmLocks vmLockSet
	// workspaceLocks serialises workspace.prepare / workspace.export
	// calls on the same VM (two concurrent prepares would clobber each
	// other's tar streams). It is a SEPARATE scope from vmLocks so
	// slow guest I/O — SSH dial, tar upload, chmod — does not block
	// vm stop/delete/restart. See ARCHITECTURE.md.
	workspaceLocks vmLockSet
	// handles caches per-VM transient kernel/process handles (PID,
	// tap device, loop devices, DM name/device). Populated at vm
	// start and at daemon startup reconcile; cleared on stop/delete.
	// See internal/daemon/vm_handles.go — persistent durable state
	// lives in the store, this is rebuildable from a per-VM
	// handles.json scratch file and OS inspection.
	handles *handleCache
	net *HostNetwork
	img *ImageService
	ws *WorkspaceService
	layout paths.Layout
	config model.DaemonConfig
	store *store.Store
	runner system.CommandRunner
	logger *slog.Logger

	net *HostNetwork
	img *ImageService
	ws *WorkspaceService
	vm *VMService

	closing chan struct{}
	once sync.Once
	pid int
@@ -92,7 +81,6 @@ func Open(ctx context.Context) (d *Daemon, err error) {
		logger: logger,
		closing: closing,
		pid: os.Getpid(),
		handles: newHandleCache(),
		net: newHostNetwork(hostNetworkDeps{
			runner: runner,
			logger: logger,
@@ -134,7 +122,7 @@ func Open(ctx context.Context) (d *Daemon, err error) {
	}
	used := make([]string, 0, len(vms))
	for _, vm := range vms {
		if tap := d.vmHandles(vm.ID).TapDevice; tap != "" {
		if tap := d.vmSvc().vmHandles(vm.ID).TapDevice; tap != "" {
			used = append(used, tap)
		}
	}
@@ -294,28 +282,28 @@ func (d *Daemon) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.CreateVM(ctx, params)
		vm, err := d.vmSvc().CreateVM(ctx, params)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.create.begin":
		params, err := rpc.DecodeParams[api.VMCreateParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		op, err := d.BeginVMCreate(ctx, params)
		op, err := d.vmSvc().BeginVMCreate(ctx, params)
		return marshalResultOrError(api.VMCreateBeginResult{Operation: op}, err)
	case "vm.create.status":
		params, err := rpc.DecodeParams[api.VMCreateStatusParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		op, err := d.VMCreateStatus(ctx, params.ID)
		op, err := d.vmSvc().VMCreateStatus(ctx, params.ID)
		return marshalResultOrError(api.VMCreateStatusResult{Operation: op}, err)
	case "vm.create.cancel":
		params, err := rpc.DecodeParams[api.VMCreateStatusParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		err = d.CancelVMCreate(ctx, params.ID)
		err = d.vmSvc().CancelVMCreate(ctx, params.ID)
		return marshalResultOrError(api.Empty{}, err)
	case "vm.list":
		vms, err := d.store.ListVMs(ctx)
@@ -325,63 +313,63 @@ func (d *Daemon) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.FindVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().FindVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.start":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.StartVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().StartVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.stop":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.StopVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().StopVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.kill":
		params, err := rpc.DecodeParams[api.VMKillParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.KillVM(ctx, params)
		vm, err := d.vmSvc().KillVM(ctx, params)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.restart":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.RestartVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().RestartVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.delete":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.DeleteVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().DeleteVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.set":
		params, err := rpc.DecodeParams[api.VMSetParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.SetVM(ctx, params)
		vm, err := d.vmSvc().SetVM(ctx, params)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.stats":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, stats, err := d.GetVMStats(ctx, params.IDOrName)
		vm, stats, err := d.vmSvc().GetVMStats(ctx, params.IDOrName)
		return marshalResultOrError(api.VMStatsResult{VM: vm, Stats: stats}, err)
	case "vm.logs":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.FindVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().FindVM(ctx, params.IDOrName)
		if err != nil {
			return rpc.NewError("not_found", err.Error())
		}
@@ -391,11 +379,11 @@ func (d *Daemon) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.TouchVM(ctx, params.IDOrName)
		vm, err := d.vmSvc().TouchVM(ctx, params.IDOrName)
		if err != nil {
			return rpc.NewError("not_found", err.Error())
		}
		if !d.vmAlive(vm) {
		if !d.vmSvc().vmAlive(vm) {
			return rpc.NewError("not_running", fmt.Sprintf("vm %s is not running", vm.Name))
		}
		return marshalResultOrError(api.VMSSHResult{Name: vm.Name, GuestIP: vm.Runtime.GuestIP}, nil)
@@ -404,21 +392,21 @@ func (d *Daemon) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		result, err := d.HealthVM(ctx, params.IDOrName)
		result, err := d.vmSvc().HealthVM(ctx, params.IDOrName)
		return marshalResultOrError(result, err)
	case "vm.ping":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		result, err := d.PingVM(ctx, params.IDOrName)
		result, err := d.vmSvc().PingVM(ctx, params.IDOrName)
		return marshalResultOrError(result, err)
	case "vm.ports":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		result, err := d.PortsVM(ctx, params.IDOrName)
		result, err := d.vmSvc().PortsVM(ctx, params.IDOrName)
		return marshalResultOrError(result, err)
	case "vm.workspace.prepare":
		params, err := rpc.DecodeParams[api.VMWorkspacePrepareParams](req)
@@ -519,14 +507,14 @@ func (d *Daemon) backgroundLoop() {
		case <-d.closing:
			return
		case <-statsTicker.C:
			if err := d.pollStats(context.Background()); err != nil && d.logger != nil {
			if err := d.vmSvc().pollStats(context.Background()); err != nil && d.logger != nil {
				d.logger.Error("background stats poll failed", "error", err.Error())
			}
		case <-staleTicker.C:
			if err := d.stopStaleVMs(context.Background()); err != nil && d.logger != nil {
			if err := d.vmSvc().stopStaleVMs(context.Background()); err != nil && d.logger != nil {
				d.logger.Error("background stale sweep failed", "error", err.Error())
			}
			d.pruneVMCreateOperations(time.Now().Add(-10 * time.Minute))
			d.vmSvc().pruneVMCreateOperations(time.Now().Add(-10 * time.Minute))
		}
	}
}
@@ -543,18 +531,18 @@ func (d *Daemon) reconcile(ctx context.Context) error {
		return op.fail(err)
	}
	for _, vm := range vms {
		if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
		if err := d.vmSvc().withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
			if vm.State != model.VMStateRunning {
				// Belt-and-braces: a stopped VM should never have a
				// scratch file or a cache entry. Clean up anything
				// left by an ungraceful previous daemon crash.
				d.clearVMHandles(vm)
				d.vmSvc().clearVMHandles(vm)
				return nil
			}
			// Rebuild the in-memory handle cache by loading the per-VM
			// scratch file and verifying the firecracker process is
			// still alive.
			h, alive, err := d.rediscoverHandles(ctx, vm)
			h, alive, err := d.vmSvc().rediscoverHandles(ctx, vm)
			if err != nil && d.logger != nil {
				d.logger.Warn("rediscover handles failed", "vm_id", vm.ID, "error", err.Error())
			}
@@ -562,54 +550,33 @@ func (d *Daemon) reconcile(ctx context.Context) error {
			// claimed. If alive, subsequent vmAlive() calls pass; if
			// not, cleanupRuntime needs these handles to know which
			// kernel resources (DM / loops / tap) to tear down.
			d.setVMHandlesInMemory(vm.ID, h)
			d.vmSvc().setVMHandlesInMemory(vm.ID, h)
			if alive {
				return nil
			}
			op.stage("stale_vm", vmLogAttrs(vm)...)
			_ = d.cleanupRuntime(ctx, vm, true)
			_ = d.vmSvc().cleanupRuntime(ctx, vm, true)
			vm.State = model.VMStateStopped
			vm.Runtime.State = model.VMStateStopped
			d.clearVMHandles(vm)
			d.vmSvc().clearVMHandles(vm)
			vm.UpdatedAt = model.Now()
			return d.store.UpsertVM(ctx, vm)
		}); err != nil {
			return op.fail(err, "vm_id", vm.ID)
		}
	}
	if err := d.rebuildDNS(ctx); err != nil {
	if err := d.vmSvc().rebuildDNS(ctx); err != nil {
		return op.fail(err)
	}
	op.done()
	return nil
}

// FindVM stays on Daemon as a thin forwarder to the VM service lookup.
// Dispatch code reads the facade directly; tests that pre-date the
// service split keep compiling.
func (d *Daemon) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	if idOrName == "" {
		return model.VMRecord{}, errors.New("vm id or name is required")
	}
	if vm, err := d.store.GetVM(ctx, idOrName); err == nil {
		return vm, nil
	}
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return model.VMRecord{}, err
	}
	matchCount := 0
	var match model.VMRecord
	for _, vm := range vms {
		if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
			match = vm
			matchCount++
		}
	}
	if matchCount == 1 {
		return match, nil
	}
	if matchCount > 1 {
		return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
	}
	return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
	return d.vmSvc().FindVM(ctx, idOrName)
}

// FindImage stays on Daemon as a thin forwarder to the image service
@@ -620,52 +587,7 @@ func (d *Daemon) FindImage(ctx context.Context, idOrName string) (model.Image, e
}

func (d *Daemon) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		system.TouchNow(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
}

func (d *Daemon) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
	vm, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	return d.withVMLockByID(ctx, vm.ID, fn)
}

func (d *Daemon) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
	if strings.TrimSpace(id) == "" {
		return model.VMRecord{}, errors.New("vm id is required")
	}
	unlock := d.lockVMID(id)
	defer unlock()

	vm, err := d.store.GetVMByID(ctx, id)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
		}
		return model.VMRecord{}, err
	}
	return fn(vm)
}

func (d *Daemon) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
	_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
		if err := fn(vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
	return err
}

func (d *Daemon) lockVMID(id string) func() {
	return d.vmLocks.lock(id)
	return d.vmSvc().TouchVM(ctx, idOrName)
}

func marshalResultOrError(v any, err error) rpc.Response {