Move the supported systemd path to two services: an owner-user bangerd for orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop, and Firecracker ownership. This removes repeated sudo from daily vm and image flows without leaving the general daemon running as root. Add install metadata, system install/status/restart/uninstall commands, and a system-owned runtime layout. Keep user SSH/config material in the owner home, lock file_sync to the owner home, and move daemon known_hosts handling out of the old root-owned control path. Route privileged lifecycle steps through typed privilegedOps calls, harden the two systemd units, and rewrite smoke plus docs around the supported service model. Verified with make build, make test, make lint, and make smoke on the supported systemd host path.
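The privilegedOps seam named above is not shown in this file. As a rough sketch only (every method name below is an assumption, not the real interface), the typed calls into the root helper might look like:

// Hypothetical sketch of a typed privileged-operations seam, grouped by
// the root helper's stated scope (bridge/tap, NAT/resolver, dm/loop,
// Firecracker ownership). The real privilegedOps interface lives
// elsewhere in the codebase and will differ.
type privilegedOpsSketch interface {
	EnsureBridge(ctx context.Context, name string) error
	CreateTap(ctx context.Context, bridge, owner string) (tap string, err error)
	ConfigureNAT(ctx context.Context, subnetCIDR string) error
	AttachLoopDevice(ctx context.Context, imagePath string) (device string, err error)
	DetachLoopDevice(ctx context.Context, device string) error
	ChownFirecrackerSocket(ctx context.Context, path string) error
}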
package daemon

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log/slog"
	"strings"
	"sync"

	"banger/internal/daemon/opstate"
	"banger/internal/firecracker"
	"banger/internal/guestconfig"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/store"
	"banger/internal/system"
)

// VMService owns VM lifecycle — create / start / stop / restart /
// kill / delete / set — plus the handle cache, create-operation
// registry, stats polling, disk provisioning, ports query, and the
// SSH-client test seams.
//
// It holds pointers to its peer services (HostNetwork, ImageService,
// WorkspaceService) because VM lifecycle really does orchestrate
// across them (start needs bridge + tap + firecracker + auth sync +
// boot). Defining narrow function-typed interfaces for every peer
// method VMService calls would balloon the diff for no real win —
// services remain unexported within the package so nothing outside
// the daemon can see them.
//
// Capability dispatch goes through the capHooks seam rather than a
// *Daemon pointer, so VMService has no path back to the composition
// root. Daemon.buildCapabilityHooks() populates the seam at wiring
// time with the registered-capabilities loops from capabilities.go.
type VMService struct {
	runner system.CommandRunner
	logger *slog.Logger
	config model.DaemonConfig
	layout paths.Layout
	store  *store.Store

	// vmLocks is the per-VM mutex set. Held across entire lifecycle
	// ops (start, stop, delete, set) — not just the validation window.
	// Workspace.prepare intentionally splits off onto its own lock
	// scope; see WorkspaceService.
	vmLocks    vmLockSet
	createVMMu sync.Mutex
	createOps  opstate.Registry[*vmCreateOperationState]

	// handles caches per-VM transient kernel/process state (PID, tap,
	// loop devices, DM name/device). Rebuildable at daemon startup
	// from a per-VM handles.json scratch file plus OS inspection.
	handles *handleCache

	// Peer services. VMService orchestrates across all three during
	// start/stop/delete; pointer fields keep call sites direct without
	// promoting the peer API to package-level interfaces.
	net  *HostNetwork
	img  *ImageService
	ws   *WorkspaceService
	priv privilegedOps

	// vsockHostDevice is the path preflight + doctor expect to find for
	// the vhost-vsock device. Defaults to defaultVsockHostDevice; tests
	// point at a tempfile so RequireFile passes without needing the
	// real kernel module loaded.
	vsockHostDevice string

	// Capability hook dispatch. VMService invokes capabilities via
	// these seams, populated by Daemon.buildCapabilityHooks() at
	// wiring time. Capability implementations themselves are
	// structs with explicit service-pointer fields (see capabilities.go);
	// VMService never reaches back to *Daemon.
	capHooks capabilityHooks

	beginOperation func(name string, attrs ...any) *operationLog
}

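// Illustrative only: vmLockSet is defined elsewhere in the package, so
// the sketch below is an assumption about its shape, not the real
// implementation: just a minimal per-ID mutex set whose lock(id)
// returns the matching unlock.
type vmLockSetSketch struct {
	mu    sync.Mutex
	locks map[string]*sync.Mutex
}

func (l *vmLockSetSketch) lock(id string) func() {
	l.mu.Lock()
	if l.locks == nil {
		l.locks = make(map[string]*sync.Mutex)
	}
	m, ok := l.locks[id]
	if !ok {
		m = &sync.Mutex{}
		l.locks[id] = m
	}
	l.mu.Unlock()
	m.Lock()
	return m.Unlock
}
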
// capabilityHooks bundles the capability-dispatch entry points that
// VMService needs. Populated by Daemon.buildCapabilityHooks() at
// service construction; stubbable in tests that don't care about
// capability side effects.
type capabilityHooks struct {
	addStartPrereqs    func(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image)
	contributeGuest    func(builder *guestconfig.Builder, vm model.VMRecord, image model.Image)
	contributeMachine  func(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image)
	prepareHosts       func(ctx context.Context, vm *model.VMRecord, image model.Image) error
	postStart          func(ctx context.Context, vm model.VMRecord, image model.Image) error
	cleanupState       func(ctx context.Context, vm model.VMRecord) error
	applyConfigChanges func(ctx context.Context, before, after model.VMRecord) error
}

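// Illustrative only: a fully stubbed hooks value for tests that skip
// capability side effects, as the doc comment above suggests. This
// helper is a sketch, not part of the package's real test harness.
func noopCapabilityHooksSketch() capabilityHooks {
	return capabilityHooks{
		addStartPrereqs:    func(context.Context, *system.Preflight, model.VMRecord, model.Image) {},
		contributeGuest:    func(*guestconfig.Builder, model.VMRecord, model.Image) {},
		contributeMachine:  func(*firecracker.MachineConfig, model.VMRecord, model.Image) {},
		prepareHosts:       func(context.Context, *model.VMRecord, model.Image) error { return nil },
		postStart:          func(context.Context, model.VMRecord, model.Image) error { return nil },
		cleanupState:       func(context.Context, model.VMRecord) error { return nil },
		applyConfigChanges: func(context.Context, model.VMRecord, model.VMRecord) error { return nil },
	}
}
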
type vmServiceDeps struct {
	runner          system.CommandRunner
	logger          *slog.Logger
	config          model.DaemonConfig
	layout          paths.Layout
	store           *store.Store
	net             *HostNetwork
	img             *ImageService
	ws              *WorkspaceService
	priv            privilegedOps
	capHooks        capabilityHooks
	beginOperation  func(name string, attrs ...any) *operationLog
	vsockHostDevice string
}

func newVMService(deps vmServiceDeps) *VMService {
	vsockPath := deps.vsockHostDevice
	if vsockPath == "" {
		vsockPath = defaultVsockHostDevice
	}
	return &VMService{
		runner:          deps.runner,
		logger:          deps.logger,
		config:          deps.config,
		layout:          deps.layout,
		store:           deps.store,
		net:             deps.net,
		img:             deps.img,
		ws:              deps.ws,
		priv:            deps.priv,
		capHooks:        deps.capHooks,
		beginOperation:  deps.beginOperation,
		vsockHostDevice: vsockPath,
		handles:         newHandleCache(),
	}
}

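// Illustrative only: per the vsockHostDevice field comment, tests can
// point the preflight at a plain tempfile so the file-existence check
// passes without the vhost-vsock kernel module loaded. This wrapper is
// a sketch; the real tests may wire the override differently.
func newTestVMServiceSketch(deps vmServiceDeps, fakeVsockPath string) *VMService {
	deps.vsockHostDevice = fakeVsockPath // e.g. a file created under t.TempDir()
	return newVMService(deps)
}
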
// buildCapabilityHooks adapts Daemon's existing capability-dispatch
// methods into the capabilityHooks bag VMService takes. Keeps the
// registry + capability types on *Daemon while letting VMService call
// into them through explicit function seams.
func (d *Daemon) buildCapabilityHooks() capabilityHooks {
	return capabilityHooks{
		addStartPrereqs:    d.addCapabilityStartPrereqs,
		contributeGuest:    d.contributeGuestConfig,
		contributeMachine:  d.contributeMachineConfig,
		prepareHosts:       d.prepareCapabilityHosts,
		postStart:          d.postStartCapabilities,
		cleanupState:       d.cleanupCapabilityState,
		applyConfigChanges: d.applyCapabilityConfigChanges,
	}
}

// FindVM resolves an ID-or-name against the store with the historical
// precedence: exact-ID / exact-name first, then unambiguous prefix
// match. Returns an error when no match is found or when a prefix
// matches more than one record.
func (s *VMService) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	if idOrName == "" {
		return model.VMRecord{}, errors.New("vm id or name is required")
	}
	if vm, err := s.store.GetVM(ctx, idOrName); err == nil {
		return vm, nil
	}
	vms, err := s.store.ListVMs(ctx)
	if err != nil {
		return model.VMRecord{}, err
	}
	matchCount := 0
	var match model.VMRecord
	for _, vm := range vms {
		if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
			match = vm
			matchCount++
		}
	}
	if matchCount == 1 {
		return match, nil
	}
	if matchCount > 1 {
		return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
	}
	return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
}

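// Illustrative only: how FindVM's precedence plays out for a caller.
// The reference "a1b2" is a made-up example, not a real ID.
func findVMExampleSketch(ctx context.Context, svc *VMService) error {
	// An exact ID or name match wins immediately; otherwise "a1b2"
	// must be a prefix of exactly one record's ID or name.
	vm, err := svc.FindVM(ctx, "a1b2")
	if err != nil {
		// Either no record matched, or the prefix was ambiguous
		// ("multiple VMs match").
		return err
	}
	_ = vm
	return nil
}
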
// TouchVM bumps a VM's updated-at timestamp under the per-VM lock.
func (s *VMService) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		system.TouchNow(&vm)
		if err := s.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
}

// withVMLockByRef resolves idOrName then serialises fn under the
// per-VM lock. Every mutating VM operation funnels through here.
func (s *VMService) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
	vm, err := s.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	return s.withVMLockByID(ctx, vm.ID, fn)
}

// withVMLockByID locks on the stable VM ID (so a rename mid-flight
// doesn't drop the lock) and re-reads the record under the lock so
// fn sees the committed state.
func (s *VMService) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
	if strings.TrimSpace(id) == "" {
		return model.VMRecord{}, errors.New("vm id is required")
	}
	unlock := s.lockVMID(id)
	defer unlock()

	vm, err := s.store.GetVMByID(ctx, id)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
		}
		return model.VMRecord{}, err
	}
	return fn(vm)
}

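// Illustrative sketch, not part of the real API: what a mutating
// operation built on withVMLockByID could look like. The callback
// receives the record re-read under the lock, mutates it, and persists
// it through the store before the lock is released.
func (s *VMService) renameVMSketch(ctx context.Context, id, newName string) (model.VMRecord, error) {
	return s.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
		vm.Name = newName
		if err := s.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
}
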
// withVMLockByIDErr is the error-only variant of withVMLockByID for
// callers that don't need the returned record.
func (s *VMService) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
	_, err := s.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
		if err := fn(vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
	return err
}

// lockVMID exposes the per-VM mutex for callers that need to hold it
// outside the usual withVMLockByRef/withVMLockByID helpers
// (workspace prepare, for example).
func (s *VMService) lockVMID(id string) func() {
	return s.vmLocks.lock(id)
}

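// Illustrative only: a caller holding the per-VM mutex manually, as a
// workspace prepare-style path might when its work doesn't fit the
// withVMLock callback shape. The helper name is made up for this sketch.
func manualLockSketch(s *VMService, vmID string, work func() error) error {
	unlock := s.lockVMID(vmID)
	defer unlock()
	return work()
}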