banger/internal/daemon/vm.go
Thales Maciel 4930d82cb9
Refactor VM lifecycle around capabilities
Make host-integrated VM features fit a standard Go extension path instead of adding more one-off branches through vm.go. This is the enabling refactor for future work like shared mounts, not the /work feature itself.

Add a daemon capability pipeline plus a structured guest-config builder, then move the existing /root work-disk mount, built-in DNS, and NAT wiring onto those hooks. Generalize Firecracker drive config at the same time so later storage features can extend machine setup without another hardcoded path.

Add banger doctor on top of the shared readiness checks, update the docs to describe the new architecture, and cover the new seams with guest-config, capability, report, CLI, and full go test verification. Also verify make build and a real ./banger doctor run on the host.
2026-03-18 19:28:26 -03:00

995 lines
30 KiB
Go

package daemon
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"banger/internal/api"
"banger/internal/firecracker"
"banger/internal/guestconfig"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
"banger/internal/vmdns"
)
var (
errWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
gracefulShutdownWait = 10 * time.Second
)
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
d.mu.Lock()
defer d.mu.Unlock()
op := d.beginOperation("vm.create")
defer func() {
if err != nil {
op.fail(err)
return
}
op.done(vmLogAttrs(vm)...)
}()
if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
return model.VMRecord{}, err
}
if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
return model.VMRecord{}, err
}
imageName := params.ImageName
if imageName == "" {
imageName = d.config.DefaultImageName
}
image, err := d.FindImage(ctx, imageName)
if err != nil {
return model.VMRecord{}, err
}
op.stage("image_resolved", imageLogAttrs(image)...)
name := strings.TrimSpace(params.Name)
if name == "" {
name, err = d.generateName(ctx)
if err != nil {
return model.VMRecord{}, err
}
}
if _, err := d.FindVM(ctx, name); err == nil {
return model.VMRecord{}, fmt.Errorf("vm name already exists: %s", name)
}
id, err := model.NewID()
if err != nil {
return model.VMRecord{}, err
}
unlockVM := d.lockVMID(id)
defer unlockVM()
guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
if err != nil {
return model.VMRecord{}, err
}
vmDir := filepath.Join(d.layout.VMsDir, id)
if err := os.MkdirAll(vmDir, 0o755); err != nil {
return model.VMRecord{}, err
}
systemOverlaySize := int64(model.DefaultSystemOverlaySize)
if params.SystemOverlaySize != "" {
systemOverlaySize, err = model.ParseSize(params.SystemOverlaySize)
if err != nil {
return model.VMRecord{}, err
}
}
workDiskSize := int64(model.DefaultWorkDiskSize)
if params.WorkDiskSize != "" {
workDiskSize, err = model.ParseSize(params.WorkDiskSize)
if err != nil {
return model.VMRecord{}, err
}
}
now := model.Now()
spec := model.VMSpec{
VCPUCount: optionalIntOrDefault(params.VCPUCount, model.DefaultVCPUCount),
MemoryMiB: optionalIntOrDefault(params.MemoryMiB, model.DefaultMemoryMiB),
SystemOverlaySizeByte: systemOverlaySize,
WorkDiskSizeBytes: workDiskSize,
NATEnabled: params.NATEnabled,
}
vm = model.VMRecord{
ID: id,
Name: name,
ImageID: image.ID,
State: model.VMStateCreated,
CreatedAt: now,
UpdatedAt: now,
LastTouchedAt: now,
Spec: spec,
Runtime: model.VMRuntime{
State: model.VMStateCreated,
GuestIP: guestIP,
DNSName: vmdns.RecordName(name),
VMDir: vmDir,
SystemOverlay: filepath.Join(vmDir, "system.cow"),
WorkDiskPath: filepath.Join(vmDir, "root.ext4"),
LogPath: filepath.Join(vmDir, "firecracker.log"),
MetricsPath: filepath.Join(vmDir, "metrics.json"),
},
}
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
op.stage("persisted", vmLogAttrs(vm)...)
if params.NoStart {
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
}
return d.startVMLocked(ctx, vm, image)
}
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
image, err := d.store.GetImageByID(ctx, vm.ImageID)
if err != nil {
return model.VMRecord{}, err
}
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
if d.logger != nil {
d.logger.Info("vm already running", vmLogAttrs(vm)...)
}
return vm, nil
}
return d.startVMLocked(ctx, vm, image)
})
}
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
defer func() {
if err != nil {
err = annotateLogPath(err, vm.Runtime.LogPath)
op.fail(err, vmLogAttrs(vm)...)
return
}
op.done(vmLogAttrs(vm)...)
}()
op.stage("preflight")
if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
return model.VMRecord{}, err
}
if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
return model.VMRecord{}, err
}
op.stage("cleanup_runtime")
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
return model.VMRecord{}, err
}
clearRuntimeHandles(&vm)
op.stage("bridge")
if err := d.ensureBridge(ctx); err != nil {
return model.VMRecord{}, err
}
op.stage("socket_dir")
if err := d.ensureSocketDir(); err != nil {
return model.VMRecord{}, err
}
shortID := system.ShortID(vm.ID)
apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
tap := "tap-fc-" + shortID
dmName := "fc-rootfs-" + shortID
if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
return model.VMRecord{}, err
}
op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
return model.VMRecord{}, err
}
op.stage("dm_snapshot", "dm_name", dmName)
handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
if err != nil {
return model.VMRecord{}, err
}
vm.Runtime.BaseLoop = handles.BaseLoop
vm.Runtime.COWLoop = handles.COWLoop
vm.Runtime.DMName = handles.DMName
vm.Runtime.DMDev = handles.DMDev
vm.Runtime.APISockPath = apiSock
vm.Runtime.TapDevice = tap
vm.Runtime.State = model.VMStateRunning
vm.State = model.VMStateRunning
vm.Runtime.LastError = ""
cleanupOnErr := func(err error) (model.VMRecord, error) {
vm.State = model.VMStateError
vm.Runtime.State = model.VMStateError
vm.Runtime.LastError = err.Error()
op.stage("cleanup_after_failure", "error", err.Error())
if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
err = errors.Join(err, cleanupErr)
}
clearRuntimeHandles(&vm)
_ = d.store.UpsertVM(context.Background(), vm)
return model.VMRecord{}, err
}
op.stage("patch_root_overlay")
if err := d.patchRootOverlay(ctx, vm, image); err != nil {
return cleanupOnErr(err)
}
op.stage("prepare_host_features")
if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
return cleanupOnErr(err)
}
op.stage("tap", "tap_device", tap)
if err := d.createTap(ctx, tap); err != nil {
return cleanupOnErr(err)
}
op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
return cleanupOnErr(err)
}
op.stage("firecracker_binary")
fcPath, err := d.firecrackerBinary()
if err != nil {
return cleanupOnErr(err)
}
op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
firecrackerCtx := context.Background()
machineConfig := firecracker.MachineConfig{
BinaryPath: fcPath,
VMID: vm.ID,
SocketPath: apiSock,
LogPath: vm.Runtime.LogPath,
MetricsPath: vm.Runtime.MetricsPath,
KernelImagePath: image.KernelPath,
InitrdPath: image.InitrdPath,
KernelArgs: system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
Drives: []firecracker.DriveConfig{{
ID: "rootfs",
Path: vm.Runtime.DMDev,
ReadOnly: false,
IsRoot: true,
}},
TapDevice: tap,
VCPUCount: vm.Spec.VCPUCount,
MemoryMiB: vm.Spec.MemoryMiB,
Logger: d.logger,
}
d.contributeMachineConfig(&machineConfig, vm, image)
machine, err := firecracker.NewMachine(firecrackerCtx, machineConfig)
if err != nil {
return cleanupOnErr(err)
}
if err := machine.Start(firecrackerCtx); err != nil {
vm.Runtime.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, apiSock)
return cleanupOnErr(err)
}
vm.Runtime.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, apiSock)
op.debugStage("firecracker_started", "pid", vm.Runtime.PID)
op.stage("socket_access", "api_socket", apiSock)
if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
return cleanupOnErr(err)
}
op.stage("post_start_features")
if err := d.postStartCapabilities(ctx, vm, image); err != nil {
return cleanupOnErr(err)
}
system.TouchNow(&vm)
op.stage("persist")
if err := d.store.UpsertVM(ctx, vm); err != nil {
return cleanupOnErr(err)
}
return vm, nil
}
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return d.stopVMLocked(ctx, vm)
})
}
func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
vm = current
op := d.beginOperation("vm.stop", "vm_ref", vm.ID)
defer func() {
if err != nil {
op.fail(err, vmLogAttrs(vm)...)
return
}
op.done(vmLogAttrs(vm)...)
}()
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
op.stage("cleanup_stale_runtime")
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
return model.VMRecord{}, err
}
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
}
op.stage("graceful_shutdown")
if err := d.sendCtrlAltDel(ctx, vm); err != nil {
return model.VMRecord{}, err
}
op.stage("wait_for_exit", "pid", vm.Runtime.PID)
if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
if !errors.Is(err, errWaitForExitTimeout) {
return model.VMRecord{}, err
}
op.stage("graceful_shutdown_timeout", "pid", vm.Runtime.PID)
}
op.stage("cleanup_runtime")
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
return model.VMRecord{}, err
}
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
system.TouchNow(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
}
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMRecord, error) {
return d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return d.killVMLocked(ctx, vm, params.Signal)
})
}
func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signalValue string) (vm model.VMRecord, err error) {
vm = current
op := d.beginOperation("vm.kill", "vm_ref", vm.ID, "signal", signalValue)
defer func() {
if err != nil {
op.fail(err, vmLogAttrs(vm)...)
return
}
op.done(vmLogAttrs(vm)...)
}()
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
op.stage("cleanup_stale_runtime")
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
return model.VMRecord{}, err
}
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
}
signal := strings.TrimSpace(signalValue)
if signal == "" {
signal = "TERM"
}
op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal)
if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil {
return model.VMRecord{}, err
}
op.stage("wait_for_exit", "pid", vm.Runtime.PID)
if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
if !errors.Is(err, errWaitForExitTimeout) {
return model.VMRecord{}, err
}
op.stage("signal_timeout", "pid", vm.Runtime.PID, "signal", signal)
}
op.stage("cleanup_runtime")
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
return model.VMRecord{}, err
}
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
system.TouchNow(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
}
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
op := d.beginOperation("vm.restart", "vm_ref", idOrName)
defer func() {
if err != nil {
op.fail(err, vmLogAttrs(vm)...)
return
}
op.done(vmLogAttrs(vm)...)
}()
resolved, err := d.FindVM(ctx, idOrName)
if err != nil {
return model.VMRecord{}, err
}
return d.withVMLockByID(ctx, resolved.ID, func(vm model.VMRecord) (model.VMRecord, error) {
op.stage("stop")
vm, err = d.stopVMLocked(ctx, vm)
if err != nil {
return model.VMRecord{}, err
}
image, err := d.store.GetImageByID(ctx, vm.ImageID)
if err != nil {
return model.VMRecord{}, err
}
op.stage("start", vmLogAttrs(vm)...)
return d.startVMLocked(ctx, vm, image)
})
}
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return d.deleteVMLocked(ctx, vm)
})
}
func (d *Daemon) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
vm = current
op := d.beginOperation("vm.delete", "vm_ref", vm.ID)
defer func() {
if err != nil {
op.fail(err, vmLogAttrs(vm)...)
return
}
op.done(vmLogAttrs(vm)...)
}()
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
op.stage("kill_running_vm", "pid", vm.Runtime.PID)
_ = d.killVMProcess(ctx, vm.Runtime.PID)
}
op.stage("cleanup_runtime")
if err := d.cleanupRuntime(ctx, vm, false); err != nil {
return model.VMRecord{}, err
}
op.stage("delete_store_record")
if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
return model.VMRecord{}, err
}
if vm.Runtime.VMDir != "" {
op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
return model.VMRecord{}, err
}
}
return vm, nil
}
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRecord, error) {
return d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return d.setVMLocked(ctx, vm, params)
})
}
func (d *Daemon) setVMLocked(ctx context.Context, current model.VMRecord, params api.VMSetParams) (vm model.VMRecord, err error) {
vm = current
op := d.beginOperation("vm.set", "vm_ref", vm.ID)
defer func() {
if err != nil {
op.fail(err, vmLogAttrs(vm)...)
return
}
op.done(vmLogAttrs(vm)...)
}()
running := vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath)
if params.VCPUCount != nil {
if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
return model.VMRecord{}, err
}
if running {
return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
}
op.stage("update_vcpu", "vcpu_count", *params.VCPUCount)
vm.Spec.VCPUCount = *params.VCPUCount
}
if params.MemoryMiB != nil {
if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
return model.VMRecord{}, err
}
if running {
return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
}
op.stage("update_memory", "memory_mib", *params.MemoryMiB)
vm.Spec.MemoryMiB = *params.MemoryMiB
}
if params.WorkDiskSize != "" {
size, err := model.ParseSize(params.WorkDiskSize)
if err != nil {
return model.VMRecord{}, err
}
if running {
return model.VMRecord{}, errors.New("disk changes require the VM to be stopped")
}
if size < vm.Spec.WorkDiskSizeBytes {
return model.VMRecord{}, errors.New("disk size can only grow")
}
if size > vm.Spec.WorkDiskSizeBytes {
if exists(vm.Runtime.WorkDiskPath) {
op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size)
if err := d.validateWorkDiskResizePrereqs(); err != nil {
return model.VMRecord{}, err
}
if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, size); err != nil {
return model.VMRecord{}, err
}
}
vm.Spec.WorkDiskSizeBytes = size
}
}
if params.NATEnabled != nil {
op.stage("update_nat", "nat_enabled", *params.NATEnabled)
vm.Spec.NATEnabled = *params.NATEnabled
}
if running {
if err := d.applyCapabilityConfigChanges(ctx, current, vm); err != nil {
return model.VMRecord{}, err
}
}
system.TouchNow(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
}
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
vm, err := d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
return d.getVMStatsLocked(ctx, vm)
})
if err != nil {
return model.VMRecord{}, model.VMStats{}, err
}
return vm, vm.Stats, nil
}
func (d *Daemon) getVMStatsLocked(ctx context.Context, vm model.VMRecord) (model.VMRecord, error) {
stats, err := d.collectStats(ctx, vm)
if err == nil {
vm.Stats = stats
vm.UpdatedAt = model.Now()
_ = d.store.UpsertVM(ctx, vm)
if d.logger != nil {
d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
}
}
return vm, nil
}
func (d *Daemon) pollStats(ctx context.Context) error {
vms, err := d.store.ListVMs(ctx)
if err != nil {
return err
}
for _, vm := range vms {
if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
return nil
}
stats, err := d.collectStats(ctx, vm)
if err != nil {
if d.logger != nil {
d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
}
return nil
}
vm.Stats = stats
vm.UpdatedAt = model.Now()
return d.store.UpsertVM(ctx, vm)
}); err != nil {
return err
}
}
return nil
}
func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
if d.config.AutoStopStaleAfter <= 0 {
return nil
}
op := d.beginOperation("vm.stop_stale")
defer func() {
if err != nil {
op.fail(err)
return
}
op.done()
}()
vms, err := d.store.ListVMs(ctx)
if err != nil {
return err
}
now := model.Now()
for _, vm := range vms {
if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
return nil
}
if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
return nil
}
op.stage("stopping_vm", vmLogAttrs(vm)...)
_ = d.sendCtrlAltDel(ctx, vm)
_ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second)
_ = d.cleanupRuntime(ctx, vm, true)
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
vm.UpdatedAt = model.Now()
return d.store.UpsertVM(ctx, vm)
}); err != nil {
return err
}
}
return nil
}
func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
stats := model.VMStats{
CollectedAt: model.Now(),
SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
WorkDiskBytes: system.AllocatedBytes(vm.Runtime.WorkDiskPath),
MetricsRaw: system.ParseMetricsFile(vm.Runtime.MetricsPath),
}
if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
ps, err := system.ReadProcessStats(ctx, vm.Runtime.PID)
if err == nil {
stats.CPUPercent = ps.CPUPercent
stats.RSSBytes = ps.RSSBytes
stats.VSZBytes = ps.VSZBytes
}
}
return stats, nil
}
func (d *Daemon) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
if exists(vm.Runtime.SystemOverlay) {
return nil
}
_, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10), vm.Runtime.SystemOverlay)
return err
}
func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image) error {
resolv := []byte(fmt.Sprintf("nameserver %s\n", d.config.DefaultDNS))
hostname := []byte(vm.Name + "\n")
hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name))
sshdConfig := []byte(strings.Join([]string{
"LogLevel DEBUG3",
"PermitRootLogin yes",
"PubkeyAuthentication yes",
"AuthorizedKeysFile /root/.ssh/authorized_keys",
"StrictModes no",
"",
}, "\n"))
fstab, err := system.ReadDebugFSText(ctx, d.runner, vm.Runtime.DMDev, "/etc/fstab")
if err != nil {
fstab = ""
}
builder := guestconfig.NewBuilder()
builder.WriteFile("/etc/resolv.conf", resolv)
builder.WriteFile("/etc/hostname", hostname)
builder.WriteFile("/etc/hosts", hosts)
builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig)
builder.DropMountTarget("/home")
builder.DropMountTarget("/var")
builder.AddMount(guestconfig.MountSpec{
Source: "tmpfs",
Target: "/run",
FSType: "tmpfs",
Options: []string{"defaults", "nodev", "nosuid", "mode=0755"},
Dump: 0,
Pass: 0,
})
builder.AddMount(guestconfig.MountSpec{
Source: "tmpfs",
Target: "/tmp",
FSType: "tmpfs",
Options: []string{"defaults", "nodev", "nosuid", "mode=1777"},
Dump: 0,
Pass: 0,
})
d.contributeGuestConfig(builder, vm, image)
builder.WriteFile("/etc/fstab", []byte(builder.RenderFSTab(fstab)))
files := builder.Files()
for _, guestPath := range builder.FilePaths() {
data := files[guestPath]
if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil {
return err
}
}
return nil
}
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord) error {
if exists(vm.Runtime.WorkDiskPath) {
return nil
}
if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
return err
}
if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil {
return err
}
rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true)
if err != nil {
return err
}
defer cleanupRoot()
workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
if err != nil {
return err
}
defer cleanupWork()
if err := system.CopyDirContents(ctx, d.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil {
return err
}
if err := d.flattenNestedWorkHome(ctx, workMount); err != nil {
return err
}
return nil
}
func (d *Daemon) flattenNestedWorkHome(ctx context.Context, workMount string) error {
nestedHome := filepath.Join(workMount, "root")
if !exists(nestedHome) {
return nil
}
if _, err := d.runner.RunSudo(ctx, "chmod", "755", nestedHome); err != nil {
return err
}
entries, err := os.ReadDir(nestedHome)
if err != nil {
return err
}
for _, entry := range entries {
sourcePath := filepath.Join(nestedHome, entry.Name())
if _, err := d.runner.RunSudo(ctx, "cp", "-a", sourcePath, workMount+"/"); err != nil {
return err
}
}
_, err = d.runner.RunSudo(ctx, "rm", "-rf", nestedHome)
return err
}
func (d *Daemon) ensureBridge(ctx context.Context) error {
if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
return err
}
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
return err
}
func (d *Daemon) ensureSocketDir() error {
return os.MkdirAll(d.layout.RuntimeDir, 0o755)
}
func (d *Daemon) createTap(ctx context.Context, tap string) error {
if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
}
if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
return err
}
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
return err
}
func (d *Daemon) firecrackerBinary() (string, error) {
if d.config.FirecrackerBin == "" {
return "", fmt.Errorf("firecracker binary not configured; %s", paths.RuntimeBundleHint())
}
path := d.config.FirecrackerBin
if !exists(path) {
return "", fmt.Errorf("firecracker binary not found at %s; %s", path, paths.RuntimeBundleHint())
}
return path, nil
}
func (d *Daemon) ensureSocketAccess(ctx context.Context, apiSock string) error {
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock); err != nil {
return err
}
_, err := d.runner.RunSudo(ctx, "chmod", "600", apiSock)
return err
}
func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
if err != nil {
return 0, err
}
return strconv.Atoi(strings.TrimSpace(string(out)))
}
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 {
return pid
}
if machine != nil {
if pid, err := machine.PID(); err == nil && pid > 0 {
return pid
}
}
return 0
}
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
return err
}
client := firecracker.New(vm.Runtime.APISockPath, d.logger)
return client.SendCtrlAltDel(ctx)
}
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for {
if !system.ProcessRunning(pid, apiSock) {
return nil
}
if time.Now().After(deadline) {
return errWaitForExitTimeout
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(100 * time.Millisecond):
}
}
}
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
if d.logger != nil {
d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...)
}
cleanupPID := vm.Runtime.PID
if vm.Runtime.APISockPath != "" {
if pid, err := d.findFirecrackerPID(ctx, vm.Runtime.APISockPath); err == nil && pid > 0 {
cleanupPID = pid
}
}
if cleanupPID > 0 && system.ProcessRunning(cleanupPID, vm.Runtime.APISockPath) {
_ = d.killVMProcess(ctx, cleanupPID)
if err := d.waitForExit(ctx, cleanupPID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
return err
}
}
if vm.Runtime.TapDevice != "" {
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.Runtime.TapDevice)
}
if vm.Runtime.APISockPath != "" {
_ = os.Remove(vm.Runtime.APISockPath)
}
snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{
BaseLoop: vm.Runtime.BaseLoop,
COWLoop: vm.Runtime.COWLoop,
DMName: vm.Runtime.DMName,
DMDev: vm.Runtime.DMDev,
})
featureErr := d.cleanupCapabilityState(ctx, vm)
if !preserveDisks && vm.Runtime.VMDir != "" {
return errors.Join(snapshotErr, featureErr, os.RemoveAll(vm.Runtime.VMDir))
}
return errors.Join(snapshotErr, featureErr)
}
func clearRuntimeHandles(vm *model.VMRecord) {
vm.Runtime.PID = 0
vm.Runtime.APISockPath = ""
vm.Runtime.TapDevice = ""
vm.Runtime.BaseLoop = ""
vm.Runtime.COWLoop = ""
vm.Runtime.DMName = ""
vm.Runtime.DMDev = ""
}
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
if d.vmDNS == nil {
return nil
}
return d.vmDNS.Set(vmdns.RecordName(vmName), guestIP)
}
func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error {
if dnsName == "" {
return nil
}
if d.vmDNS == nil {
return nil
}
return d.vmDNS.Remove(dnsName)
}
func (d *Daemon) rebuildDNS(ctx context.Context) error {
if d.vmDNS == nil {
return nil
}
vms, err := d.store.ListVMs(ctx)
if err != nil {
return err
}
records := make(map[string]string)
for _, vm := range vms {
if vm.State != model.VMStateRunning {
continue
}
if !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
continue
}
if strings.TrimSpace(vm.Runtime.GuestIP) == "" {
continue
}
records[vmdns.RecordName(vm.Name)] = vm.Runtime.GuestIP
}
return d.vmDNS.Replace(records)
}
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
_, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
return err
}
func (d *Daemon) generateName(ctx context.Context) (string, error) {
if exists(d.config.NamegenPath) {
out, err := d.runner.Run(ctx, d.config.NamegenPath)
if err == nil {
name := strings.TrimSpace(string(out))
if name != "" {
return name, nil
}
}
}
return "vm-" + strconv.FormatInt(time.Now().Unix(), 10), nil
}
func bridgePrefix(bridgeIP string) string {
parts := strings.Split(bridgeIP, ".")
if len(parts) < 3 {
return bridgeIP
}
return strings.Join(parts[:3], ".")
}
func optionalIntOrDefault(value *int, fallback int) int {
if value != nil {
return *value
}
return fallback
}
func validateOptionalPositiveSetting(label string, value *int) error {
if value == nil {
return nil
}
if *value <= 0 {
return fmt.Errorf("%s must be a positive integer", label)
}
return nil
}