vm create and vm set accepted zero or negative CPU and memory values, which were either stored directly or silently fell back to defaults and only surfaced as failures later. This change tightens validation so bad settings are rejected at the user boundary and again in the daemon before any VM record is persisted. The vm.create CPU and memory request fields become optional pointers, so an omitted value still means the default while an explicit non-positive value can be distinguished and rejected. It also updates Cobra create/set parsing, keeps the TUI aligned with the new API shape, and adds regression tests for CLI parsing, daemon-side validation, and the create-defaults path. Validation: go test ./... and make build. Left my-rootfs.ext4 untracked.
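
The Cobra-side change is not part of the file below; a minimal sketch of the pattern the description implies might look like this, where the package name, flag name handling, and the optionalFlagInt helper are illustrative assumptions rather than the actual CLI code. The key point is that an unset flag yields a nil pointer (daemon default applies), while an explicitly passed value, valid or not, survives long enough to be validated.

// Sketch only: assumed package, helper name, and flag wiring.
package cli

import (
	"fmt"

	"github.com/spf13/cobra"
)

// optionalFlagInt returns nil when the user did not pass the flag (so the
// daemon applies its default) and rejects explicit non-positive values at
// the user boundary, mirroring the daemon-side check.
func optionalFlagInt(cmd *cobra.Command, name string) (*int, error) {
	if !cmd.Flags().Changed(name) {
		return nil, nil // omitted: let the daemon pick the default
	}
	v, err := cmd.Flags().GetInt(name)
	if err != nil {
		return nil, err
	}
	if v <= 0 {
		return nil, fmt.Errorf("%s must be a positive integer", name)
	}
	return &v, nil
}
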
package daemon

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"banger/internal/api"
	"banger/internal/firecracker"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/system"
)

// CreateVM validates the requested settings, allocates a record with a
// unique name, guest IP, and per-VM directory, persists it, and boots the
// VM unless params.NoStart is set.
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.create")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
		return model.VMRecord{}, err
	}
	if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
		return model.VMRecord{}, err
	}

	imageName := params.ImageName
	if imageName == "" {
		imageName = d.config.DefaultImageName
	}
	image, err := d.FindImage(ctx, imageName)
	if err != nil {
		return model.VMRecord{}, err
	}
	op.stage("image_resolved", imageLogAttrs(image)...)
	name := strings.TrimSpace(params.Name)
	if name == "" {
		name, err = d.generateName(ctx)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	if _, err := d.FindVM(ctx, name); err == nil {
		return model.VMRecord{}, fmt.Errorf("vm name already exists: %s", name)
	}
	id, err := model.NewID()
	if err != nil {
		return model.VMRecord{}, err
	}
	guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
	if err != nil {
		return model.VMRecord{}, err
	}
	vmDir := filepath.Join(d.layout.VMsDir, id)
	if err := os.MkdirAll(vmDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	systemOverlaySize := int64(model.DefaultSystemOverlaySize)
	if params.SystemOverlaySize != "" {
		systemOverlaySize, err = model.ParseSize(params.SystemOverlaySize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	workDiskSize := int64(model.DefaultWorkDiskSize)
	if params.WorkDiskSize != "" {
		workDiskSize, err = model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	now := model.Now()
	spec := model.VMSpec{
		VCPUCount:             optionalIntOrDefault(params.VCPUCount, model.DefaultVCPUCount),
		MemoryMiB:             optionalIntOrDefault(params.MemoryMiB, model.DefaultMemoryMiB),
		SystemOverlaySizeByte: systemOverlaySize,
		WorkDiskSizeBytes:     workDiskSize,
		NATEnabled:            params.NATEnabled,
	}
	vm = model.VMRecord{
		ID:            id,
		Name:          name,
		ImageID:       image.ID,
		State:         model.VMStateCreated,
		CreatedAt:     now,
		UpdatedAt:     now,
		LastTouchedAt: now,
		Spec:          spec,
		Runtime: model.VMRuntime{
			State:         model.VMStateCreated,
			GuestIP:       guestIP,
			DNSName:       name + ".vm",
			VMDir:         vmDir,
			SystemOverlay: filepath.Join(vmDir, "system.cow"),
			WorkDiskPath:  filepath.Join(vmDir, "root.ext4"),
			LogPath:       filepath.Join(vmDir, "firecracker.log"),
			MetricsPath:   filepath.Join(vmDir, "metrics.json"),
		},
	}
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("persisted", vmLogAttrs(vm)...)
	if params.NoStart {
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	return d.startVMLocked(ctx, vm, image)
}

// StartVM boots an existing VM; it is a no-op when the VM is already running.
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	vm, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	image, err := d.store.GetImageByID(ctx, vm.ImageID)
	if err != nil {
		return model.VMRecord{}, err
	}
	if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		if d.logger != nil {
			d.logger.Info("vm already running", vmLogAttrs(vm)...)
		}
		return vm, nil
	}
	return d.startVMLocked(ctx, vm, image)
}

// startVMLocked runs the full boot sequence (system overlay, dm snapshot,
// tap device, Firecracker launch, DNS, optional NAT). The caller must hold
// d.mu.
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
	op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
	defer func() {
		if err != nil {
			err = annotateLogPath(err, vm.Runtime.LogPath)
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	op.stage("preflight")
	if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
		return model.VMRecord{}, err
	}
	if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	clearRuntimeHandles(&vm)
	op.stage("bridge")
	if err := d.ensureBridge(ctx); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("socket_dir")
	if err := d.ensureSocketDir(); err != nil {
		return model.VMRecord{}, err
	}

	shortID := system.ShortID(vm.ID)
	apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
	tap := "tap-fc-" + shortID
	dmName := "fc-rootfs-" + shortID
	if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}

	op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
	if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
		return model.VMRecord{}, err
	}

	op.stage("dm_snapshot", "dm_name", dmName)
	handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return model.VMRecord{}, err
	}
	vm.Runtime.BaseLoop = handles.BaseLoop
	vm.Runtime.COWLoop = handles.COWLoop
	vm.Runtime.DMName = handles.DMName
	vm.Runtime.DMDev = handles.DMDev
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.TapDevice = tap
	vm.Runtime.State = model.VMStateRunning
	vm.State = model.VMStateRunning
	vm.Runtime.LastError = ""

	cleanupOnErr := func(err error) (model.VMRecord, error) {
		vm.State = model.VMStateError
		vm.Runtime.State = model.VMStateError
		vm.Runtime.LastError = err.Error()
		op.stage("cleanup_after_failure", "error", err.Error())
		if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
			err = errors.Join(err, cleanupErr)
		}
		clearRuntimeHandles(&vm)
		_ = d.store.UpsertVM(context.Background(), vm)
		return model.VMRecord{}, err
	}

	op.stage("patch_root_overlay")
	if err := d.patchRootOverlay(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("work_disk", "work_disk_path", vm.Runtime.WorkDiskPath)
	if err := d.ensureWorkDisk(ctx, &vm); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("tap", "tap_device", tap)
	if err := d.createTap(ctx, tap); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
	if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("firecracker_binary")
	fcPath, err := d.firecrackerBinary()
	if err != nil {
		return cleanupOnErr(err)
	}
	op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
	machine, err := firecracker.NewMachine(ctx, firecracker.MachineConfig{
		BinaryPath:      fcPath,
		VMID:            vm.ID,
		SocketPath:      apiSock,
		LogPath:         vm.Runtime.LogPath,
		MetricsPath:     vm.Runtime.MetricsPath,
		KernelImagePath: image.KernelPath,
		InitrdPath:      image.InitrdPath,
		KernelArgs:      system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
		RootDrivePath:   vm.Runtime.DMDev,
		WorkDrivePath:   vm.Runtime.WorkDiskPath,
		TapDevice:       tap,
		VCPUCount:       vm.Spec.VCPUCount,
		MemoryMiB:       vm.Spec.MemoryMiB,
		Logger:          d.logger,
	})
	if err != nil {
		return cleanupOnErr(err)
	}
	if err := machine.Start(ctx); err != nil {
		vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
		return cleanupOnErr(err)
	}
	vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
	op.debugStage("firecracker_started", "pid", vm.Runtime.PID)
	op.stage("socket_access", "api_socket", apiSock)
	if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("dns", "dns_name", vm.Runtime.DNSName)
	if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
		return cleanupOnErr(err)
	}
	if vm.Spec.NATEnabled {
		op.stage("nat")
		if err := d.ensureNAT(ctx, vm, true); err != nil {
			return cleanupOnErr(err)
		}
	}
	system.TouchNow(&vm)
	op.stage("persist")
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return cleanupOnErr(err)
	}
	return vm, nil
}

// StopVM shuts a VM down gracefully via Ctrl-Alt-Del and waits up to 30s for
// the process to exit; stale runtime state is cleaned up when the process is
// already gone.
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.stop", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	vm, err = d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	op.stage("graceful_shutdown")
	if err := d.sendCtrlAltDel(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// KillVM sends the requested signal (TERM when unset) to the Firecracker
// process and then cleans up the VM's runtime state.
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.kill", "vm_ref", params.IDOrName, "signal", params.Signal)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	vm, err = d.FindVM(ctx, params.IDOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	signal := strings.TrimSpace(params.Signal)
	if signal == "" {
		signal = "TERM"
	}
	op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal)
	if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// RestartVM stops and then restarts a VM. It takes no lock itself because
// StopVM and StartVM each acquire d.mu.
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	op := d.beginOperation("vm.restart", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	op.stage("stop")
	vm, err = d.StopVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	op.stage("start", vmLogAttrs(vm)...)
	return d.StartVM(ctx, vm.ID)
}

// DeleteVM kills the VM process if it is still running, tears down runtime,
// NAT, and DNS state, and removes the store record and the VM directory.
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.delete", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	vm, err = d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("kill_running_vm", "pid", vm.Runtime.PID)
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, false); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Spec.NATEnabled {
		op.debugStage("disable_nat")
		_ = d.ensureNAT(ctx, vm, false)
	}
	op.debugStage("remove_dns", "dns_name", vm.Runtime.DNSName)
	_ = d.removeDNS(ctx, vm.Runtime.DNSName)
	op.stage("delete_store_record")
	if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Runtime.VMDir != "" {
		op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
		if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
			return model.VMRecord{}, err
		}
	}
	return vm, nil
}

// SetVM applies spec changes. CPU, memory, and disk changes require a
// stopped VM; the work disk can only grow; NAT can be toggled live.
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.set", "vm_ref", params.IDOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	vm, err = d.FindVM(ctx, params.IDOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	running := vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath)
	if params.VCPUCount != nil {
		if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
		}
		op.stage("update_vcpu", "vcpu_count", *params.VCPUCount)
		vm.Spec.VCPUCount = *params.VCPUCount
	}
	if params.MemoryMiB != nil {
		if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
		}
		op.stage("update_memory", "memory_mib", *params.MemoryMiB)
		vm.Spec.MemoryMiB = *params.MemoryMiB
	}
	if params.WorkDiskSize != "" {
		size, err := model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("disk changes require the VM to be stopped")
		}
		if size < vm.Spec.WorkDiskSizeBytes {
			return model.VMRecord{}, errors.New("disk size can only grow")
		}
		if size > vm.Spec.WorkDiskSizeBytes {
			if exists(vm.Runtime.WorkDiskPath) {
				op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size)
				if err := d.validateWorkDiskResizePrereqs(); err != nil {
					return model.VMRecord{}, err
				}
				if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, size); err != nil {
					return model.VMRecord{}, err
				}
			}
			vm.Spec.WorkDiskSizeBytes = size
		}
	}
	if params.NATEnabled != nil {
		op.stage("update_nat", "nat_enabled", *params.NATEnabled)
		vm.Spec.NATEnabled = *params.NATEnabled
		if running {
			if err := d.ensureNAT(ctx, vm, *params.NATEnabled); err != nil {
				return model.VMRecord{}, err
			}
		}
	}
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// GetVMStats returns the VM record together with freshly collected stats;
// if collection fails, the last stored stats are returned instead.
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	vm, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, model.VMStats{}, err
	}
	stats, err := d.collectStats(ctx, vm)
	if err == nil {
		vm.Stats = stats
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
		if d.logger != nil {
			d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
		}
	}
	return vm, vm.Stats, nil
}

func (d *Daemon) pollStats(ctx context.Context) error {
	d.mu.Lock()
	defer d.mu.Unlock()
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	for _, vm := range vms {
		if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			continue
		}
		stats, err := d.collectStats(ctx, vm)
		if err != nil {
			if d.logger != nil {
				d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
			}
			continue
		}
		vm.Stats = stats
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
	}
	return nil
}

func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
	if d.config.AutoStopStaleAfter <= 0 {
		return nil
	}
	op := d.beginOperation("vm.stop_stale")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done()
	}()
	d.mu.Lock()
	defer d.mu.Unlock()
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	now := model.Now()
	for _, vm := range vms {
		if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			continue
		}
		if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
			continue
		}
		op.stage("stopping_vm", vmLogAttrs(vm)...)
		_ = d.sendCtrlAltDel(ctx, vm)
		_ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second)
		_ = d.cleanupRuntime(ctx, vm, true)
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
	}
	return nil
}

func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
	stats := model.VMStats{
		CollectedAt:        model.Now(),
		SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
		WorkDiskBytes:      system.AllocatedBytes(vm.Runtime.WorkDiskPath),
		MetricsRaw:         system.ParseMetricsFile(vm.Runtime.MetricsPath),
	}
	if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		ps, err := system.ReadProcessStats(ctx, vm.Runtime.PID)
		if err == nil {
			stats.CPUPercent = ps.CPUPercent
			stats.RSSBytes = ps.RSSBytes
			stats.VSZBytes = ps.VSZBytes
		}
	}
	return stats, nil
}

func (d *Daemon) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
	if exists(vm.Runtime.SystemOverlay) {
		return nil
	}
	_, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10), vm.Runtime.SystemOverlay)
	return err
}

func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image) error {
	resolv := []byte(fmt.Sprintf("nameserver %s\n", d.config.DefaultDNS))
	hostname := []byte(vm.Name + "\n")
	hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name))
	fstab, err := system.ReadDebugFSText(ctx, d.runner, vm.Runtime.DMDev, "/etc/fstab")
	if err != nil {
		fstab = ""
	}
	newFSTab := system.UpdateFSTab(fstab)
	for guestPath, data := range map[string][]byte{
		"/etc/resolv.conf": resolv,
		"/etc/hostname":    hostname,
		"/etc/hosts":       hosts,
		"/etc/fstab":       []byte(newFSTab),
	} {
		if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil {
			return err
		}
	}
	return nil
}

func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord) error {
	if exists(vm.Runtime.WorkDiskPath) {
		return nil
	}
	if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
		return err
	}
	if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil {
		return err
	}
	rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true)
	if err != nil {
		return err
	}
	defer cleanupRoot()
	workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
	if err != nil {
		return err
	}
	defer cleanupWork()
	if err := system.CopyDirContents(ctx, d.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil {
		return err
	}
	return nil
}

func (d *Daemon) ensureBridge(ctx context.Context) error {
	if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
		_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
		return err
	}
	_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
	return err
}

func (d *Daemon) ensureSocketDir() error {
	return os.MkdirAll(d.layout.RuntimeDir, 0o755)
}

func (d *Daemon) createTap(ctx context.Context, tap string) error {
	if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
		_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
		return err
	}
	_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
	return err
}

func (d *Daemon) firecrackerBinary() (string, error) {
	if d.config.FirecrackerBin == "" {
		return "", fmt.Errorf("firecracker binary not configured; %s", paths.RuntimeBundleHint())
	}
	path := d.config.FirecrackerBin
	if !exists(path) {
		return "", fmt.Errorf("firecracker binary not found at %s; %s", path, paths.RuntimeBundleHint())
	}
	return path, nil
}

func (d *Daemon) ensureSocketAccess(ctx context.Context, apiSock string) error {
	if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock); err != nil {
		return err
	}
	_, err := d.runner.RunSudo(ctx, "chmod", "600", apiSock)
	return err
}

func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(strings.TrimSpace(string(out)))
}

func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
	if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 {
		return pid
	}
	if machine != nil {
		if pid, err := machine.PID(); err == nil && pid > 0 {
			return pid
		}
	}
	return 0
}

func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
	if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
		return err
	}
	client := firecracker.New(vm.Runtime.APISockPath, d.logger)
	return client.SendCtrlAltDel(ctx)
}

func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for {
		if !system.ProcessRunning(pid, apiSock) {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for VM to exit")
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(100 * time.Millisecond):
		}
	}
}

func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
	if d.logger != nil {
		d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...)
	}
	if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	if vm.Runtime.TapDevice != "" {
		_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.Runtime.TapDevice)
	}
	if vm.Runtime.APISockPath != "" {
		_ = os.Remove(vm.Runtime.APISockPath)
	}
	snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{
		BaseLoop: vm.Runtime.BaseLoop,
		COWLoop:  vm.Runtime.COWLoop,
		DMName:   vm.Runtime.DMName,
		DMDev:    vm.Runtime.DMDev,
	})
	if vm.Spec.NATEnabled {
		_ = d.ensureNAT(ctx, vm, false)
	}
	_ = d.removeDNS(ctx, vm.Runtime.DNSName)
	if !preserveDisks && vm.Runtime.VMDir != "" {
		return errors.Join(snapshotErr, os.RemoveAll(vm.Runtime.VMDir))
	}
	return snapshotErr
}

func clearRuntimeHandles(vm *model.VMRecord) {
	vm.Runtime.PID = 0
	vm.Runtime.APISockPath = ""
	vm.Runtime.TapDevice = ""
	vm.Runtime.BaseLoop = ""
	vm.Runtime.COWLoop = ""
	vm.Runtime.DMName = ""
	vm.Runtime.DMDev = ""
}

func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
	if dataFile := strings.TrimSpace(d.config.MapDNSDataFile); dataFile != "" {
		if err := os.MkdirAll(filepath.Dir(dataFile), 0o755); err != nil {
			return err
		}
	}
	_, err := d.runner.Run(ctx, d.mapdnsBinary(), d.mapdnsArgs("set", vmName+".vm", guestIP)...)
	if err == nil && d.logger != nil {
		d.logger.Debug("dns record set", "dns_name", vmName+".vm", "guest_ip", guestIP)
	}
	return err
}

func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error {
	if dnsName == "" {
		return nil
	}
	_, err := d.runner.Run(ctx, d.mapdnsBinary(), d.mapdnsArgs("rm", dnsName)...)
	if err != nil && strings.Contains(err.Error(), "not found") {
		return nil
	}
	if err == nil && d.logger != nil {
		d.logger.Debug("dns record removed", "dns_name", dnsName)
	}
	return err
}

func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
	_, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
	return err
}

func (d *Daemon) generateName(ctx context.Context) (string, error) {
	if exists(d.config.NamegenPath) {
		out, err := d.runner.Run(ctx, d.config.NamegenPath)
		if err == nil {
			name := strings.TrimSpace(string(out))
			if name != "" {
				return name, nil
			}
		}
	}
	return "vm-" + strconv.FormatInt(time.Now().Unix(), 10), nil
}

func bridgePrefix(bridgeIP string) string {
	parts := strings.Split(bridgeIP, ".")
	if len(parts) < 3 {
		return bridgeIP
	}
	return strings.Join(parts[:3], ".")
}

// optionalIntOrDefault resolves an optional request field: a nil pointer
// means the caller omitted the value, so the fallback default applies.
func optionalIntOrDefault(value *int, fallback int) int {
	if value != nil {
		return *value
	}
	return fallback
}

// validateOptionalPositiveSetting accepts nil (omitted, default applies) but
// rejects explicit zero or negative values before any record is persisted.
func validateOptionalPositiveSetting(label string, value *int) error {
	if value == nil {
		return nil
	}
	if *value <= 0 {
		return fmt.Errorf("%s must be a positive integer", label)
	}
	return nil
}

func (d *Daemon) mapdnsBinary() string {
	if value := strings.TrimSpace(d.config.MapDNSBin); value != "" {
		return value
	}
	return "mapdns"
}

func (d *Daemon) mapdnsArgs(subcommand string, args ...string) []string {
	out := []string{subcommand}
	if value := strings.TrimSpace(d.config.MapDNSDataFile); value != "" {
		out = append(out, "--data-file", value)
	}
	out = append(out, args...)
	return out
}
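
The regression tests mentioned in the description are not included here; a minimal table-driven sketch of the daemon-side validation case might look like the following. Only validateOptionalPositiveSetting is taken from the code above; the test name and case labels are assumptions.

package daemon

import "testing"

func TestValidateOptionalPositiveSetting(t *testing.T) {
	intPtr := func(v int) *int { return &v }
	cases := []struct {
		name    string
		value   *int
		wantErr bool
	}{
		{"omitted means default", nil, false},
		{"positive accepted", intPtr(2), false},
		{"zero rejected", intPtr(0), true},
		{"negative rejected", intPtr(-1), true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := validateOptionalPositiveSetting("vcpu", tc.value)
			if (err != nil) != tc.wantErr {
				t.Fatalf("got err=%v, wantErr=%v", err, tc.wantErr)
			}
		})
	}
}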