package daemon

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"banger/internal/api"
	"banger/internal/imagecat"
	"banger/internal/model"
	"banger/internal/vmdns"
)

// CreateVM is split into three phases so the global createVMMu guards
// only the narrow name+IP reservation window, not the slow image
// resolution or the multi-second boot flow:
//
//  1. Validate + resolve image. No global lock. Image auto-pull
//     self-locks via imageOpsMu (which is also now publication-only).
//  2. Reserve a row: generate an id, pick the next IP, claim the name,
//     UpsertVM the "created" record. Held under createVMMu so two
//     concurrent `vm create --name foo` calls can't both think they
//     won.
//  3. Boot. Only the per-VM lock is held, so parallel creates against
//     different VMs fully overlap.
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
	op := d.beginOperation("vm.create")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
		return model.VMRecord{}, err
	}
	if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
		return model.VMRecord{}, err
	}

	imageName := params.ImageName
	if imageName == "" {
		imageName = d.config.DefaultImageName
	}

	vmCreateStage(ctx, "resolve_image", "resolving image")
	image, err := d.findOrAutoPullImage(ctx, imageName)
	if err != nil {
		return model.VMRecord{}, err
	}
	vmCreateStage(ctx, "resolve_image", "using image "+image.Name)
	op.stage("image_resolved", imageLogAttrs(image)...)

	systemOverlaySize := int64(model.DefaultSystemOverlaySize)
	if params.SystemOverlaySize != "" {
		systemOverlaySize, err = model.ParseSize(params.SystemOverlaySize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	workDiskSize := int64(model.DefaultWorkDiskSize)
	if params.WorkDiskSize != "" {
		workDiskSize, err = model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}

	spec := model.VMSpec{
		VCPUCount:             optionalIntOrDefault(params.VCPUCount, model.DefaultVCPUCount),
		MemoryMiB:             optionalIntOrDefault(params.MemoryMiB, model.DefaultMemoryMiB),
		SystemOverlaySizeByte: systemOverlaySize,
		WorkDiskSizeBytes:     workDiskSize,
		NATEnabled:            params.NATEnabled,
	}

	vm, err = d.reserveVM(ctx, strings.TrimSpace(params.Name), image, spec)
	if err != nil {
		return model.VMRecord{}, err
	}
	op.stage("persisted", vmLogAttrs(vm)...)
	vmCreateBindVM(ctx, vm)
	vmCreateStage(ctx, "reserve_vm", fmt.Sprintf("allocated %s (%s)", vm.Name, vm.Runtime.GuestIP))

	unlockVM := d.lockVMID(vm.ID)
	defer unlockVM()

	if params.NoStart {
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	return d.startVMLocked(ctx, vm, image)
}
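// A minimal sketch of what the phase split buys concurrent callers;
// the driver below is hypothetical and not part of this package:
//
//	var wg sync.WaitGroup
//	for _, name := range []string{"a", "b", "c"} {
//		wg.Add(1)
//		go func(name string) {
//			defer wg.Done()
//			// Only the reserveVM window serializes; each boot runs
//			// under its own per-VM lock, so all three overlap.
//			_, _ = d.CreateVM(ctx, api.VMCreateParams{Name: name})
//		}(name)
//	}
//	wg.Wait()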
// reserveVM holds createVMMu only long enough to verify the name is
// free, allocate a guest IP from the store, and persist the "created"
// reservation row. Everything else (image resolution upstream, boot
// downstream) runs outside this lock.
func (d *Daemon) reserveVM(ctx context.Context, requestedName string, image model.Image, spec model.VMSpec) (model.VMRecord, error) {
	d.createVMMu.Lock()
	defer d.createVMMu.Unlock()

	name := requestedName
	if name == "" {
		generated, err := d.generateName(ctx)
		if err != nil {
			return model.VMRecord{}, err
		}
		name = generated
	}

	// Exact-name lookup. Using FindVM here would also match a new name
	// that merely prefixes some existing VM's id or another VM's name,
	// falsely rejecting perfectly valid names.
	if _, err := d.store.GetVMByName(ctx, name); err == nil {
		return model.VMRecord{}, fmt.Errorf("vm name already exists: %s", name)
	} else if !errors.Is(err, sql.ErrNoRows) {
		return model.VMRecord{}, err
	}

	id, err := model.NewID()
	if err != nil {
		return model.VMRecord{}, err
	}
	guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
	if err != nil {
		return model.VMRecord{}, err
	}

	vmDir := filepath.Join(d.layout.VMsDir, id)
	if err := os.MkdirAll(vmDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}

	vsockCID, err := defaultVSockCID(guestIP)
	if err != nil {
		return model.VMRecord{}, err
	}

	now := model.Now()
	vm := model.VMRecord{
		ID:            id,
		Name:          name,
		ImageID:       image.ID,
		State:         model.VMStateCreated,
		CreatedAt:     now,
		UpdatedAt:     now,
		LastTouchedAt: now,
		Spec:          spec,
		Runtime: model.VMRuntime{
			State:         model.VMStateCreated,
			GuestIP:       guestIP,
			DNSName:       vmdns.RecordName(name),
			VMDir:         vmDir,
			VSockPath:     defaultVSockPath(d.layout.RuntimeDir, id),
			VSockCID:      vsockCID,
			SystemOverlay: filepath.Join(vmDir, "system.cow"),
			WorkDiskPath:  filepath.Join(vmDir, "root.ext4"),
			LogPath:       filepath.Join(vmDir, "firecracker.log"),
			MetricsPath:   filepath.Join(vmDir, "metrics.json"),
		},
	}
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// findOrAutoPullImage tries the local image store first; if the name
// isn't registered but matches an entry in the embedded imagecat
// catalog, it auto-pulls the bundle so `vm create --image foo` (and
// therefore `vm run`) works on a fresh host without the user having
// to run `image pull` first.
func (d *Daemon) findOrAutoPullImage(ctx context.Context, idOrName string) (model.Image, error) {
	image, err := d.imageSvc().FindImage(ctx, idOrName)
	if err == nil {
		return image, nil
	}
	catalog, loadErr := imagecat.LoadEmbedded()
	if loadErr != nil {
		return model.Image{}, err
	}
	entry, lookupErr := catalog.Lookup(idOrName)
	if lookupErr != nil {
		// Not in the catalog either; surface the original not-found error.
		return model.Image{}, err
	}

	vmCreateStage(ctx, "auto_pull_image", fmt.Sprintf("pulling %s from image catalog", entry.Name))
	if _, pullErr := d.imageSvc().PullImage(ctx, api.ImagePullParams{Ref: entry.Name}); pullErr != nil {
		return model.Image{}, fmt.Errorf("auto-pull image %q: %w", entry.Name, pullErr)
	}
	return d.imageSvc().FindImage(ctx, idOrName)
}
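// The settings helpers referenced in CreateVM (optionalIntOrDefault,
// validateOptionalPositiveSetting) are defined elsewhere in this
// package. For orientation, a sketch of the contract CreateVM relies
// on, under the assumption that optional settings arrive as *int
// (illustrative signatures, not necessarily the real ones):
//
//	func optionalIntOrDefault(v *int, def int) int {
//		if v == nil {
//			return def
//		}
//		return *v
//	}
//
//	func validateOptionalPositiveSetting(name string, v *int) error {
//		if v != nil && *v <= 0 {
//			return fmt.Errorf("%s must be a positive integer", name)
//		}
//		return nil
//	}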