Manage image artifacts and show VM create progress

Stop relying on ad hoc rootfs handling by adding image promotion, managed work-seed fingerprint metadata, and lazy self-healing for older managed images after the first create.

Rebuild guest images with baked SSH access, a guest NIC bootstrap, and default opencode services, and add the staged Void kernel/initramfs/modules workflow so void-exp uses a matching Void boot stack.

Replace the opaque blocking vm.create RPC with a begin/status flow that prints live stages in the CLI while still waiting for vsock health and opencode on guest port 4096.

Validated with `GOCACHE=/tmp/banger-gocache go test ./...` and with live void-exp create/delete smoke runs.
This commit is contained in:
Thales Maciel 2026-03-21 14:48:01 -03:00
parent 9f09b0d25c
commit 30f0c0b54a
No known key found for this signature in database
GPG key ID: 33112E6833C34679
37 changed files with 2334 additions and 99 deletions

View file

@ -49,10 +49,12 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm mo
if imageName == "" {
imageName = d.config.DefaultImageName
}
vmCreateStage(ctx, "resolve_image", "resolving image")
image, err := d.FindImage(ctx, imageName)
if err != nil {
return model.VMRecord{}, err
}
vmCreateStage(ctx, "resolve_image", "using image "+image.Name)
op.stage("image_resolved", imageLogAttrs(image)...)
name := strings.TrimSpace(params.Name)
if name == "" {
@ -126,6 +128,8 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm mo
MetricsPath: filepath.Join(vmDir, "metrics.json"),
},
}
vmCreateBindVM(ctx, vm)
vmCreateStage(ctx, "reserve_vm", fmt.Sprintf("allocated %s (%s)", vm.Name, vm.Runtime.GuestIP))
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
@ -168,6 +172,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
op.done(vmLogAttrs(vm)...)
}()
op.stage("preflight")
vmCreateStage(ctx, "preflight", "checking host prerequisites")
if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
return model.VMRecord{}, err
}
@ -209,11 +214,13 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
}
op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
vmCreateStage(ctx, "prepare_rootfs", "preparing system overlay")
if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
return model.VMRecord{}, err
}
op.stage("dm_snapshot", "dm_name", dmName)
vmCreateStage(ctx, "prepare_rootfs", "creating root filesystem snapshot")
handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
if err != nil {
return model.VMRecord{}, err
@ -241,10 +248,12 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
}
op.stage("patch_root_overlay")
vmCreateStage(ctx, "prepare_rootfs", "writing guest configuration")
if err := d.patchRootOverlay(ctx, vm, image); err != nil {
return cleanupOnErr(err)
}
op.stage("prepare_host_features")
vmCreateStage(ctx, "prepare_host_features", "preparing host-side vm features")
if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
return cleanupOnErr(err)
}
@ -265,6 +274,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
return cleanupOnErr(err)
}
op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
vmCreateStage(ctx, "boot_firecracker", "starting firecracker")
firecrackerCtx := context.Background()
machineConfig := firecracker.MachineConfig{
BinaryPath: fcPath,
@ -304,15 +314,18 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
return cleanupOnErr(err)
}
op.stage("vsock_access", "vsock_path", vm.Runtime.VSockPath, "vsock_cid", vm.Runtime.VSockCID)
vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
if err := d.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
return cleanupOnErr(err)
}
op.stage("post_start_features")
vmCreateStage(ctx, "wait_guest_ready", "waiting for guest services")
if err := d.postStartCapabilities(ctx, vm, image); err != nil {
return cleanupOnErr(err)
}
system.TouchNow(&vm)
op.stage("persist")
vmCreateStage(ctx, "finalize", "saving vm state")
if err := d.store.UpsertVM(ctx, vm); err != nil {
return cleanupOnErr(err)
}
@ -777,58 +790,75 @@ func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image
return nil
}
// NOTE(review): this hunk appears to be a diff rendering with its +/- markers
// stripped: where a line changed, the previous version is followed directly by
// its replacement (two signatures, paired return statements). Read the second
// line of each pair as the current code — confirm against the real file.
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image) error {
// workDiskPreparation reports how ensureWorkDisk produced the work disk.
type workDiskPreparation struct {
// ClonedFromSeed is set only on the path that copies image.WorkSeedPath;
// every other return path leaves it false.
ClonedFromSeed bool
}
// ensureWorkDisk makes sure a work disk exists at vm.Runtime.WorkDiskPath.
//
// Paths, in order:
//   - the work disk already exists: nothing is done;
//   - image.WorkSeedPath exists: the seed is copied to the work disk path and
//     resized when vm.Spec.WorkDiskSizeBytes is larger than the seed;
//   - otherwise: a file of vm.Spec.WorkDiskSizeBytes is created, formatted as
//     ext4, and filled with the contents of the "root" directory found on the
//     mounted vm.Runtime.DMDev.
//
// Progress is reported through vmCreateStage under the "prepare_work_disk"
// stage. The returned workDiskPreparation has ClonedFromSeed set only when the
// seed path completed successfully; on error the zero value is returned.
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image) (workDiskPreparation, error) {
// An existing work disk is reused as-is, without checking its size or contents.
if exists(vm.Runtime.WorkDiskPath) {
return nil
return workDiskPreparation{}, nil
}
if exists(image.WorkSeedPath) {
vmCreateStage(ctx, "prepare_work_disk", "cloning work seed")
if err := system.CopyFilePreferClone(image.WorkSeedPath, vm.Runtime.WorkDiskPath); err != nil {
return err
return workDiskPreparation{}, err
}
seedInfo, err := os.Stat(image.WorkSeedPath)
if err != nil {
return err
return workDiskPreparation{}, err
}
// NOTE(review): this check runs after the seed has already been copied, so
// on this error the copy stays at WorkDiskPath and a later call would hit
// the exists() early return above — confirm that callers remove it.
if vm.Spec.WorkDiskSizeBytes < seedInfo.Size() {
return fmt.Errorf("requested work disk size %d is smaller than seed image %d", vm.Spec.WorkDiskSizeBytes, seedInfo.Size())
return workDiskPreparation{}, fmt.Errorf("requested work disk size %d is smaller than seed image %d", vm.Spec.WorkDiskSizeBytes, seedInfo.Size())
}
// Equal sizes need no resize; only a larger request triggers one.
if vm.Spec.WorkDiskSizeBytes > seedInfo.Size() {
vmCreateStage(ctx, "prepare_work_disk", "resizing work disk")
if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, vm.Spec.WorkDiskSizeBytes); err != nil {
return err
return workDiskPreparation{}, err
}
}
return nil
return workDiskPreparation{ClonedFromSeed: true}, nil
}
// No seed available: build the work disk from scratch.
vmCreateStage(ctx, "prepare_work_disk", "creating empty work disk")
if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
return err
return workDiskPreparation{}, err
}
if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil {
return err
return workDiskPreparation{}, err
}
// Mount the VM's root device and the new work disk so the "root" directory
// can be copied across. NOTE(review): the trailing bool presumably selects a
// read-only mount — confirm against system.MountTempDir.
rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true)
if err != nil {
return err
return workDiskPreparation{}, err
}
defer cleanupRoot()
workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
if err != nil {
return err
return workDiskPreparation{}, err
}
// Deferred calls run in reverse order, so cleanupWork runs before cleanupRoot.
defer cleanupWork()
vmCreateStage(ctx, "prepare_work_disk", "copying /root into work disk")
if err := system.CopyDirContents(ctx, d.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil {
return err
return workDiskPreparation{}, err
}
// NOTE(review): presumably collapses a nested home directory left by the
// copy — confirm against flattenNestedWorkHome.
if err := d.flattenNestedWorkHome(ctx, workMount); err != nil {
return err
return workDiskPreparation{}, err
}
return nil
return workDiskPreparation{}, nil
}
func (d *Daemon) ensureAuthorizedKeyOnWorkDisk(ctx context.Context, vm *model.VMRecord) error {
func (d *Daemon) ensureAuthorizedKeyOnWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image, prep workDiskPreparation) error {
fingerprint, err := guest.AuthorizedPublicKeyFingerprint(d.config.SSHKeyPath)
if err != nil {
return fmt.Errorf("derive authorized ssh key fingerprint: %w", err)
}
if prep.ClonedFromSeed && image.SeededSSHPublicKeyFingerprint != "" && image.SeededSSHPublicKeyFingerprint == fingerprint {
vmCreateStage(ctx, "prepare_work_disk", "using seeded SSH access")
return nil
}
publicKey, err := guest.AuthorizedPublicKey(d.config.SSHKeyPath)
if err != nil {
return fmt.Errorf("derive authorized ssh key: %w", err)
}
vmCreateStage(ctx, "prepare_work_disk", "repairing SSH access on work disk")
workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
if err != nil {
return err
@ -873,6 +903,12 @@ func (d *Daemon) ensureAuthorizedKeyOnWorkDisk(ctx context.Context, vm *model.VM
if _, err := d.runner.RunSudo(ctx, "install", "-m", "600", tmpPath, authorizedKeysPath); err != nil {
return err
}
if prep.ClonedFromSeed && image.Managed {
vmCreateStage(ctx, "prepare_work_disk", "refreshing managed work seed")
if err := d.refreshManagedWorkSeedFingerprint(ctx, image, fingerprint); err != nil {
return err
}
}
return nil
}