package daemon import ( "context" "fmt" "os" "strings" "time" "banger/internal/guest" "banger/internal/model" "banger/internal/system" ) func (s *ImageService) seedAuthorizedKeyOnExt4Image(ctx context.Context, imagePath string) (string, error) { if strings.TrimSpace(s.config.SSHKeyPath) == "" { return "", nil } fingerprint, err := guest.AuthorizedPublicKeyFingerprint(s.config.SSHKeyPath) if err != nil { return "", fmt.Errorf("derive authorized ssh key fingerprint: %w", err) } publicKey, err := guest.AuthorizedPublicKey(s.config.SSHKeyPath) if err != nil { return "", fmt.Errorf("derive authorized ssh key: %w", err) } if err := provisionAuthorizedKey(ctx, s.runner, imagePath, publicKey); err != nil { return "", err } return fingerprint, nil } // refreshManagedWorkSeedFingerprint re-seeds work-seed.ext4 with the // daemon's current SSH key when a previously-stored fingerprint has // gone stale (host key rotated, image rebuilt without a new seed). // // This path is reachable from concurrent vm.create RPCs: each one // reads the same stale image.SeededSSHPublicKeyFingerprint from the // store and races into here. Modifying the seed in place via // e2rm/e2cp is not concurrent-read-safe — peer vm.create calls doing // `MaterializeWorkDisk` in parallel `RdumpExt4Dir` the seed and // observe a torn ext4 image ("Superblock checksum does not match"). // // Fix: stage the rewrite on a sibling tmpfile and atomic-rename. A // concurrent reader either has the file open (kernel keeps the // pre-rename inode alive) or opens after the rename (sees the new // inode) — never observes a partial state. Two concurrent refreshes // are idempotent (same daemon, same SSH key) so unique tmp suffixes // are enough; whichever rename lands last wins, with identical // content. UpsertImage runs after the rename so the recorded // fingerprint always matches what's actually on disk for any reader // that picks up the image record after this point. func (s *ImageService) refreshManagedWorkSeedFingerprint(ctx context.Context, image model.Image, fingerprint string) error { if !image.Managed || strings.TrimSpace(image.WorkSeedPath) == "" || strings.TrimSpace(fingerprint) == "" { return nil } // Unique sibling tmp path: same dir guarantees a same-FS rename. // Two concurrent refreshes get distinct paths so they don't clobber // each other's tmpfile mid-write. tmpPath := fmt.Sprintf("%s.refresh.%d-%d.tmp", image.WorkSeedPath, os.Getpid(), time.Now().UnixNano()) if err := system.CopyFilePreferClone(image.WorkSeedPath, tmpPath); err != nil { return fmt.Errorf("stage seed for refresh: %w", err) } committed := false defer func() { if !committed { _ = os.Remove(tmpPath) } }() seededFingerprint, err := s.seedAuthorizedKeyOnExt4Image(ctx, tmpPath) if err != nil { return err } if seededFingerprint == "" || seededFingerprint == image.SeededSSHPublicKeyFingerprint { return nil } if err := os.Rename(tmpPath, image.WorkSeedPath); err != nil { return fmt.Errorf("commit seed refresh: %w", err) } committed = true image.SeededSSHPublicKeyFingerprint = seededFingerprint image.UpdatedAt = model.Now() return s.store.UpsertImage(ctx, image) }