daemon: rewrite ensureWorkDisk no-seed path to skip the mount + cp

The no-seed branch used to mount the base rootfs read-only, mount
the freshly mkfs'd work disk read-write, sudo-cp /root from one to
the other, then flatten any accidental /root/root/ nesting. Five
sudo call sites packed into a fallback that the common image path
doesn't even exercise.

Replace with: `mkfs.ext4 -F -E root_owner=0:0` and nothing else.
mkfs already stamps inode 2 as root:root:0755 — sshd's StrictModes
walks that dir's ownership when the work disk mounts at /root in
the guest, so getting it right from mkfs means authsync can just
write authorized_keys without any repair pass.

Tradeoff: no-seed VMs lose the base rootfs's default /root dotfiles
(.bashrc, .profile). The no-seed path is explicitly the degraded
fallback — `banger doctor` already warns about it — and users who
want those back have two documented knobs: rebuild the image with
a work-seed, or land them via [[file_sync]].

Sudo call sites removed: 5 (MountTempDir × 2, sudo cp -a,
flattenNestedWorkHome's chmod/cp/rm). flattenNestedWorkHome itself
stays alive for now — authsync + image_seed still call it — and
gets deleted in commit 5 once its last caller goes away.

While here: fix the freshly-added EnsureExt4RootPerms helper.
`set_inode_field <2> mode N` overwrites the full i_mode word
instead of preserving the type nibble, so the initial
implementation that passed just the permission bits (0755) would
reset the fs root to regular-file shape and break the next kernel
mount with "Structure needs cleaning." The corrected call OR's in
S_IFDIR (0o040000) explicitly. Test updated to match.

Smoke: 21/21 scenarios green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-23 18:09:32 -03:00
parent 77043966d4
commit 0e28504892
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 37 additions and 53 deletions

View file

@ -110,32 +110,26 @@ func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, imag
}
return workDiskPreparation{ClonedFromSeed: true}, nil
}
// No seed: build an empty work disk. `-E root_owner=0:0` stamps
// inode 2 (the fs root, which becomes /root inside the guest) as
// root:root:0755 up front. sshd's StrictModes walks that dir's
// ownership and mode, so getting it right from mkfs means the
// authsync step can just write authorized_keys without any
// repair pass.
//
// Unlike the pre-refactor flow there is no "copy /root from the
// base rootfs" step. The no-seed path is the degraded fallback
// (the common case has a work-seed artifact and hits the branch
// above). Dropping the copy eliminates 4 sudo call sites — mount
// base ro, mount work rw, sudo cp -a, flattenNestedWorkHome —
// at the cost of losing default distro dotfiles on no-seed VMs.
// Users who need those should either rebuild the image with a
// work-seed (the documented path) or land them via [[file_sync]].
vmCreateStage(ctx, "prepare_work_disk", "creating empty work disk")
if _, err := s.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
return workDiskPreparation{}, err
}
if _, err := s.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil {
return workDiskPreparation{}, err
}
dmDev := s.vmHandles(vm.ID).DMDev
if dmDev == "" {
return workDiskPreparation{}, fmt.Errorf("vm %q: DM device not in handle cache", vm.ID)
}
rootMount, cleanupRoot, err := system.MountTempDir(ctx, s.runner, dmDev, true)
if err != nil {
return workDiskPreparation{}, err
}
defer cleanupRoot()
workMount, cleanupWork, err := system.MountTempDir(ctx, s.runner, vm.Runtime.WorkDiskPath, false)
if err != nil {
return workDiskPreparation{}, err
}
defer cleanupWork()
vmCreateStage(ctx, "prepare_work_disk", "copying /root into work disk")
if err := system.CopyDirContents(ctx, s.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil {
return workDiskPreparation{}, err
}
if err := flattenNestedWorkHome(ctx, s.runner, workMount); err != nil {
if _, err := s.runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", vm.Runtime.WorkDiskPath); err != nil {
return workDiskPreparation{}, err
}
return workDiskPreparation{}, nil

View file

@ -85,37 +85,23 @@ func WriteExt4FileOwned(ctx context.Context, runner CommandRunner, imagePath, gu
return debugfsScript(ctx, runner, imagePath, &script)
}
// SetExt4Ownership adjusts an existing inode's uid/gid/mode in a
// single debugfs batch. Does not check whether the path exists —
// callers are expected to have just created it, or to know it's
// already there.
func SetExt4Ownership(ctx context.Context, runner CommandRunner, imagePath, guestPath string, mode os.FileMode, uid, gid int) error {
escaped, err := escapeDebugfsGuestPath(guestPath)
if err != nil {
return err
}
// debugfs exposes two spellings for mode edits: `set_inode_field
// <path> mode 0<octal>` which overwrites the full i_mode word
// (requiring callers to bake in the file-type nibble), and `sif
// <path> mode 0<octal>` which takes a permission-only value and
// preserves the existing type bits. We take the permission bits
// from the caller, so `sif` is the right verb.
var buf bytes.Buffer
fmt.Fprintf(&buf, "sif %s mode 0%o\n", escaped, uint32(mode.Perm())&0o7777)
fmt.Fprintf(&buf, "set_inode_field %s uid %d\n", escaped, uid)
fmt.Fprintf(&buf, "set_inode_field %s gid %d\n", escaped, gid)
return debugfsScript(ctx, runner, imagePath, &buf)
}
// EnsureExt4RootPerms sets the filesystem root inode (inode <2>,
// which is what `/` resolves to) to the given mode + owner. sshd's
// StrictModes inside the guest walks the home directory's ownership;
// the work disk is mounted at /root in the guest and its root inode
// is /root as far as sshd is concerned. Default-safe value: 0755
// root:root.
// which is what `/` resolves to) to the given directory mode + owner.
// sshd's StrictModes inside the guest walks the home directory's
// ownership; the work disk is mounted at /root in the guest, so its
// root inode is /root as far as sshd is concerned. Default-safe
// value: 0755 root:root.
//
// Note on debugfs mode semantics: `set_inode_field <path> mode N`
// OVERWRITES the full i_mode word — it does NOT preserve the type
// nibble. Passing just the permission bits (e.g. 0755) would reset
// the root inode to a regular-file shape, and the next kernel mount
// would fail with "Structure needs cleaning." The constant ORed
// below restores the S_IFDIR type bits explicitly.
func EnsureExt4RootPerms(ctx context.Context, runner CommandRunner, imagePath string, mode os.FileMode, uid, gid int) error {
fullMode := ext4ModeDirectory | (uint32(mode.Perm()) & 0o7777)
var script bytes.Buffer
fmt.Fprintf(&script, "sif <2> mode 0%o\n", uint32(mode.Perm())&0o7777)
fmt.Fprintf(&script, "set_inode_field <2> mode 0%o\n", fullMode)
fmt.Fprintf(&script, "set_inode_field <2> uid %d\n", uid)
fmt.Fprintf(&script, "set_inode_field <2> gid %d\n", gid)
return debugfsScript(ctx, runner, imagePath, &script)

View file

@ -285,9 +285,13 @@ func TestEnsureExt4RootPerms_UsesRootInodeLiteral(t *testing.T) {
t.Fatalf("EnsureExt4RootPerms: %v", err)
}
// Must address inode 2 — the ext4 root directory.
if !strings.Contains(capturedScript, "sif <2> mode 0755") {
t.Fatalf("script missing root-inode mode line:\n%s", capturedScript)
// Must address inode 2 — the ext4 root directory — with the
// FULL i_mode word (S_IFDIR | 0755 = 040755). debugfs's
// set_inode_field doesn't preserve the type nibble, so passing
// just the permission bits (0755) would reset the root inode
// to regular-file shape and break the next kernel mount.
if !strings.Contains(capturedScript, "set_inode_field <2> mode 040755") {
t.Fatalf("script missing root-inode mode line with S_IFDIR+0755:\n%s", capturedScript)
}
if !strings.Contains(capturedScript, "set_inode_field <2> uid 0") {
t.Fatalf("script missing root-inode uid line:\n%s", capturedScript)