From 0e285048920f7e403642b9bcf72abfd1edca0f9f Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Thu, 23 Apr 2026 18:09:32 -0300 Subject: [PATCH] daemon: rewrite ensureWorkDisk no-seed path to skip the mount + cp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The no-seed branch used to mount the base rootfs read-only, mount the freshly mkfs'd work disk read-write, sudo-cp /root from one to the other, then flatten any accidental /root/root/ nesting. Five sudo call sites packed into a fallback that the common image path doesn't even exercise. Replace with: `mkfs.ext4 -F -E root_owner=0:0` and nothing else. mkfs already stamps inode 2 as root:root:0755 — sshd's StrictModes walks that dir's ownership when the work disk mounts at /root in the guest, so getting it right from mkfs means authsync can just write authorized_keys without any repair pass. Tradeoff: no-seed VMs lose the base rootfs's default /root dotfiles (.bashrc, .profile). The no-seed path is explicitly the degraded fallback — `banger doctor` already warns about it — and users who want those back have two documented knobs: rebuild the image with a work-seed, or land them via [[file_sync]]. Sudo call sites removed: 5 (MountTempDir × 2, sudo cp -a, flattenNestedWorkHome's chmod/cp/rm). flattenNestedWorkHome itself stays alive for now — authsync + image_seed still call it — and gets deleted in commit 5 once its last caller goes away. While here: fix the freshly-added EnsureExt4RootPerms helper. `set_inode_field <2> mode N` overwrites the full i_mode word instead of preserving the type nibble, so the initial implementation that passed just the permission bits (0755) would reset the fs root to regular-file shape and break the next kernel mount with "Structure needs cleaning." The corrected call OR's in S_IFDIR (0o040000) explicitly. Test updated to match. Smoke: 21/21 scenarios green. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/daemon/vm_disk.go | 38 ++++++++++++++------------------ internal/system/ext4.go | 42 ++++++++++++------------------------ internal/system/ext4_test.go | 10 ++++++--- 3 files changed, 37 insertions(+), 53 deletions(-) diff --git a/internal/daemon/vm_disk.go b/internal/daemon/vm_disk.go index 7cb53ee..660676d 100644 --- a/internal/daemon/vm_disk.go +++ b/internal/daemon/vm_disk.go @@ -110,32 +110,26 @@ func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, imag } return workDiskPreparation{ClonedFromSeed: true}, nil } + // No seed: build an empty work disk. `-E root_owner=0:0` stamps + // inode 2 (the fs root, which becomes /root inside the guest) as + // root:root:0755 up front. sshd's StrictModes walks that dir's + // ownership and mode, so getting it right from mkfs means the + // authsync step can just write authorized_keys without any + // repair pass. + // + // Unlike the pre-refactor flow there is no "copy /root from the + // base rootfs" step. The no-seed path is the degraded fallback + // (the common case has a work-seed artifact and hits the branch + // above). Dropping the copy eliminates 4 sudo call sites — mount + // base ro, mount work rw, sudo cp -a, flattenNestedWorkHome — + // at the cost of losing default distro dotfiles on no-seed VMs. + // Users who need those should either rebuild the image with a + // work-seed (the documented path) or land them via [[file_sync]]. vmCreateStage(ctx, "prepare_work_disk", "creating empty work disk") if _, err := s.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil { return workDiskPreparation{}, err } - if _, err := s.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil { - return workDiskPreparation{}, err - } - dmDev := s.vmHandles(vm.ID).DMDev - if dmDev == "" { - return workDiskPreparation{}, fmt.Errorf("vm %q: DM device not in handle cache", vm.ID) - } - rootMount, cleanupRoot, err := system.MountTempDir(ctx, s.runner, dmDev, true) - if err != nil { - return workDiskPreparation{}, err - } - defer cleanupRoot() - workMount, cleanupWork, err := system.MountTempDir(ctx, s.runner, vm.Runtime.WorkDiskPath, false) - if err != nil { - return workDiskPreparation{}, err - } - defer cleanupWork() - vmCreateStage(ctx, "prepare_work_disk", "copying /root into work disk") - if err := system.CopyDirContents(ctx, s.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil { - return workDiskPreparation{}, err - } - if err := flattenNestedWorkHome(ctx, s.runner, workMount); err != nil { + if _, err := s.runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", vm.Runtime.WorkDiskPath); err != nil { return workDiskPreparation{}, err } return workDiskPreparation{}, nil diff --git a/internal/system/ext4.go b/internal/system/ext4.go index 70eb902..8c9ebdc 100644 --- a/internal/system/ext4.go +++ b/internal/system/ext4.go @@ -85,37 +85,23 @@ func WriteExt4FileOwned(ctx context.Context, runner CommandRunner, imagePath, gu return debugfsScript(ctx, runner, imagePath, &script) } -// SetExt4Ownership adjusts an existing inode's uid/gid/mode in a -// single debugfs batch. Does not check whether the path exists — -// callers are expected to have just created it, or to know it's -// already there. -func SetExt4Ownership(ctx context.Context, runner CommandRunner, imagePath, guestPath string, mode os.FileMode, uid, gid int) error { - escaped, err := escapeDebugfsGuestPath(guestPath) - if err != nil { - return err - } - // debugfs exposes two spellings for mode edits: `set_inode_field - // mode 0` which overwrites the full i_mode word - // (requiring callers to bake in the file-type nibble), and `sif - // mode 0` which takes a permission-only value and - // preserves the existing type bits. We take the permission bits - // from the caller, so `sif` is the right verb. - var buf bytes.Buffer - fmt.Fprintf(&buf, "sif %s mode 0%o\n", escaped, uint32(mode.Perm())&0o7777) - fmt.Fprintf(&buf, "set_inode_field %s uid %d\n", escaped, uid) - fmt.Fprintf(&buf, "set_inode_field %s gid %d\n", escaped, gid) - return debugfsScript(ctx, runner, imagePath, &buf) -} - // EnsureExt4RootPerms sets the filesystem root inode (inode <2>, -// which is what `/` resolves to) to the given mode + owner. sshd's -// StrictModes inside the guest walks the home directory's ownership; -// the work disk is mounted at /root in the guest and its root inode -// is /root as far as sshd is concerned. Default-safe value: 0755 -// root:root. +// which is what `/` resolves to) to the given directory mode + owner. +// sshd's StrictModes inside the guest walks the home directory's +// ownership; the work disk is mounted at /root in the guest, so its +// root inode is /root as far as sshd is concerned. Default-safe +// value: 0755 root:root. +// +// Note on debugfs mode semantics: `set_inode_field mode N` +// OVERWRITES the full i_mode word — it does NOT preserve the type +// nibble. Passing just the permission bits (e.g. 0755) would reset +// the root inode to a regular-file shape, and the next kernel mount +// would fail with "Structure needs cleaning." The constant ORed +// below restores the S_IFDIR type bits explicitly. func EnsureExt4RootPerms(ctx context.Context, runner CommandRunner, imagePath string, mode os.FileMode, uid, gid int) error { + fullMode := ext4ModeDirectory | (uint32(mode.Perm()) & 0o7777) var script bytes.Buffer - fmt.Fprintf(&script, "sif <2> mode 0%o\n", uint32(mode.Perm())&0o7777) + fmt.Fprintf(&script, "set_inode_field <2> mode 0%o\n", fullMode) fmt.Fprintf(&script, "set_inode_field <2> uid %d\n", uid) fmt.Fprintf(&script, "set_inode_field <2> gid %d\n", gid) return debugfsScript(ctx, runner, imagePath, &script) diff --git a/internal/system/ext4_test.go b/internal/system/ext4_test.go index 26cf2e7..3e23bb5 100644 --- a/internal/system/ext4_test.go +++ b/internal/system/ext4_test.go @@ -285,9 +285,13 @@ func TestEnsureExt4RootPerms_UsesRootInodeLiteral(t *testing.T) { t.Fatalf("EnsureExt4RootPerms: %v", err) } - // Must address inode 2 — the ext4 root directory. - if !strings.Contains(capturedScript, "sif <2> mode 0755") { - t.Fatalf("script missing root-inode mode line:\n%s", capturedScript) + // Must address inode 2 — the ext4 root directory — with the + // FULL i_mode word (S_IFDIR | 0755 = 040755). debugfs's + // set_inode_field doesn't preserve the type nibble, so passing + // just the permission bits (0755) would reset the root inode + // to regular-file shape and break the next kernel mount. + if !strings.Contains(capturedScript, "set_inode_field <2> mode 040755") { + t.Fatalf("script missing root-inode mode line with S_IFDIR+0755:\n%s", capturedScript) } if !strings.Contains(capturedScript, "set_inode_field <2> uid 0") { t.Fatalf("script missing root-inode uid line:\n%s", capturedScript)