diff --git a/internal/cli/banger.go b/internal/cli/banger.go index 032662d..6a55166 100644 --- a/internal/cli/banger.go +++ b/internal/cli/banger.go @@ -1328,7 +1328,7 @@ func newVMWorkspaceExportCommand() *cobra.Command { cmd := &cobra.Command{ Use: "export ", Short: "Pull changes from a guest workspace back to the host as a patch", - Long: "Stage all changes inside the guest workspace (git add -A) and emit a binary-safe unified diff. Pass --base-commit with the head_commit from workspace prepare to capture changes even when the worker ran git commit inside the VM. Without --base-commit the diff is against the current guest HEAD, which misses committed changes.", + Long: "Emit a binary-safe unified diff of every change inside the guest workspace (committed since base + uncommitted + untracked, minus .gitignore). Non-mutating — the guest's index and working tree are untouched. Pass --base-commit with the head_commit from workspace prepare to capture changes even when the worker ran git commit inside the VM. Without --base-commit the diff is against the current guest HEAD, which misses committed changes.", Args: exactArgsUsage(1, "usage: banger vm workspace export "), ValidArgsFunction: completeVMNameOnlyAtPos0, Example: strings.TrimSpace(` diff --git a/internal/daemon/workspace.go b/internal/daemon/workspace.go index d24c585..58bae96 100644 --- a/internal/daemon/workspace.go +++ b/internal/daemon/workspace.go @@ -43,26 +43,18 @@ func (d *Daemon) ExportVMWorkspace(ctx context.Context, params api.WorkspaceExpo diffRef = "HEAD" } - // Stage all changes then emit a binary-safe unified diff against diffRef. - // After git add -A the index contains the full working state, so - // git diff --cached captures both committed deltas (HEAD moved - // past diffRef) and any additional uncommitted changes on top. - patchScript := fmt.Sprintf( - "set -euo pipefail\ncd %s\ngit add -A\ngit diff --cached %s --binary\n", - sess.ShellQuote(guestPath), - sess.ShellQuote(diffRef), - ) + // Both scripts run `git add -A` to capture the working tree + // (committed deltas + uncommitted modifications + untracked files + // minus .gitignore), but they route it through a throwaway index + // file instead of .git/index. Export is an observation step; the + // user's real staging area must stay exactly as they left it. + patchScript := exportScript(guestPath, diffRef, "--binary") patch, err := client.RunScriptOutput(ctx, patchScript) if err != nil { return api.WorkspaceExportResult{}, fmt.Errorf("export workspace diff: %w", err) } - // Enumerate changed paths (index already staged; this is a cheap read). - namesScript := fmt.Sprintf( - "set -euo pipefail\ncd %s\ngit diff --cached %s --name-only\n", - sess.ShellQuote(guestPath), - sess.ShellQuote(diffRef), - ) + namesScript := exportScript(guestPath, diffRef, "--name-only") namesOut, _ := client.RunScriptOutput(ctx, namesScript) var changed []string for _, line := range strings.Split(strings.TrimSpace(string(namesOut)), "\n") { @@ -80,6 +72,30 @@ func (d *Daemon) ExportVMWorkspace(ctx context.Context, params api.WorkspaceExpo }, nil } +// exportScript emits a shell snippet that diffs the working tree at +// guestPath against diffRef (HEAD or a commit SHA) WITHOUT touching +// the repo's real index. diffFlag selects the git-diff output mode +// ("--binary" for the patch body, "--name-only" for the file list). +// +// Mechanics: seed a temp index from diffRef's tree via git read-tree, +// restage the working tree into that temp index with GIT_INDEX_FILE, +// then emit the diff. The temp index is rm'd on exit via trap. +func exportScript(guestPath, diffRef, diffFlag string) string { + return fmt.Sprintf( + "set -euo pipefail\n"+ + "cd %s\n"+ + "tmp_idx=\"$(mktemp \"${TMPDIR:-/tmp}/banger-export.XXXXXX\")\"\n"+ + "trap 'rm -f \"$tmp_idx\"' EXIT\n"+ + "git read-tree %s --index-output=\"$tmp_idx\"\n"+ + "GIT_INDEX_FILE=\"$tmp_idx\" git add -A\n"+ + "GIT_INDEX_FILE=\"$tmp_idx\" git diff --cached %s %s\n", + sess.ShellQuote(guestPath), + sess.ShellQuote(diffRef), + sess.ShellQuote(diffRef), + diffFlag, + ) +} + func (d *Daemon) PrepareVMWorkspace(ctx context.Context, params api.VMWorkspacePrepareParams) (model.WorkspacePrepareResult, error) { mode, err := ws.ParsePrepareMode(params.Mode) if err != nil { diff --git a/internal/daemon/workspace_test.go b/internal/daemon/workspace_test.go index b43c09d..8e26eaf 100644 --- a/internal/daemon/workspace_test.go +++ b/internal/daemon/workspace_test.go @@ -355,3 +355,63 @@ func TestExportVMWorkspace_MultipleChangedFiles(t *testing.T) { } } } + +// TestExportVMWorkspace_DoesNotMutateRealIndex is a regression guard +// for an earlier design where `git add -A` ran against the guest's +// real `.git/index`, leaving staged changes behind after what the user +// thought was a read-only observation. Every export script must now +// route `git add -A` through a throwaway index selected by +// GIT_INDEX_FILE, and every script must clean that file up. +func TestExportVMWorkspace_DoesNotMutateRealIndex(t *testing.T) { + t.Parallel() + ctx := context.Background() + + apiSock := filepath.Join(t.TempDir(), "fc.sock") + firecracker := startFakeFirecracker(t, apiSock) + + vm := testVM("exportbox-readonly", "image-export", "172.16.0.107") + vm.State = model.VMStateRunning + vm.Runtime.State = model.VMStateRunning + vm.Runtime.PID = firecracker.Process.Pid + vm.Runtime.APISockPath = apiSock + + fake := &exportGuestClient{ + responses: []exportGuestResponse{ + {output: []byte("diff --git a/x b/x\n")}, + {output: []byte("x\n")}, + }, + } + d := newExportTestDaemonStore(t, fake) + upsertDaemonVM(t, ctx, d.store, vm) + + if _, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{IDOrName: vm.Name}); err != nil { + t.Fatalf("ExportVMWorkspace: %v", err) + } + + if len(fake.scripts) == 0 { + t.Fatal("expected at least one export script to be sent") + } + for i, script := range fake.scripts { + if !strings.Contains(script, "GIT_INDEX_FILE") { + t.Errorf("script[%d] missing GIT_INDEX_FILE routing:\n%s", i, script) + } + // git add -A must ONLY appear on a line that also sets + // GIT_INDEX_FILE. A bare occurrence would mutate the real + // index. + for _, line := range strings.Split(script, "\n") { + if strings.Contains(line, "git add -A") && !strings.Contains(line, "GIT_INDEX_FILE") { + t.Errorf("script[%d] has unscoped `git add -A`:\n%s", i, script) + break + } + } + if !strings.Contains(script, "git read-tree") { + t.Errorf("script[%d] missing git read-tree (temp index seed):\n%s", i, script) + } + if !strings.Contains(script, "mktemp") { + t.Errorf("script[%d] missing mktemp for temp index:\n%s", i, script) + } + if !strings.Contains(script, "trap") || !strings.Contains(script, "rm") { + t.Errorf("script[%d] missing temp-index cleanup trap:\n%s", i, script) + } + } +}