workspace export: stop mutating the guest repo index

Previously `banger vm workspace export` ran `git add -A` against the
guest's real `.git/index`, so the observation step left staged
changes behind that users never asked for. Reconnecting later (ssh,
another export) surfaced them and looked like phantom work.

Route `git add -A` through a throwaway index file instead:

  tmp_idx=$(mktemp ...)
  trap 'rm -f "$tmp_idx"' EXIT
  git read-tree <ref> --index-output="$tmp_idx"
  GIT_INDEX_FILE="$tmp_idx" git add -A
  GIT_INDEX_FILE="$tmp_idx" git diff --cached <ref> --binary|--name-only

The real .git/index, working tree, and refs stay exactly as the user
left them. Same diff content — commits past <ref>, uncommitted edits,
and untracked files (minus .gitignore) all captured.

Regression test locks the invariant: every export script must route
add -A through GIT_INDEX_FILE and clean the temp index on exit. CLI
help text updated to say "non-mutating".
This commit is contained in:
Thales Maciel 2026-04-19 13:20:56 -03:00
parent 21b74639d8
commit 99de42385f
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 92 additions and 16 deletions

View file

@ -1328,7 +1328,7 @@ func newVMWorkspaceExportCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "export <id-or-name>",
Short: "Pull changes from a guest workspace back to the host as a patch",
Long: "Stage all changes inside the guest workspace (git add -A) and emit a binary-safe unified diff. Pass --base-commit with the head_commit from workspace prepare to capture changes even when the worker ran git commit inside the VM. Without --base-commit the diff is against the current guest HEAD, which misses committed changes.",
Long: "Emit a binary-safe unified diff of every change inside the guest workspace (committed since base + uncommitted + untracked, minus .gitignore). Non-mutating — the guest's index and working tree are untouched. Pass --base-commit with the head_commit from workspace prepare to capture changes even when the worker ran git commit inside the VM. Without --base-commit the diff is against the current guest HEAD, which misses committed changes.",
Args: exactArgsUsage(1, "usage: banger vm workspace export <id-or-name>"),
ValidArgsFunction: completeVMNameOnlyAtPos0,
Example: strings.TrimSpace(`

View file

@ -43,26 +43,18 @@ func (d *Daemon) ExportVMWorkspace(ctx context.Context, params api.WorkspaceExpo
diffRef = "HEAD"
}
// Stage all changes then emit a binary-safe unified diff against diffRef.
// After git add -A the index contains the full working state, so
// git diff --cached <diffRef> captures both committed deltas (HEAD moved
// past diffRef) and any additional uncommitted changes on top.
patchScript := fmt.Sprintf(
"set -euo pipefail\ncd %s\ngit add -A\ngit diff --cached %s --binary\n",
sess.ShellQuote(guestPath),
sess.ShellQuote(diffRef),
)
// Both scripts run `git add -A` to capture the working tree
// (committed deltas + uncommitted modifications + untracked files
// minus .gitignore), but they route it through a throwaway index
// file instead of .git/index. Export is an observation step; the
// user's real staging area must stay exactly as they left it.
patchScript := exportScript(guestPath, diffRef, "--binary")
patch, err := client.RunScriptOutput(ctx, patchScript)
if err != nil {
return api.WorkspaceExportResult{}, fmt.Errorf("export workspace diff: %w", err)
}
// Enumerate changed paths (index already staged; this is a cheap read).
namesScript := fmt.Sprintf(
"set -euo pipefail\ncd %s\ngit diff --cached %s --name-only\n",
sess.ShellQuote(guestPath),
sess.ShellQuote(diffRef),
)
namesScript := exportScript(guestPath, diffRef, "--name-only")
namesOut, _ := client.RunScriptOutput(ctx, namesScript)
var changed []string
for _, line := range strings.Split(strings.TrimSpace(string(namesOut)), "\n") {
@ -80,6 +72,30 @@ func (d *Daemon) ExportVMWorkspace(ctx context.Context, params api.WorkspaceExpo
}, nil
}
// exportScript emits a shell snippet that diffs the working tree at
// guestPath against diffRef (HEAD or a commit SHA) WITHOUT touching
// the repo's real index. diffFlag selects the git-diff output mode
// ("--binary" for the patch body, "--name-only" for the file list).
//
// Mechanics: seed a temp index from diffRef's tree via git read-tree,
// restage the working tree into that temp index with GIT_INDEX_FILE,
// then emit the diff. The temp index is rm'd on exit via trap.
func exportScript(guestPath, diffRef, diffFlag string) string {
return fmt.Sprintf(
"set -euo pipefail\n"+
"cd %s\n"+
"tmp_idx=\"$(mktemp \"${TMPDIR:-/tmp}/banger-export.XXXXXX\")\"\n"+
"trap 'rm -f \"$tmp_idx\"' EXIT\n"+
"git read-tree %s --index-output=\"$tmp_idx\"\n"+
"GIT_INDEX_FILE=\"$tmp_idx\" git add -A\n"+
"GIT_INDEX_FILE=\"$tmp_idx\" git diff --cached %s %s\n",
sess.ShellQuote(guestPath),
sess.ShellQuote(diffRef),
sess.ShellQuote(diffRef),
diffFlag,
)
}
func (d *Daemon) PrepareVMWorkspace(ctx context.Context, params api.VMWorkspacePrepareParams) (model.WorkspacePrepareResult, error) {
mode, err := ws.ParsePrepareMode(params.Mode)
if err != nil {

View file

@ -355,3 +355,63 @@ func TestExportVMWorkspace_MultipleChangedFiles(t *testing.T) {
}
}
}
// TestExportVMWorkspace_DoesNotMutateRealIndex is a regression guard
// for an earlier design where `git add -A` ran against the guest's
// real `.git/index`, leaving staged changes behind after what the user
// thought was a read-only observation. Every export script must now
// route `git add -A` through a throwaway index selected by
// GIT_INDEX_FILE, and every script must clean that file up.
func TestExportVMWorkspace_DoesNotMutateRealIndex(t *testing.T) {
t.Parallel()
ctx := context.Background()
apiSock := filepath.Join(t.TempDir(), "fc.sock")
firecracker := startFakeFirecracker(t, apiSock)
vm := testVM("exportbox-readonly", "image-export", "172.16.0.107")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = firecracker.Process.Pid
vm.Runtime.APISockPath = apiSock
fake := &exportGuestClient{
responses: []exportGuestResponse{
{output: []byte("diff --git a/x b/x\n")},
{output: []byte("x\n")},
},
}
d := newExportTestDaemonStore(t, fake)
upsertDaemonVM(t, ctx, d.store, vm)
if _, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{IDOrName: vm.Name}); err != nil {
t.Fatalf("ExportVMWorkspace: %v", err)
}
if len(fake.scripts) == 0 {
t.Fatal("expected at least one export script to be sent")
}
for i, script := range fake.scripts {
if !strings.Contains(script, "GIT_INDEX_FILE") {
t.Errorf("script[%d] missing GIT_INDEX_FILE routing:\n%s", i, script)
}
// git add -A must ONLY appear on a line that also sets
// GIT_INDEX_FILE. A bare occurrence would mutate the real
// index.
for _, line := range strings.Split(script, "\n") {
if strings.Contains(line, "git add -A") && !strings.Contains(line, "GIT_INDEX_FILE") {
t.Errorf("script[%d] has unscoped `git add -A`:\n%s", i, script)
break
}
}
if !strings.Contains(script, "git read-tree") {
t.Errorf("script[%d] missing git read-tree (temp index seed):\n%s", i, script)
}
if !strings.Contains(script, "mktemp") {
t.Errorf("script[%d] missing mktemp for temp index:\n%s", i, script)
}
if !strings.Contains(script, "trap") || !strings.Contains(script, "rm") {
t.Errorf("script[%d] missing temp-index cleanup trap:\n%s", i, script)
}
}
}