vm run: ship tracked files only by default; add --include-untracked + --dry-run

Workspace-mode vm run and vm workspace prepare used to copy both
tracked AND untracked non-ignored files into the guest. That silently
catches local .env files, scratch notes, credentials, and any other
working-tree state a developer hasn't explicitly gitignored — a real
data-exposure footgun given the golden image ships Docker and the
usual dev tooling.

Flip the default to tracked-only. Users who actually want the fuller
set opt in with --include-untracked (documented in both commands'
help). Gitignored files are still always excluded regardless of the
flag.

Add --dry-run to both vm run and vm workspace prepare. Dry-run
inspects the repo CLI-side (no VM created, no daemon RPC needed since
the daemon is always local and the inspection is a pure git read),
prints the exact file list + mode, and exits, giving a file-by-file
preview of what would land in the guest.

On a real (non-dry-run) invocation, if untracked files exist in the
repo but are being skipped under the new default, print a one-line
notice pointing to --include-untracked so users aren't surprised when
the guest is missing something they expected.

Signature changes:
- ListOverlayPaths takes an includeUntracked bool (tracked always;
  untracked gated by flag).
- InspectRepo takes the same flag and passes it through.
- VMWorkspacePrepareParams gains IncludeUntracked.
- WorkspaceService.workspaceInspectRepo seam signature widened to
  match (4 callers in tests updated).

New workspace package tests cover both modes and verify that
gitignored files never leak regardless of the flag.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-21 19:53:17 -03:00
parent 25a1466947
commit 2a7f55f028
No known key found for this signature in database
GPG key ID: 33112E6833C34679
11 changed files with 293 additions and 67 deletions

View file

@ -20,11 +20,11 @@ import (
// opposed to always requiring callers to populate s.workspaceInspectRepo
// in a constructor) lets tests selectively override one hook without
// having to wire both.
func (s *WorkspaceService) workspaceInspectRepoHook(ctx context.Context, sourcePath, branchName, fromRef string) (ws.RepoSpec, error) {
func (s *WorkspaceService) workspaceInspectRepoHook(ctx context.Context, sourcePath, branchName, fromRef string, includeUntracked bool) (ws.RepoSpec, error) {
if s != nil && s.workspaceInspectRepo != nil {
return s.workspaceInspectRepo(ctx, sourcePath, branchName, fromRef)
return s.workspaceInspectRepo(ctx, sourcePath, branchName, fromRef, includeUntracked)
}
return ws.InspectRepo(ctx, sourcePath, branchName, fromRef)
return ws.InspectRepo(ctx, sourcePath, branchName, fromRef, includeUntracked)
}
func (s *WorkspaceService) workspaceImportHook(ctx context.Context, client ws.GuestClient, spec ws.RepoSpec, guestPath string, mode model.WorkspacePrepareMode) error {
@ -160,14 +160,14 @@ func (s *WorkspaceService) PrepareVMWorkspace(ctx context.Context, params api.VM
unlock := s.workspaceLocks.lock(vm.ID)
defer unlock()
return s.prepareVMWorkspaceGuestIO(ctx, vm, strings.TrimSpace(params.SourcePath), guestPath, branchName, fromRef, mode, params.ReadOnly)
return s.prepareVMWorkspaceGuestIO(ctx, vm, strings.TrimSpace(params.SourcePath), guestPath, branchName, fromRef, mode, params.ReadOnly, params.IncludeUntracked)
}
// prepareVMWorkspaceGuestIO performs the actual guest-side work:
// inspect the local repo, dial SSH, stream the tar, optionally chmod
// readonly. It is called without holding the VM mutex.
func (s *WorkspaceService) prepareVMWorkspaceGuestIO(ctx context.Context, vm model.VMRecord, sourcePath, guestPath, branchName, fromRef string, mode model.WorkspacePrepareMode, readOnly bool) (model.WorkspacePrepareResult, error) {
spec, err := s.workspaceInspectRepoHook(ctx, sourcePath, branchName, fromRef)
func (s *WorkspaceService) prepareVMWorkspaceGuestIO(ctx context.Context, vm model.VMRecord, sourcePath, guestPath, branchName, fromRef string, mode model.WorkspacePrepareMode, readOnly, includeUntracked bool) (model.WorkspacePrepareResult, error) {
spec, err := s.workspaceInspectRepoHook(ctx, sourcePath, branchName, fromRef, includeUntracked)
if err != nil {
return model.WorkspacePrepareResult{}, err
}

View file

@ -67,11 +67,12 @@ var HostCommandOutputFunc = func(ctx context.Context, name string, args ...strin
return output, fmt.Errorf("%s: %w: %s", command, err, detail)
}
// InspectRepo resolves rawPath into an absolute repo root and captures the
// HEAD, branch, optional base-from ref, git identity, origin URL, submodules,
// and overlay paths (tracked + untracked non-ignored files) needed for a
// prepare.
func InspectRepo(ctx context.Context, rawPath, branchName, fromRef string) (RepoSpec, error) {
// InspectRepo resolves rawPath into an absolute repo root and captures
// the HEAD, branch, optional base-from ref, git identity, origin URL,
// submodules, and overlay paths needed for a prepare. Overlay paths
// cover tracked files by default; untracked non-ignored files are
// included only when includeUntracked is true.
func InspectRepo(ctx context.Context, rawPath, branchName, fromRef string, includeUntracked bool) (RepoSpec, error) {
sourcePath, err := ResolveSourcePath(rawPath)
if err != nil {
return RepoSpec{}, err
@ -119,7 +120,7 @@ func InspectRepo(ctx context.Context, rawPath, branchName, fromRef string) (Repo
if err != nil {
return RepoSpec{}, fmt.Errorf("resolve origin url for %s: %w", repoRoot, err)
}
overlayPaths, err := ListOverlayPaths(ctx, repoRoot)
overlayPaths, err := ListOverlayPaths(ctx, repoRoot, includeUntracked)
if err != nil {
return RepoSpec{}, err
}
@ -292,17 +293,22 @@ func ListSubmodules(ctx context.Context, repoRoot string) ([]string, error) {
return submodules, nil
}
// ListOverlayPaths returns tracked + untracked non-ignored files in
// repoRoot. Missing tracked entries (deleted working-tree files) are skipped.
func ListOverlayPaths(ctx context.Context, repoRoot string) ([]string, error) {
// ListOverlayPaths returns tracked files in repoRoot, plus (when
// includeUntracked is true) untracked non-ignored files. Missing
// tracked entries (deleted working-tree files) are skipped in both
// modes.
//
// The default is tracked-only because "untracked + not gitignored"
// silently catches local credentials, .env files, scratch notes, and
// other secrets that live in the working tree but aren't meant to
// leave the developer's machine. Callers that genuinely want the
// fuller set (scratch repos, vendored binaries the user is iterating
// on) opt in explicitly.
func ListOverlayPaths(ctx context.Context, repoRoot string, includeUntracked bool) ([]string, error) {
trackedOutput, err := GitOutput(ctx, repoRoot, "ls-files", "-z")
if err != nil {
return nil, fmt.Errorf("list tracked files for %s: %w", repoRoot, err)
}
untrackedOutput, err := GitOutput(ctx, repoRoot, "ls-files", "--others", "--exclude-standard", "-z")
if err != nil {
return nil, fmt.Errorf("list untracked files for %s: %w", repoRoot, err)
}
paths := make([]string, 0)
seen := make(map[string]struct{})
for _, relPath := range ParseNullSeparatedOutput(trackedOutput) {
@ -318,20 +324,44 @@ func ListOverlayPaths(ctx context.Context, repoRoot string) ([]string, error) {
seen[relPath] = struct{}{}
paths = append(paths, relPath)
}
for _, relPath := range ParseNullSeparatedOutput(untrackedOutput) {
if relPath == "" {
continue
if includeUntracked {
untrackedOutput, err := GitOutput(ctx, repoRoot, "ls-files", "--others", "--exclude-standard", "-z")
if err != nil {
return nil, fmt.Errorf("list untracked files for %s: %w", repoRoot, err)
}
if _, ok := seen[relPath]; ok {
continue
for _, relPath := range ParseNullSeparatedOutput(untrackedOutput) {
if relPath == "" {
continue
}
if _, ok := seen[relPath]; ok {
continue
}
seen[relPath] = struct{}{}
paths = append(paths, relPath)
}
seen[relPath] = struct{}{}
paths = append(paths, relPath)
}
sort.Strings(paths)
return paths, nil
}
// CountUntrackedPaths reports how many untracked, non-ignored files
// git lists for repoRoot (`git ls-files --others --exclude-standard`).
// The CLI uses the figure to warn that such files exist and will be
// left out of the workspace under the tracked-only default. Empty
// entries in the NUL-separated output are not counted. A git failure
// is returned wrapped, together with a zero count.
func CountUntrackedPaths(ctx context.Context, repoRoot string) (int, error) {
	out, err := GitOutput(ctx, repoRoot, "ls-files", "--others", "--exclude-standard", "-z")
	if err != nil {
		return 0, fmt.Errorf("list untracked files for %s: %w", repoRoot, err)
	}

	var total int
	for _, entry := range ParseNullSeparatedOutput(out) {
		if entry == "" {
			continue
		}
		total++
	}
	return total, nil
}
// ParsePrepareMode validates and canonicalises a user-supplied mode value.
func ParsePrepareMode(raw string) (model.WorkspacePrepareMode, error) {
switch strings.TrimSpace(raw) {

View file

@ -0,0 +1,99 @@
package workspace
import (
"context"
"os"
"os/exec"
"path/filepath"
"slices"
"testing"
)
// seedRepo builds a throwaway git repository inside t.TempDir() and
// returns its root. After it runs the working tree holds:
//
//   - .gitignore, README.md and src.go, all committed (tracked);
//   - ignored.log, matched by .gitignore;
//   - SECRETS.env, untracked and not ignored.
//
// The test is skipped when no git binary is found on PATH.
func seedRepo(t *testing.T) string {
	t.Helper()
	if _, lookErr := exec.LookPath("git"); lookErr != nil {
		t.Skipf("git not on PATH: %v", lookErr)
	}
	root := t.TempDir()

	// git runs one git subcommand inside root and fails the test on a
	// non-zero exit, echoing the command line and its combined output.
	git := func(gitArgs ...string) {
		t.Helper()
		argv := append([]string{"git"}, gitArgs...)
		cmd := exec.Command(argv[0], argv[1:]...)
		cmd.Dir = root
		// Identity is supplied through the environment and the global
		// config is pointed at /dev/null, so commits work without any
		// user-level git configuration.
		cmd.Env = append(os.Environ(),
			"GIT_AUTHOR_NAME=t", "GIT_AUTHOR_EMAIL=t@t",
			"GIT_COMMITTER_NAME=t", "GIT_COMMITTER_EMAIL=t@t",
			"GIT_CONFIG_GLOBAL=/dev/null",
		)
		out, err := cmd.CombinedOutput()
		if err != nil {
			t.Fatalf("%v: %v\n%s", argv, err, out)
		}
	}

	// put writes body to name, relative to the repo root.
	put := func(name, body string) {
		t.Helper()
		target := filepath.Join(root, name)
		if err := os.WriteFile(target, []byte(body), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	// First commit: the ignore rules plus a README. Signing is turned
	// off per-repo so the commits below never invoke GPG.
	git("init", "-q", "-b", "main")
	git("config", "commit.gpgsign", "false")
	put(".gitignore", "ignored.log\n")
	put("README.md", "hello\n")
	git("add", ".gitignore", "README.md")
	git("commit", "-q", "-m", "init")

	// Second commit: one more tracked file for ls-files to report.
	put("src.go", "package main\n")
	git("add", "src.go")
	git("commit", "-q", "-m", "src")

	// Left uncommitted on purpose: ignored.log is filtered out by
	// --exclude-standard, while SECRETS.env is the untracked,
	// non-ignored file whose presence depends on the flag under test.
	put("ignored.log", "noisy\n")
	put("SECRETS.env", "TOKEN=abc\n")
	return root
}
func TestListOverlayPaths_TrackedOnlyByDefault(t *testing.T) {
repo := seedRepo(t)
got, err := ListOverlayPaths(context.Background(), repo, false)
if err != nil {
t.Fatalf("ListOverlayPaths: %v", err)
}
want := []string{".gitignore", "README.md", "src.go"}
if !slices.Equal(got, want) {
t.Fatalf("got %v, want %v (untracked SECRETS.env must be excluded; gitignored ignored.log must always be excluded)", got, want)
}
}
func TestListOverlayPaths_IncludeUntracked(t *testing.T) {
repo := seedRepo(t)
got, err := ListOverlayPaths(context.Background(), repo, true)
if err != nil {
t.Fatalf("ListOverlayPaths: %v", err)
}
want := []string{".gitignore", "README.md", "SECRETS.env", "src.go"}
if !slices.Equal(got, want) {
t.Fatalf("got %v, want %v", got, want)
}
// gitignored files must stay out even when untracked is included.
for _, p := range got {
if p == "ignored.log" {
t.Fatalf("gitignored file leaked into overlay: %v", got)
}
}
}
func TestCountUntrackedPaths(t *testing.T) {
repo := seedRepo(t)
count, err := CountUntrackedPaths(context.Background(), repo)
if err != nil {
t.Fatalf("CountUntrackedPaths: %v", err)
}
if count != 1 {
t.Fatalf("count = %d, want 1 (only SECRETS.env; ignored.log is gitignored)", count)
}
}

View file

@ -46,7 +46,7 @@ type WorkspaceService struct {
beginOperation func(name string, attrs ...any) *operationLog
// Test seams.
workspaceInspectRepo func(ctx context.Context, sourcePath, branchName, fromRef string) (ws.RepoSpec, error)
workspaceInspectRepo func(ctx context.Context, sourcePath, branchName, fromRef string, includeUntracked bool) (ws.RepoSpec, error)
workspaceImport func(ctx context.Context, client ws.GuestClient, spec ws.RepoSpec, guestPath string, mode model.WorkspacePrepareMode) error
}

View file

@ -400,7 +400,7 @@ func TestPrepareVMWorkspace_ReleasesVMLockDuringGuestIO(t *testing.T) {
// Import blocks until we say go.
importStarted := make(chan struct{})
releaseImport := make(chan struct{})
d.ws.workspaceInspectRepo = func(context.Context, string, string, string) (workspace.RepoSpec, error) {
d.ws.workspaceInspectRepo = func(context.Context, string, string, string, bool) (workspace.RepoSpec, error) {
return workspace.RepoSpec{RepoName: "fake", RepoRoot: "/tmp/fake"}, nil
}
d.ws.workspaceImport = func(context.Context, workspace.GuestClient, workspace.RepoSpec, string, model.WorkspacePrepareMode) error {
@ -483,7 +483,7 @@ func TestPrepareVMWorkspace_SerialisesConcurrentPreparesOnSameVM(t *testing.T) {
upsertDaemonVM(t, ctx, d.store, vm)
d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
d.ws.workspaceInspectRepo = func(context.Context, string, string, string) (workspace.RepoSpec, error) {
d.ws.workspaceInspectRepo = func(context.Context, string, string, string, bool) (workspace.RepoSpec, error) {
return workspace.RepoSpec{RepoName: "fake", RepoRoot: "/tmp/fake"}, nil
}