banger/internal/daemon/workspace/workspace.go
Thales Maciel 2a7f55f028
vm run: ship tracked files only by default; add --include-untracked + --dry-run
Workspace-mode vm run and vm workspace prepare used to copy both
tracked AND untracked non-ignored files into the guest. That silently
catches local .env files, scratch notes, credentials, and any other
working-tree state a developer hasn't explicitly gitignored — a real
data-exposure footgun given the golden image ships Docker and the
usual dev tooling.

Flip the default to tracked-only. Users who actually want the fuller
set opt in with --include-untracked (documented in both commands'
help). Gitignored files are still always excluded regardless of the
flag.

Add --dry-run to both vm run and vm workspace prepare. Dry-run
inspects the repo CLI-side (no VM created, no daemon RPC needed since
the daemon is always local and the inspection is a pure git read),
prints the exact file list + mode, and exits. A byte-level preview of
what would land in the guest.

When running real (non-dry) and untracked files exist in the repo but
are being skipped under the new default, print a one-line notice
pointing to --include-untracked so users aren't surprised when the
guest is missing something they expected.

Signature changes:
- ListOverlayPaths takes an includeUntracked bool (tracked always;
  untracked gated by flag).
- InspectRepo takes the same flag and passes it through.
- VMWorkspacePrepareParams gains IncludeUntracked.
- WorkspaceService.workspaceInspectRepo seam signature widened to
  match (4 callers in tests updated).

New workspace package tests cover both modes and verify that
gitignored files never leak regardless of the flag.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 19:53:17 -03:00

429 lines
16 KiB
Go

// Package workspace contains the pure helpers of the workspace subsystem:
// git repo inspection, shallow copy preparation, guest-side tar import,
// finalization script generation, and small utilities.
//
// The orchestrator methods (ExportVMWorkspace, PrepareVMWorkspace) stay on
// *daemon.Daemon.
package workspace
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/url"
"os"
"path/filepath"
"sort"
"strings"
"banger/internal/model"
"banger/internal/system"
)
// ShallowFetchDepth is the history depth passed as --depth when
// PrepareRepoCopy builds the transient shallow clone used by the metadata /
// overlay prepare modes. The same depth is used for the follow-up fetch that
// PrepareRepoCopy issues when the commit to check out is missing from that
// clone.
const ShallowFetchDepth = 10
// RepoSpec is the host-side snapshot of a git repository that is about to be
// imported into a guest. InspectRepo fills it in; ImportRepoToGuest,
// FinalizeScript and PrepareRepoCopy read from it.
type RepoSpec struct {
	// SourcePath is the absolute directory the caller pointed at.
	SourcePath string
	// RepoRoot is the top-level directory of the working tree that contains
	// SourcePath (git rev-parse --show-toplevel).
	RepoRoot string
	// RepoName is the base name of RepoRoot.
	RepoName string

	// HeadCommit is the commit HEAD resolved to at inspection time.
	HeadCommit string
	// CurrentBranch is the output of `git branch --show-current`. When it is
	// empty and no BranchName is requested, FinalizeScript checks out
	// HeadCommit detached.
	CurrentBranch string
	// BranchName is the (whitespace-trimmed) branch requested for the guest
	// checkout; empty when none was requested.
	BranchName string
	// BaseCommit is the commit BranchName starts from. It equals HeadCommit
	// unless a branch with an explicit base ref was requested.
	BaseCommit string

	// OriginURL is the configured remote.origin.url, or empty.
	OriginURL string
	// GitUserName is the configured user.name, or empty.
	GitUserName string
	// GitUserEmail is the configured user.email, or empty.
	GitUserEmail string

	// OverlayPaths are the repo-relative paths returned by ListOverlayPaths.
	OverlayPaths []string
	// Submodules are the gitlink paths returned by ListSubmodules.
	Submodules []string
}
// GuestClient is the small set of guest operations ImportRepoToGuest relies
// on. The daemon package's guestSSHClient satisfies it.
//
// In every method, log is the writer the caller hands in to capture
// diagnostics; ImportRepoToGuest passes a buffer and attaches its contents to
// the error of a failed step.
type GuestClient interface {
	// RunScript runs script on the guest.
	RunScript(ctx context.Context, script string, log io.Writer) error

	// StreamTar is called with a host directory and a guest command that
	// extracts a tar archive from its stdin.
	// NOTE(review): presumably archives dir and pipes it into command on the
	// guest — confirm against the implementation.
	StreamTar(ctx context.Context, dir, command string, log io.Writer) error

	// StreamTarEntries is the selective variant of StreamTar: the caller
	// additionally names the entries under dir to send.
	// NOTE(review): entries are presumably paths relative to dir — confirm
	// against the implementation.
	StreamTarEntries(ctx context.Context, dir string, entries []string, command string, log io.Writer) error
}
// HostCommandOutputFunc executes name with args on the host through a fresh
// system runner and returns the output the runner produced.
//
// When the command fails, the returned error wraps the runner's error,
// prefixed with the command line and — if the command printed anything —
// followed by that output with surrounding whitespace removed. The raw output
// is still returned next to the error.
//
// It is a package-level variable so tests can replace it with a stub.
var HostCommandOutputFunc = func(ctx context.Context, name string, args ...string) ([]byte, error) {
	runner := system.NewRunner()
	output, runErr := runner.Run(ctx, name, args...)
	if runErr == nil {
		return output, nil
	}

	cmdline := strings.TrimSpace(name + " " + strings.Join(args, " "))
	if detail := strings.TrimSpace(string(output)); detail != "" {
		return output, fmt.Errorf("%s: %w: %s", cmdline, runErr, detail)
	}
	return output, fmt.Errorf("%s: %w", cmdline, runErr)
}
// InspectRepo resolves rawPath to the git working tree that contains it and
// gathers everything a workspace prepare needs into a RepoSpec: HEAD, the
// current branch, the base commit for an optional new branch, git identity,
// origin URL, submodules, and overlay paths.
//
// Parameters:
//   - rawPath: a directory inside the repository (see ResolveSourcePath).
//   - branchName: optional branch to check out in the guest. Surrounding
//     whitespace is ignored; empty means no new branch.
//   - fromRef: optional base revision for branchName. It is consulted only
//     when branchName is set. When it is blank the branch starts at HEAD,
//     the same default `git checkout -b <name>` uses, instead of failing on
//     an empty revision.
//   - includeUntracked: forwarded to ListOverlayPaths. Overlay paths cover
//     tracked files by default; untracked non-ignored files are added only
//     when this is true.
//
// An error is returned when rawPath is not a usable directory, is not inside
// a git repository, the repository is bare or has no commits, fromRef does
// not resolve to a commit, or one of the underlying git queries fails.
func InspectRepo(ctx context.Context, rawPath, branchName, fromRef string, includeUntracked bool) (RepoSpec, error) {
	sourcePath, err := ResolveSourcePath(rawPath)
	if err != nil {
		return RepoSpec{}, err
	}
	repoRoot, err := GitTrimmedOutput(ctx, sourcePath, "rev-parse", "--show-toplevel")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("%s is not inside a git repository", sourcePath)
	}
	isBare, err := GitTrimmedOutput(ctx, repoRoot, "rev-parse", "--is-bare-repository")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("inspect git repository %s: %w", repoRoot, err)
	}
	if isBare == "true" {
		return RepoSpec{}, fmt.Errorf("workspace prepare requires a non-bare git repository: %s", repoRoot)
	}
	submodules, err := ListSubmodules(ctx, repoRoot)
	if err != nil {
		return RepoSpec{}, err
	}
	headCommit, err := GitTrimmedOutput(ctx, repoRoot, "rev-parse", "HEAD^{commit}")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("git repository %s must have at least one commit", repoRoot)
	}
	currentBranch, err := GitTrimmedOutput(ctx, repoRoot, "branch", "--show-current")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("resolve current branch for %s: %w", repoRoot, err)
	}

	// The base commit defaults to HEAD. It is only re-resolved when a new
	// branch was requested together with an explicit base ref; a blank
	// fromRef would otherwise be handed to git as the bare suffix
	// "^{commit}", which never resolves.
	baseCommit := headCommit
	branchName = strings.TrimSpace(branchName)
	fromRef = strings.TrimSpace(fromRef)
	if branchName != "" && fromRef != "" {
		baseCommit, err = GitTrimmedOutput(ctx, repoRoot, "rev-parse", fromRef+"^{commit}")
		if err != nil {
			return RepoSpec{}, fmt.Errorf("resolve workspace from %q: %w", fromRef, err)
		}
	}

	gitUserName, err := GitResolvedConfigValue(ctx, repoRoot, "user.name")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("resolve git user.name for %s: %w", repoRoot, err)
	}
	gitUserEmail, err := GitResolvedConfigValue(ctx, repoRoot, "user.email")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("resolve git user.email for %s: %w", repoRoot, err)
	}
	originURL, err := GitResolvedConfigValue(ctx, repoRoot, "remote.origin.url")
	if err != nil {
		return RepoSpec{}, fmt.Errorf("resolve origin url for %s: %w", repoRoot, err)
	}
	overlayPaths, err := ListOverlayPaths(ctx, repoRoot, includeUntracked)
	if err != nil {
		return RepoSpec{}, err
	}

	return RepoSpec{
		SourcePath:    sourcePath,
		RepoRoot:      repoRoot,
		RepoName:      filepath.Base(repoRoot),
		HeadCommit:    headCommit,
		CurrentBranch: currentBranch,
		BranchName:    branchName,
		BaseCommit:    baseCommit,
		OriginURL:     originURL,
		GitUserName:   gitUserName,
		GitUserEmail:  gitUserEmail,
		OverlayPaths:  overlayPaths,
		Submodules:    submodules,
	}, nil
}
// ImportRepoToGuest places the repository described by spec at guestPath
// inside the guest, using client for every guest-side operation.
//
// What is transferred depends on mode:
//   - full copy: spec.RepoRoot is streamed into a freshly recreated
//     guestPath, then the finalize script runs.
//   - metadata only: a shallow clone built by PrepareRepoCopy is streamed
//     instead, then the finalize script runs.
//   - shallow overlay: same as metadata only, after which spec.OverlayPaths
//     from spec.RepoRoot are streamed into the existing guestPath.
//
// Any other mode is rejected. A failing guest step is reported through
// FormatStepError together with the log captured for that step.
func ImportRepoToGuest(ctx context.Context, client GuestClient, spec RepoSpec, guestPath string, mode model.WorkspacePrepareMode) error {
	// recreateAndUnpack builds the guest command that removes guestPath,
	// recreates it, and extracts the tar archive read from stdin into it.
	recreateAndUnpack := func() string {
		return fmt.Sprintf("rm -rf %s && mkdir -p %s && tar -o -C %s --strip-components=1 -xf -", ShellQuote(guestPath), ShellQuote(guestPath), ShellQuote(guestPath))
	}
	// finalize runs the finalize script and labels a failure with step.
	finalize := func(step string) error {
		var scriptLog bytes.Buffer
		if err := client.RunScript(ctx, FinalizeScript(spec, guestPath, mode), &scriptLog); err != nil {
			return FormatStepError(step, err, scriptLog.String())
		}
		return nil
	}

	if mode == model.WorkspacePrepareModeFullCopy {
		var streamLog bytes.Buffer
		if err := client.StreamTar(ctx, spec.RepoRoot, recreateAndUnpack(), &streamLog); err != nil {
			return FormatStepError("copy full workspace", err, streamLog.String())
		}
		return finalize("finalize full workspace")
	}

	withOverlay := mode == model.WorkspacePrepareModeShallowOverlay
	if !withOverlay && mode != model.WorkspacePrepareModeMetadataOnly {
		return fmt.Errorf("unsupported workspace mode %q", mode)
	}

	repoCopyDir, cleanup, err := PrepareRepoCopy(ctx, spec)
	if err != nil {
		return err
	}
	// The temporary clone is only needed until this function returns.
	defer cleanup()

	var streamLog bytes.Buffer
	if err := client.StreamTar(ctx, repoCopyDir, recreateAndUnpack(), &streamLog); err != nil {
		return FormatStepError("copy guest git metadata", err, streamLog.String())
	}
	if err := finalize("prepare guest checkout"); err != nil {
		return err
	}
	if !withOverlay {
		return nil
	}

	// The overlay extracts into the existing checkout, so guestPath is not
	// wiped this time.
	var overlayLog bytes.Buffer
	unpackInPlace := fmt.Sprintf("tar -o -C %s --strip-components=1 -xf -", ShellQuote(guestPath))
	if err := client.StreamTarEntries(ctx, spec.RepoRoot, spec.OverlayPaths, unpackInPlace, &overlayLog); err != nil {
		return FormatStepError("overlay workspace working tree", err, overlayLog.String())
	}
	return nil
}
// FinalizeScript builds the shell script that is run inside the guest once
// the repository copy has landed at guestPath. In order, the script:
//
//  1. enables strict mode and registers guestPath as a git safe.directory;
//  2. for every mode except full copy, deletes everything directly under
//     guestPath other than .git;
//  3. checks out spec.BranchName at spec.BaseCommit when a branch was
//     requested, otherwise spec.CurrentBranch at spec.HeadCommit, otherwise
//     spec.HeadCommit detached;
//  4. sets user.name and user.email in the repository, but only when both
//     values are present.
//
// All interpolated values go through ShellQuote.
func FinalizeScript(spec RepoSpec, guestPath string, mode model.WorkspacePrepareMode) string {
	var out strings.Builder

	out.WriteString("set -euo pipefail\n")
	fmt.Fprintf(&out, "DIR=%s\n", ShellQuote(guestPath))
	out.WriteString("git config --global --add safe.directory \"$DIR\"\n")

	if mode != model.WorkspacePrepareModeFullCopy {
		out.WriteString("find \"$DIR\" -mindepth 1 -maxdepth 1 ! -name .git -exec rm -rf {} +\n")
	}

	// Prefer the requested branch; fall back to the branch checked out on the
	// host. If neither is set the checkout is detached.
	branch, commit := spec.BranchName, spec.BaseCommit
	if strings.TrimSpace(branch) == "" {
		branch, commit = spec.CurrentBranch, spec.HeadCommit
	}
	if strings.TrimSpace(branch) != "" {
		fmt.Fprintf(&out, "git -C \"$DIR\" checkout -B %s %s\n", ShellQuote(branch), ShellQuote(commit))
	} else {
		fmt.Fprintf(&out, "git -C \"$DIR\" checkout --detach %s\n", ShellQuote(spec.HeadCommit))
	}

	identityMissing := strings.TrimSpace(spec.GitUserName) == "" || strings.TrimSpace(spec.GitUserEmail) == ""
	if !identityMissing {
		fmt.Fprintf(&out, "git -C \"$DIR\" config user.name %s\n", ShellQuote(spec.GitUserName))
		fmt.Fprintf(&out, "git -C \"$DIR\" config user.email %s\n", ShellQuote(spec.GitUserEmail))
	}

	return out.String()
}
// PrepareRepoCopy creates a shallow, checkout-less clone of spec.RepoRoot in
// a new temporary directory and returns the clone's path together with a
// cleanup function that removes the temporary root.
//
// The clone is limited to ShallowFetchDepth commits and, when a branch is
// checked out on the host, to that single branch. If the commit that will be
// checked out in the guest (spec.BaseCommit when a new branch was requested,
// spec.HeadCommit otherwise) is not present in the clone, it is fetched
// explicitly. Finally the clone's origin remote is pointed at spec.OriginURL,
// or removed when the host repository has none.
//
// On failure the temporary root is removed before returning and the returned
// cleanup function is nil.
func PrepareRepoCopy(ctx context.Context, spec RepoSpec) (string, func(), error) {
	tempRoot, err := os.MkdirTemp("", "banger-workspace-*")
	if err != nil {
		return "", nil, err
	}
	cleanup := func() { _ = os.RemoveAll(tempRoot) }
	// abort discards the temp root and reports failure.
	abort := func(failure error) (string, func(), error) {
		cleanup()
		return "", nil, failure
	}

	repoCopyDir := filepath.Join(tempRoot, spec.RepoName)
	sourceURL := GitFileURL(spec.RepoRoot)
	depth := fmt.Sprint(ShallowFetchDepth)

	cloneArgs := []string{"clone", "--no-checkout", "--depth", depth}
	if strings.TrimSpace(spec.CurrentBranch) != "" {
		cloneArgs = append(cloneArgs, "--single-branch", "--branch", spec.CurrentBranch)
	}
	cloneArgs = append(cloneArgs, sourceURL, repoCopyDir)
	if err := RunHostCommand(ctx, "git", cloneArgs...); err != nil {
		return abort(fmt.Errorf("clone shallow workspace repo copy: %w", err))
	}

	checkoutCommit := spec.BaseCommit
	if strings.TrimSpace(spec.BranchName) == "" {
		checkoutCommit = spec.HeadCommit
	}
	// The shallow clone may not contain the commit we need; fetch it only
	// when the existence probe fails.
	if probeErr := RunHostCommand(ctx, "git", "-C", repoCopyDir, "cat-file", "-e", checkoutCommit+"^{commit}"); probeErr != nil {
		if err := RunHostCommand(ctx, "git", "-C", repoCopyDir, "fetch", "--depth", depth, sourceURL, checkoutCommit); err != nil {
			return abort(fmt.Errorf("fetch shallow workspace repo commit %s: %w", checkoutCommit, err))
		}
	}

	// The clone's origin initially points at the local file URL; replace it
	// with the host's real origin, or drop it when there is none.
	if strings.TrimSpace(spec.OriginURL) == "" {
		if err := RunHostCommand(ctx, "git", "-C", repoCopyDir, "remote", "remove", "origin"); err != nil {
			return abort(fmt.Errorf("remove workspace placeholder origin remote: %w", err))
		}
		return repoCopyDir, cleanup, nil
	}
	if err := RunHostCommand(ctx, "git", "-C", repoCopyDir, "remote", "set-url", "origin", spec.OriginURL); err != nil {
		return abort(fmt.Errorf("set workspace origin remote: %w", err))
	}
	return repoCopyDir, cleanup, nil
}
// ResolveSourcePath turns rawPath into an absolute path and confirms that it
// names an existing directory.
//
// It returns an error when rawPath is blank, cannot be made absolute, cannot
// be stat'ed, or refers to something that is not a directory.
func ResolveSourcePath(rawPath string) (string, error) {
	if len(strings.TrimSpace(rawPath)) == 0 {
		return "", errors.New("workspace source path is required")
	}

	resolved, absErr := filepath.Abs(rawPath)
	if absErr != nil {
		return "", absErr
	}

	switch stat, statErr := os.Stat(resolved); {
	case statErr != nil:
		return "", statErr
	case !stat.IsDir():
		return "", fmt.Errorf("%s is not a directory", resolved)
	}
	return resolved, nil
}
// ListSubmodules lists, in sorted order, the paths of all gitlink entries
// (index mode 160000) recorded in the index of repoRoot. The result is nil
// when the index contains none.
func ListSubmodules(ctx context.Context, repoRoot string) ([]string, error) {
	index, err := GitOutput(ctx, repoRoot, "ls-files", "--stage", "-z")
	if err != nil {
		return nil, fmt.Errorf("inspect workspace git index for %s: %w", repoRoot, err)
	}

	var gitlinks []string
	for _, entry := range ParseNullSeparatedOutput(index) {
		// Each entry has the form "<mode> <object> <stage>\t<path>".
		meta, path, hasPath := strings.Cut(entry, "\t")
		if hasPath && strings.HasPrefix(meta, "160000 ") {
			gitlinks = append(gitlinks, strings.TrimSpace(path))
		}
	}
	sort.Strings(gitlinks)
	return gitlinks, nil
}
// ListOverlayPaths returns the sorted, repo-relative paths of the tracked
// files in repoRoot, plus (when includeUntracked is true) the untracked
// non-ignored files. Tracked entries that no longer exist in the working
// tree (deleted files) are skipped in both modes. The result is never nil.
//
// The default is tracked-only because "untracked + not gitignored"
// silently catches local credentials, .env files, scratch notes, and
// other secrets that live in the working tree but aren't meant to
// leave the developer's machine. Callers that genuinely want the
// fuller set (scratch repos, vendored binaries the user is iterating
// on) opt in explicitly.
//
// Paths are taken verbatim from the NUL-terminated `git ls-files -z` output.
// They are deliberately not whitespace-trimmed: a file name may legitimately
// start or end with whitespace, and trimming it would make a tracked file
// look deleted (and be dropped) or list an untracked file under a name that
// does not exist.
func ListOverlayPaths(ctx context.Context, repoRoot string, includeUntracked bool) ([]string, error) {
	trackedOutput, err := GitOutput(ctx, repoRoot, "ls-files", "-z")
	if err != nil {
		return nil, fmt.Errorf("list tracked files for %s: %w", repoRoot, err)
	}

	paths := make([]string, 0)
	seen := make(map[string]struct{})
	for _, relPath := range strings.Split(string(trackedOutput), "\x00") {
		if relPath == "" {
			// Empty field after the final terminator.
			continue
		}
		if _, err := os.Lstat(filepath.Join(repoRoot, relPath)); err != nil {
			if os.IsNotExist(err) {
				// Tracked but deleted in the working tree: nothing to ship.
				continue
			}
			return nil, err
		}
		seen[relPath] = struct{}{}
		paths = append(paths, relPath)
	}

	if includeUntracked {
		untrackedOutput, err := GitOutput(ctx, repoRoot, "ls-files", "--others", "--exclude-standard", "-z")
		if err != nil {
			return nil, fmt.Errorf("list untracked files for %s: %w", repoRoot, err)
		}
		for _, relPath := range strings.Split(string(untrackedOutput), "\x00") {
			if relPath == "" {
				continue
			}
			if _, ok := seen[relPath]; ok {
				continue
			}
			seen[relPath] = struct{}{}
			paths = append(paths, relPath)
		}
	}

	sort.Strings(paths)
	return paths, nil
}
// CountUntrackedPaths reports how many untracked, non-ignored files exist in
// repoRoot. The CLI uses it to warn the user that local-but-unignored scratch
// files are present and will be skipped under the tracked-only default.
func CountUntrackedPaths(ctx context.Context, repoRoot string) (int, error) {
	listing, err := GitOutput(ctx, repoRoot, "ls-files", "--others", "--exclude-standard", "-z")
	if err != nil {
		return 0, fmt.Errorf("list untracked files for %s: %w", repoRoot, err)
	}
	// ParseNullSeparatedOutput already drops empty entries, so every element
	// it returns is one untracked path.
	return len(ParseNullSeparatedOutput(listing)), nil
}
// ParsePrepareMode maps a user-supplied mode string onto one of the known
// workspace prepare modes. Surrounding whitespace is ignored and a blank
// value selects the shallow-overlay mode. Unknown values produce an error
// that quotes the input as given.
func ParsePrepareMode(raw string) (model.WorkspacePrepareMode, error) {
	name := strings.TrimSpace(raw)
	if name == "" {
		return model.WorkspacePrepareModeShallowOverlay, nil
	}

	known := []model.WorkspacePrepareMode{
		model.WorkspacePrepareModeShallowOverlay,
		model.WorkspacePrepareModeFullCopy,
		model.WorkspacePrepareModeMetadataOnly,
	}
	for _, candidate := range known {
		if name == string(candidate) {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("unsupported workspace mode %q", raw)
}
// GitOutput invokes git with args through HostCommandOutputFunc and returns
// that function's output and error unchanged. When dir is not blank the
// command is run as `git -C dir args...`.
func GitOutput(ctx context.Context, dir string, args ...string) ([]byte, error) {
	inDir := strings.TrimSpace(dir) != ""

	gitArgs := make([]string, 0, 2+len(args))
	if inDir {
		gitArgs = append(gitArgs, "-C", dir)
	}
	gitArgs = append(gitArgs, args...)

	return HostCommandOutputFunc(ctx, "git", gitArgs...)
}
// GitTrimmedOutput runs GitOutput and returns its output as a string with
// leading and trailing whitespace removed. On error the string is empty.
func GitTrimmedOutput(ctx context.Context, dir string, args ...string) (string, error) {
	raw, gitErr := GitOutput(ctx, dir, args...)
	if gitErr != nil {
		return "", gitErr
	}
	text := string(raw)
	return strings.TrimSpace(text), nil
}
// GitResolvedConfigValue looks up the git config entry key as seen from dir
// and returns its whitespace-trimmed value. The lookup passes `--default ""`
// so that an unset key is expected to yield an empty string rather than an
// error.
func GitResolvedConfigValue(ctx context.Context, dir, key string) (string, error) {
	lookup := []string{"config", "--default", "", "--get", key}
	return GitTrimmedOutput(ctx, dir, lookup...)
}
// ParseNullSeparatedOutput breaks output into its NUL-delimited fields,
// strips surrounding whitespace from each field, and returns the fields that
// are still non-empty, in their original order. The result is never nil.
func ParseNullSeparatedOutput(output []byte) []string {
	nul := []byte{0}
	fields := make([]string, 0, bytes.Count(output, nul)+1)

	for rest := output; len(rest) > 0; {
		var field []byte
		field, rest, _ = bytes.Cut(rest, nul)
		if text := strings.TrimSpace(string(field)); text != "" {
			fields = append(fields, text)
		}
	}
	return fields
}
// RunHostCommand executes a host command through HostCommandOutputFunc and
// reports only whether it succeeded; the command's output is dropped.
func RunHostCommand(ctx context.Context, name string, args ...string) error {
	if _, err := HostCommandOutputFunc(ctx, name, args...); err != nil {
		return err
	}
	return nil
}
// GitFileURL converts a local filesystem path into a file:// URL (with the
// path percent-escaped as needed), which is the form PrepareRepoCopy hands to
// git when cloning or fetching from a local directory.
func GitFileURL(path string) string {
	location := url.URL{Scheme: "file"}
	location.Path = filepath.ToSlash(path)
	return location.String()
}