Speed up vm run repo import

Replace the post-boot full-history git bundle path with a shallow repo copy so vm run no longer spends its quiet time shipping and cloning every object in the source repository.

Stage a depth-10 no-checkout clone from the host repo, fetch the requested checkout commit only when it is outside the shallow window, rewrite origin back to the host repo's origin URL, and keep the existing guest checkout plus working-tree overlay behavior.

Add explicit [vm run] progress lines after [vm create] ready so the user can see the SSH wait, shallow repo prep, guest copy, overlay, and opencode attach phases instead of a silent pause.

Validated with GOCACHE=/tmp/banger-gocache go test ./..., make build, and a local payload comparison showing the banger repo dropping from a ~400 MB full bundle to a ~294 KB shallow metadata copy.
This commit is contained in:
Thales Maciel 2026-03-22 19:45:26 -03:00
parent 42b4a18c63
commit 1e967140c3
No known key found for this signature in database
GPG key ID: 33112E6833C34679
2 changed files with 299 additions and 83 deletions

View file

@ -8,6 +8,7 @@ import (
"fmt"
"io"
"net"
"net/url"
"os"
"os/exec"
"path/filepath"
@ -93,13 +94,14 @@ var (
guestDialFunc = func(ctx context.Context, address, privateKeyPath string) (vmRunGuestClient, error) {
return guest.Dial(ctx, address, privateKeyPath)
}
cwdFunc = os.Getwd
prepareVMRunRepoCopyFunc = prepareVMRunRepoCopy
cwdFunc = os.Getwd
)
// vmRunGuestClient is the set of guest-side operations the vm run flow needs
// from an SSH session. guestDialFunc hands one back, which keeps the repo
// import logic independent of the concrete guest client.
//
// Every operation takes a logWriter; callers in this file pass a buffer there
// and fold its contents into the error they report when the step fails.
type vmRunGuestClient interface {
	// RunScript executes a shell script on the guest.
	RunScript(ctx context.Context, script string, logWriter io.Writer) error

	// UploadFile sends data to remotePath on the guest with the given mode.
	// NOTE(review): exact write semantics live in the implementation — confirm there.
	UploadFile(ctx context.Context, remotePath string, mode os.FileMode, data []byte, logWriter io.Writer) error

	// StreamTar ships sourceDir to the guest as a tar stream for remoteCommand
	// to consume; callers here pass a `tar ... -xf -` command that reads stdin.
	StreamTar(ctx context.Context, sourceDir, remoteCommand string, logWriter io.Writer) error

	// StreamTarEntries is the selective variant of StreamTar: only the listed
	// entries (presumably paths relative to sourceDir — verify against the
	// implementation) are sent to remoteCommand.
	StreamTarEntries(ctx context.Context, sourceDir string, entries []string, remoteCommand string, logWriter io.Writer) error

	// Close releases the underlying guest connection.
	Close() error
}
@ -111,12 +113,13 @@ type vmRunRepoSpec struct {
CurrentBranch string
BranchName string
BaseCommit string
OriginURL string
GitUserName string
GitUserEmail string
OverlayPaths []string
}
const vmRunGuestBundlePath = "/tmp/banger-vm-run.bundle"
const vmRunShallowFetchDepth = 10
func NewBangerCommand() *cobra.Command {
root := &cobra.Command{
@ -1454,6 +1457,10 @@ func inspectVMRunRepo(ctx context.Context, rawPath, branchName, fromRef string)
if err != nil {
return vmRunRepoSpec{}, fmt.Errorf("resolve git user.email for %s: %w", repoRoot, err)
}
originURL, err := gitResolvedConfigValue(ctx, repoRoot, "remote.origin.url")
if err != nil {
return vmRunRepoSpec{}, fmt.Errorf("resolve origin url for %s: %w", repoRoot, err)
}
overlayPaths, err := listVMRunOverlayPaths(ctx, repoRoot)
if err != nil {
@ -1468,6 +1475,7 @@ func inspectVMRunRepo(ctx context.Context, rawPath, branchName, fromRef string)
CurrentBranch: currentBranch,
BranchName: branchName,
BaseCommit: baseCommit,
OriginURL: originURL,
GitUserName: gitUserName,
GitUserEmail: gitUserEmail,
OverlayPaths: overlayPaths,
@ -1583,6 +1591,7 @@ func parseNullSeparatedOutput(output []byte) []string {
}
func runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, stdin io.Reader, stdout, stderr io.Writer, params api.VMCreateParams, spec vmRunRepoSpec) error {
progress := newVMRunProgressRenderer(stderr)
vm, err := runVMCreate(ctx, socketPath, stderr, params)
if err != nil {
return err
@ -1592,6 +1601,7 @@ func runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, st
vmRef = shortID(vm.ID)
}
sshAddress := net.JoinHostPort(vm.Runtime.GuestIP, "22")
progress.render("waiting for guest ssh")
if err := guestWaitForSSHFunc(ctx, sshAddress, cfg.SSHKeyPath, 250*time.Millisecond); err != nil {
return fmt.Errorf("vm %q is running but guest ssh is unavailable: %w", vmRef, err)
}
@ -1600,71 +1610,112 @@ func runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, st
return fmt.Errorf("vm %q is running but guest ssh is unavailable: %w", vmRef, err)
}
defer client.Close()
if err := importVMRunRepoToGuest(ctx, client, spec); err != nil {
if err := importVMRunRepoToGuest(ctx, client, spec, progress); err != nil {
return fmt.Errorf("vm %q is running but repo import failed: %w", vmRef, err)
}
progress.render("attaching opencode")
if err := runVMRunAttach(ctx, stdin, stdout, stderr, vm.Runtime.GuestIP, vmRunGuestDir(spec.RepoName)); err != nil {
return fmt.Errorf("vm %q is running but opencode attach failed: %w", vmRef, err)
}
return nil
}
func importVMRunRepoToGuest(ctx context.Context, client vmRunGuestClient, spec vmRunRepoSpec) error {
bundleData, err := createVMRunBundle(ctx, spec)
func importVMRunRepoToGuest(ctx context.Context, client vmRunGuestClient, spec vmRunRepoSpec, progress *vmRunProgressRenderer) error {
if progress != nil {
progress.render("preparing shallow repo")
}
repoCopyDir, cleanup, err := prepareVMRunRepoCopyFunc(ctx, spec)
if err != nil {
return err
}
var uploadLog bytes.Buffer
if err := client.UploadFile(ctx, vmRunGuestBundlePath, 0o600, bundleData, &uploadLog); err != nil {
return formatVMRunStepError("upload git bundle", err, uploadLog.String())
defer cleanup()
if progress != nil {
progress.render("copying repo metadata to guest")
}
var copyLog bytes.Buffer
remoteCommand := fmt.Sprintf("rm -rf %s && mkdir -p %s && tar -o -C %s --strip-components=1 -xf -", shellQuote(vmRunGuestDir(spec.RepoName)), shellQuote(vmRunGuestDir(spec.RepoName)), shellQuote(vmRunGuestDir(spec.RepoName)))
if err := client.StreamTar(ctx, repoCopyDir, remoteCommand, &copyLog); err != nil {
return formatVMRunStepError("copy guest git metadata", err, copyLog.String())
}
if progress != nil {
progress.render("preparing guest checkout")
}
var scriptLog bytes.Buffer
if err := client.RunScript(ctx, vmRunCloneScript(spec), &scriptLog); err != nil {
if err := client.RunScript(ctx, vmRunCheckoutScript(spec), &scriptLog); err != nil {
return formatVMRunStepError("prepare guest checkout", err, scriptLog.String())
}
if progress != nil {
progress.render("overlaying host working tree")
}
var overlayLog bytes.Buffer
remoteCommand := fmt.Sprintf("tar -o -C %s --strip-components=1 -xf -", shellQuote(vmRunGuestDir(spec.RepoName)))
remoteCommand = fmt.Sprintf("tar -o -C %s --strip-components=1 -xf -", shellQuote(vmRunGuestDir(spec.RepoName)))
if err := client.StreamTarEntries(ctx, spec.RepoRoot, spec.OverlayPaths, remoteCommand, &overlayLog); err != nil {
return formatVMRunStepError("overlay host working tree", err, overlayLog.String())
}
return nil
}
func createVMRunBundle(ctx context.Context, spec vmRunRepoSpec) ([]byte, error) {
tempFile, err := os.CreateTemp("", "banger-vm-run-*.bundle")
func prepareVMRunRepoCopy(ctx context.Context, spec vmRunRepoSpec) (string, func(), error) {
tempRoot, err := os.MkdirTemp("", "banger-vm-run-*")
if err != nil {
return nil, err
return "", nil, err
}
tempPath := tempFile.Name()
if err := tempFile.Close(); err != nil {
_ = os.Remove(tempPath)
return nil, err
cleanup := func() {
_ = os.RemoveAll(tempRoot)
}
defer os.Remove(tempPath)
args := []string{"-C", spec.RepoRoot, "bundle", "create", tempPath, "--all"}
for _, rev := range uniqueNonEmptyStrings(spec.HeadCommit, spec.BaseCommit) {
args = append(args, rev)
repoCopyDir := filepath.Join(tempRoot, spec.RepoName)
cloneArgs := []string{"clone", "--no-checkout", "--depth", fmt.Sprintf("%d", vmRunShallowFetchDepth)}
if strings.TrimSpace(spec.CurrentBranch) != "" {
cloneArgs = append(cloneArgs, "--single-branch", "--branch", spec.CurrentBranch)
}
if _, err := hostCommandOutputFunc(ctx, "git", args...); err != nil {
return nil, fmt.Errorf("create git bundle: %w", err)
cloneArgs = append(cloneArgs, gitFileURL(spec.RepoRoot), repoCopyDir)
if err := runHostCommand(ctx, "git", cloneArgs...); err != nil {
cleanup()
return "", nil, fmt.Errorf("clone shallow repo copy: %w", err)
}
data, err := os.ReadFile(tempPath)
if err != nil {
return nil, fmt.Errorf("read git bundle: %w", err)
checkoutCommit := vmRunCheckoutCommit(spec)
if err := runHostCommand(ctx, "git", "-C", repoCopyDir, "cat-file", "-e", checkoutCommit+"^{commit}"); err != nil {
if err := runHostCommand(ctx, "git", "-C", repoCopyDir, "fetch", "--depth", fmt.Sprintf("%d", vmRunShallowFetchDepth), gitFileURL(spec.RepoRoot), checkoutCommit); err != nil {
cleanup()
return "", nil, fmt.Errorf("fetch shallow repo commit %s: %w", checkoutCommit, err)
}
}
return data, nil
if strings.TrimSpace(spec.OriginURL) != "" {
if err := runHostCommand(ctx, "git", "-C", repoCopyDir, "remote", "set-url", "origin", spec.OriginURL); err != nil {
cleanup()
return "", nil, fmt.Errorf("set origin remote: %w", err)
}
} else {
if err := runHostCommand(ctx, "git", "-C", repoCopyDir, "remote", "remove", "origin"); err != nil {
cleanup()
return "", nil, fmt.Errorf("remove placeholder origin remote: %w", err)
}
}
return repoCopyDir, cleanup, nil
}
func vmRunCloneScript(spec vmRunRepoSpec) string {
// vmRunCheckoutCommit picks the commit the guest checkout will start from:
// spec.BaseCommit when a branch name was requested (non-blank BranchName),
// otherwise spec.HeadCommit.
func vmRunCheckoutCommit(spec vmRunRepoSpec) string {
	if requestedBranch := strings.TrimSpace(spec.BranchName); requestedBranch == "" {
		return spec.HeadCommit
	}
	return spec.BaseCommit
}
// gitFileURL renders a local filesystem path as a file:// URL suitable for use
// as a git remote.
//
// Callers in this file pass the result to `git clone --depth` and
// `git fetch --depth`; presumably the URL form is used because git does not
// apply --depth to plain local paths (see the git-clone documentation).
//
// The URL path is always absolute. A relative path is resolved against the
// current working directory first, and a path that still lacks a leading slash
// after slash conversion (for example a drive-letter path) gets one prepended.
// Without this, the first path segment would land in the URL's host position
// ("file://repo"). An empty path is passed through unchanged so that a missing
// repo root is not silently replaced by the working directory.
func gitFileURL(path string) string {
	if path != "" && !filepath.IsAbs(path) {
		// Best effort: if the working directory cannot be determined, keep the
		// path as given rather than failing a pure formatting helper.
		if abs, err := filepath.Abs(path); err == nil {
			path = abs
		}
	}
	urlPath := filepath.ToSlash(path)
	if urlPath != "" && !strings.HasPrefix(urlPath, "/") {
		urlPath = "/" + urlPath
	}
	return (&url.URL{Scheme: "file", Path: urlPath}).String()
}
// runHostCommand invokes a host command through hostCommandOutputFunc and
// reports only whether it succeeded; whatever output the command produced is
// discarded.
func runHostCommand(ctx context.Context, name string, args ...string) error {
	if _, runErr := hostCommandOutputFunc(ctx, name, args...); runErr != nil {
		return runErr
	}
	return nil
}
func vmRunCheckoutScript(spec vmRunRepoSpec) string {
guestDir := vmRunGuestDir(spec.RepoName)
var script strings.Builder
script.WriteString("set -euo pipefail\n")
fmt.Fprintf(&script, "DIR=%s\n", shellQuote(guestDir))
fmt.Fprintf(&script, "BUNDLE=%s\n", shellQuote(vmRunGuestBundlePath))
script.WriteString("rm -rf \"$DIR\"\n")
script.WriteString("git clone \"$BUNDLE\" \"$DIR\"\n")
script.WriteString("rm -f \"$BUNDLE\"\n")
script.WriteString("git config --global --add safe.directory \"$DIR\"\n")
switch {
case strings.TrimSpace(spec.BranchName) != "":
fmt.Fprintf(&script, "git -C \"$DIR\" checkout -B %s %s\n", shellQuote(spec.BranchName), shellQuote(spec.BaseCommit))
@ -1674,7 +1725,6 @@ func vmRunCloneScript(spec vmRunRepoSpec) string {
fmt.Fprintf(&script, "git -C \"$DIR\" checkout --detach %s\n", shellQuote(spec.HeadCommit))
}
script.WriteString("find \"$DIR\" -mindepth 1 -maxdepth 1 ! -name .git -exec rm -rf {} +\n")
script.WriteString("git config --global --add safe.directory \"$DIR\"\n")
if strings.TrimSpace(spec.GitUserName) != "" && strings.TrimSpace(spec.GitUserEmail) != "" {
fmt.Fprintf(&script, "git -C \"$DIR\" config user.name %s\n", shellQuote(spec.GitUserName))
fmt.Fprintf(&script, "git -C \"$DIR\" config user.email %s\n", shellQuote(spec.GitUserEmail))
@ -1706,21 +1756,37 @@ func formatVMRunStepError(action string, err error, log string) error {
return fmt.Errorf("%s: %w: %s", action, err, log)
}
func uniqueNonEmptyStrings(values ...string) []string {
unique := make([]string, 0, len(values))
seen := make(map[string]struct{}, len(values))
for _, value := range values {
value = strings.TrimSpace(value)
if value == "" {
continue
}
if _, ok := seen[value]; ok {
continue
}
seen[value] = struct{}{}
unique = append(unique, value)
// vmRunProgressRenderer prints "[vm run] ..." progress lines for the phases
// of vm run, skipping a line when it is identical to the one printed last.
type vmRunProgressRenderer struct {
// out is where progress lines are written.
out io.Writer
// enabled is false when the renderer was built without a writer; render is
// then a no-op.
enabled bool
// lastLine holds the most recently printed line so an immediate repeat can
// be suppressed.
lastLine string
}
func newVMRunProgressRenderer(out io.Writer) *vmRunProgressRenderer {
return &vmRunProgressRenderer{
out: out,
enabled: out != nil,
}
return unique
}
// render prints one progress line for detail.
//
// It is safe to call on a nil or disabled renderer, in which case nothing
// happens. A detail that formats to an empty line, or to the same line that
// was printed last, is skipped. Write errors are intentionally ignored:
// progress output is best effort and must not fail the run.
func (r *vmRunProgressRenderer) render(detail string) {
	if r == nil || !r.enabled {
		return
	}
	switch formatted := formatVMRunProgress(detail); formatted {
	case "", r.lastLine:
		// Nothing to show, or an immediate repeat of the previous line.
	default:
		r.lastLine = formatted
		_, _ = fmt.Fprintln(r.out, formatted)
	}
}
// formatVMRunProgress builds the user-facing progress line for detail by
// trimming surrounding whitespace and prefixing "[vm run] ". A blank detail
// yields the empty string so callers can skip printing.
func formatVMRunProgress(detail string) string {
	if trimmed := strings.TrimSpace(detail); trimmed != "" {
		return "[vm run] " + trimmed
	}
	return ""
}
func shellQuote(value string) string {