banger/internal/daemon/imagebuild.go
Thales Maciel 14d8563f3c
Stop using kernel IP autoconfig for runtime VMs
Avoid the Alpine boot stall caused by kernel ip= autoconfig running before
virtio_net is available.

Split runtime and image-build boot args so managed VMs boot without kernel
network autoconfig, inject a static guest network config plus bootstrap
script into the runtime overlay, and keep image builds on the old path for
compatibility with existing base images.

Preserve executable bits when patching guest files into ext4 images and add
coverage for the new boot-arg split and guest network config generation.

Validated with go test ./..., a rebuilt Alpine image, and a fresh alp-fast
create/ssh check that brought vm.start down to about 2.7s.
2026-03-21 21:54:18 -03:00

432 lines
17 KiB
Go

package daemon
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"banger/internal/firecracker"
"banger/internal/guest"
"banger/internal/guestnet"
"banger/internal/hostnat"
"banger/internal/imagepreset"
"banger/internal/model"
"banger/internal/opencode"
"banger/internal/system"
"banger/internal/vsockagent"
)
const (
defaultMiseVersion = "v2025.12.0"
defaultMiseInstallPath = "/usr/local/bin/mise"
defaultMiseActivateLine = `eval "$(/usr/local/bin/mise activate bash)"`
defaultOpenCodeTool = "github:anomalyco/opencode"
defaultTPMRepo = "https://github.com/tmux-plugins/tpm"
defaultResurrectRepo = "https://github.com/tmux-plugins/tmux-resurrect"
defaultContinuumRepo = "https://github.com/tmux-plugins/tmux-continuum"
defaultTMUXPluginDir = "/root/.tmux/plugins"
defaultTMUXResurrectDir = "/root/.tmux/resurrect"
tmuxManagedBlockStart = "# >>> banger tmux plugins >>>"
tmuxManagedBlockEnd = "# <<< banger tmux plugins <<<"
)
type imageBuildSpec struct {
ID string
Name string
SourceRootfs string
RootfsPath string
BuildLog io.Writer
KernelPath string
InitrdPath string
ModulesDir string
Packages []string
InstallDocker bool
Size string
}
type imageBuildVM struct {
Name string
GuestIP string
TapDevice string
APISock string
PID int
}
func (d *Daemon) runImageBuild(ctx context.Context, spec imageBuildSpec) error {
if d.imageBuild != nil {
return d.imageBuild(ctx, spec)
}
return d.runImageBuildNative(ctx, spec)
}
func (d *Daemon) runImageBuildNative(ctx context.Context, spec imageBuildSpec) (err error) {
if err := system.CopyFilePreferClone(spec.SourceRootfs, spec.RootfsPath); err != nil {
return err
}
if spec.Size != "" {
if err := resizeRootfs(spec.SourceRootfs, spec.RootfsPath, spec.Size); err != nil {
return err
}
}
vm, cleanup, err := d.startImageBuildVM(ctx, spec)
if err != nil {
return err
}
defer func() {
cleanupErr := cleanup(context.Background())
if cleanupErr != nil {
err = errors.Join(err, cleanupErr)
}
}()
sshAddress := vm.GuestIP + ":22"
if _, err := fmt.Fprintf(spec.BuildLog, "[image.build] waiting for ssh on %s\n", sshAddress); err != nil {
return err
}
waitCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
defer cancel()
if err := guest.WaitForSSH(waitCtx, sshAddress, d.config.SSHKeyPath, time.Second); err != nil {
return err
}
client, err := guest.Dial(ctx, sshAddress, d.config.SSHKeyPath)
if err != nil {
return err
}
defer client.Close()
authorizedKey, err := guest.AuthorizedPublicKey(d.config.SSHKeyPath)
if err != nil {
return err
}
vsockAgentPath, err := d.vsockAgentBinary()
if err != nil {
return err
}
helperBytes, err := os.ReadFile(vsockAgentPath)
if err != nil {
return err
}
if err := writeBuildLog(spec.BuildLog, "installing vsock agent"); err != nil {
return err
}
if err := client.UploadFile(ctx, vsockagent.GuestInstallPath, 0o755, helperBytes, spec.BuildLog); err != nil {
return err
}
if err := writeBuildLog(spec.BuildLog, "configuring guest"); err != nil {
return err
}
if err := client.RunScript(ctx, buildProvisionScript(vm.Name, d.config.DefaultDNS, string(authorizedKey), spec.Packages, spec.InstallDocker), spec.BuildLog); err != nil {
return err
}
if strings.TrimSpace(spec.ModulesDir) != "" {
if err := writeBuildLog(spec.BuildLog, "copying kernel modules"); err != nil {
return err
}
if err := client.StreamTar(ctx, spec.ModulesDir, buildModulesCommand(filepath.Base(spec.ModulesDir)), spec.BuildLog); err != nil {
return err
}
}
if err := writeBuildLog(spec.BuildLog, "shutting down guest"); err != nil {
return err
}
if err := client.RunScript(ctx, "set -e\nsync\n", spec.BuildLog); err != nil {
return err
}
return d.shutdownImageBuildVM(ctx, vm)
}
func resizeRootfs(baseRootfs, rootfsPath, sizeSpec string) error {
sizeBytes, err := model.ParseSize(sizeSpec)
if err != nil {
return err
}
info, err := os.Stat(baseRootfs)
if err != nil {
return err
}
if sizeBytes < info.Size() {
return fmt.Errorf("size must be >= base image size")
}
return system.ResizeExt4Image(context.Background(), system.NewRunner(), rootfsPath, sizeBytes)
}
func (d *Daemon) startImageBuildVM(ctx context.Context, spec imageBuildSpec) (imageBuildVM, func(context.Context) error, error) {
if err := d.ensureBridge(ctx); err != nil {
return imageBuildVM{}, nil, err
}
if err := d.ensureSocketDir(); err != nil {
return imageBuildVM{}, nil, err
}
fcPath, err := d.firecrackerBinary()
if err != nil {
return imageBuildVM{}, nil, err
}
shortID := system.ShortID(spec.ID)
guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
if err != nil {
return imageBuildVM{}, nil, err
}
vm := imageBuildVM{
Name: "image-build-" + shortID,
GuestIP: guestIP,
TapDevice: "tap-img-" + shortID,
APISock: filepath.Join(d.layout.RuntimeDir, "img-"+shortID+".sock"),
}
if err := os.RemoveAll(vm.APISock); err != nil && !os.IsNotExist(err) {
return imageBuildVM{}, nil, err
}
if err := d.createTap(ctx, vm.TapDevice); err != nil {
return imageBuildVM{}, nil, err
}
if err := hostnat.Ensure(ctx, d.runner, vm.GuestIP, vm.TapDevice, true); err != nil {
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.TapDevice)
return imageBuildVM{}, nil, err
}
firecrackerCtx := context.Background()
machine, err := firecracker.NewMachine(firecrackerCtx, firecracker.MachineConfig{
BinaryPath: fcPath,
VMID: spec.ID,
SocketPath: vm.APISock,
LogPath: spec.RootfsPath + ".firecracker.log",
MetricsPath: filepath.Join(filepath.Dir(spec.RootfsPath), "metrics.json"),
KernelImagePath: spec.KernelPath,
InitrdPath: spec.InitrdPath,
KernelArgs: system.BuildBootArgsWithKernelIP(vm.Name, vm.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
Drives: []firecracker.DriveConfig{{
ID: "rootfs",
Path: spec.RootfsPath,
ReadOnly: false,
IsRoot: true,
}},
TapDevice: vm.TapDevice,
VCPUCount: model.DefaultVCPUCount,
MemoryMiB: model.DefaultMemoryMiB,
Logger: d.logger,
})
if err != nil {
_ = hostnat.Ensure(ctx, d.runner, vm.GuestIP, vm.TapDevice, false)
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.TapDevice)
return imageBuildVM{}, nil, err
}
if err := machine.Start(firecrackerCtx); err != nil {
_ = hostnat.Ensure(ctx, d.runner, vm.GuestIP, vm.TapDevice, false)
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.TapDevice)
return imageBuildVM{}, nil, err
}
vm.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, vm.APISock)
if err := d.ensureSocketAccess(ctx, vm.APISock, "firecracker api socket"); err != nil {
_ = d.killVMProcess(context.Background(), vm.PID)
_ = hostnat.Ensure(ctx, d.runner, vm.GuestIP, vm.TapDevice, false)
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.TapDevice)
return imageBuildVM{}, nil, err
}
cleanup := func(cleanupCtx context.Context) error {
if vm.PID > 0 && system.ProcessRunning(vm.PID, vm.APISock) {
_ = d.killVMProcess(cleanupCtx, vm.PID)
_ = d.waitForExit(cleanupCtx, vm.PID, vm.APISock, 10*time.Second)
}
_ = hostnat.Ensure(cleanupCtx, d.runner, vm.GuestIP, vm.TapDevice, false)
if vm.TapDevice != "" {
_, _ = d.runner.RunSudo(cleanupCtx, "ip", "link", "del", vm.TapDevice)
}
if vm.APISock != "" {
_ = os.Remove(vm.APISock)
}
return nil
}
return vm, cleanup, nil
}
func (d *Daemon) shutdownImageBuildVM(ctx context.Context, vm imageBuildVM) error {
buildVM := model.VMRecord{Runtime: model.VMRuntime{APISockPath: vm.APISock}}
if err := d.sendCtrlAltDel(ctx, buildVM); err != nil {
return err
}
return d.waitForExit(ctx, vm.PID, vm.APISock, 15*time.Second)
}
func buildProvisionScript(vmName, dnsServer, authorizedKey string, packages []string, installDocker bool) string {
var script bytes.Buffer
script.WriteString("set -euo pipefail\n")
fmt.Fprintf(&script, "printf 'nameserver %%s\\n' %s > /etc/resolv.conf\n", shellQuote(dnsServer))
fmt.Fprintf(&script, "printf '%%s\\n' %s > /etc/hostname\n", shellQuote(vmName))
fmt.Fprintf(&script, "printf '127.0.0.1 localhost\\n127.0.1.1 %%s\\n' %s > /etc/hosts\n", shellQuote(vmName))
script.WriteString("touch /etc/fstab\n")
script.WriteString("sed -i '\\|^/dev/vdb[[:space:]]\\+/home[[:space:]]|d; \\|^/dev/vdc[[:space:]]\\+/var[[:space:]]|d' /etc/fstab\n")
script.WriteString("if ! grep -q '^tmpfs /run ' /etc/fstab; then echo 'tmpfs /run tmpfs defaults,nodev,nosuid,mode=0755 0 0' >> /etc/fstab; fi\n")
script.WriteString("if ! grep -q '^tmpfs /tmp ' /etc/fstab; then echo 'tmpfs /tmp tmpfs defaults,nodev,nosuid,mode=1777 0 0' >> /etc/fstab; fi\n")
appendAuthorizedKeySetup(&script, authorizedKey)
script.WriteString("apt-get update\n")
script.WriteString("DEBIAN_FRONTEND=noninteractive apt-get -y upgrade\n")
fmt.Fprintf(&script, "PACKAGES=%s\n", shellArray(packages))
script.WriteString("DEBIAN_FRONTEND=noninteractive apt-get -y install \"${PACKAGES[@]}\"\n")
appendGuestNetworkSetup(&script)
appendMiseSetup(&script)
appendOpenCodeServiceSetup(&script)
appendTmuxSetup(&script)
appendVSockPingSetup(&script)
if installDocker {
script.WriteString("DEBIAN_FRONTEND=noninteractive apt-get -y remove containerd || true\n")
script.WriteString("if ! DEBIAN_FRONTEND=noninteractive apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin; then\n")
script.WriteString(" DEBIAN_FRONTEND=noninteractive apt-get -y install docker.io\n")
script.WriteString("fi\n")
script.WriteString("if command -v systemctl >/dev/null 2>&1; then systemctl enable --now docker || true; fi\n")
}
appendGuestCleanup(&script)
script.WriteString("git config --system init.defaultBranch main\n")
return script.String()
}
func appendAuthorizedKeySetup(script *bytes.Buffer, authorizedKey string) {
script.WriteString("mkdir -p /root/.ssh\n")
script.WriteString("chmod 700 /root/.ssh\n")
script.WriteString("cat > /root/.ssh/authorized_keys <<'EOF'\n")
script.WriteString(strings.TrimSpace(authorizedKey))
script.WriteString("\nEOF\n")
script.WriteString("chmod 600 /root/.ssh/authorized_keys\n")
}
func buildModulesCommand(modulesBase string) string {
return fmt.Sprintf("bash -se <<'EOF'\nset -euo pipefail\nmkdir -p /lib/modules\ntar -C /lib/modules -xf -\ndepmod -a %s\nmkdir -p /etc/modules-load.d\nprintf 'nf_tables\\nnft_chain_nat\\nveth\\nbr_netfilter\\noverlay\\n' > /etc/modules-load.d/docker-netfilter.conf\nmkdir -p /etc/sysctl.d\ncat > /etc/sysctl.d/99-docker.conf <<'SYSCTL'\nnet.bridge.bridge-nf-call-iptables = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward = 1\nSYSCTL\nsysctl --system >/dev/null 2>&1 || true\nEOF", shellQuote(modulesBase))
}
func appendMiseSetup(script *bytes.Buffer) {
fmt.Fprintf(script, "curl -fsSL https://mise.run | MISE_INSTALL_PATH=%s MISE_VERSION=%s sh\n", shellQuote(defaultMiseInstallPath), shellQuote(defaultMiseVersion))
fmt.Fprintf(script, "%s use -g %s\n", shellQuote(defaultMiseInstallPath), shellQuote(defaultOpenCodeTool))
fmt.Fprintf(script, "%s reshim\n", shellQuote(defaultMiseInstallPath))
fmt.Fprintf(script, "if [[ ! -e %s ]]; then echo 'opencode shim not found after mise install' >&2; exit 1; fi\n", shellQuote(opencode.ShimPath))
fmt.Fprintf(script, "ln -snf %s %s\n", shellQuote(opencode.ShimPath), shellQuote(opencode.GuestBinaryPath))
script.WriteString("mkdir -p /etc/profile.d\n")
script.WriteString("cat > /etc/profile.d/mise.sh <<'EOF'\n")
fmt.Fprintf(script, "if [ -n \"${BASH_VERSION:-}\" ] && [ -x %s ]; then\n", shellQuote(defaultMiseInstallPath))
fmt.Fprintf(script, " %s\n", defaultMiseActivateLine)
script.WriteString("fi\n")
script.WriteString("EOF\n")
script.WriteString("chmod 0644 /etc/profile.d/mise.sh\n")
appendLineIfMissing(script, "/etc/bash.bashrc", defaultMiseActivateLine)
}
func appendGuestNetworkSetup(script *bytes.Buffer) {
script.WriteString("mkdir -p /usr/local/libexec /etc/systemd/system\n")
script.WriteString("cat > " + guestnet.GuestScriptPath + " <<'EOF'\n")
script.WriteString(guestnet.BootstrapScript())
script.WriteString("EOF\n")
script.WriteString("chmod 0755 " + guestnet.GuestScriptPath + "\n")
script.WriteString("cat > /etc/systemd/system/" + guestnet.SystemdServiceName + " <<'EOF'\n")
script.WriteString(guestnet.SystemdServiceUnit())
script.WriteString("EOF\n")
script.WriteString("chmod 0644 /etc/systemd/system/" + guestnet.SystemdServiceName + "\n")
script.WriteString("if command -v systemctl >/dev/null 2>&1; then systemctl daemon-reload || true; systemctl enable --now " + guestnet.SystemdServiceName + " || true; fi\n")
}
func appendOpenCodeServiceSetup(script *bytes.Buffer) {
script.WriteString("mkdir -p /etc/systemd/system\n")
script.WriteString("cat > /etc/systemd/system/" + opencode.ServiceName + " <<'EOF'\n")
script.WriteString(opencode.ServiceUnit())
script.WriteString("EOF\n")
script.WriteString("chmod 0644 /etc/systemd/system/" + opencode.ServiceName + "\n")
script.WriteString("if command -v systemctl >/dev/null 2>&1; then systemctl daemon-reload || true; systemctl enable --now " + opencode.ServiceName + " || true; fi\n")
}
func appendTmuxSetup(script *bytes.Buffer) {
fmt.Fprintf(script, "TMUX_PLUGIN_DIR=%s\n", shellQuote(defaultTMUXPluginDir))
fmt.Fprintf(script, "TMUX_RESURRECT_DIR=%s\n", shellQuote(defaultTMUXResurrectDir))
script.WriteString("mkdir -p \"$TMUX_PLUGIN_DIR\" \"$TMUX_RESURRECT_DIR\"\n")
appendGitRepo(script, "$TMUX_PLUGIN_DIR/tpm", defaultTPMRepo)
appendGitRepo(script, "$TMUX_PLUGIN_DIR/tmux-resurrect", defaultResurrectRepo)
appendGitRepo(script, "$TMUX_PLUGIN_DIR/tmux-continuum", defaultContinuumRepo)
script.WriteString("TMUX_CONF=/root/.tmux.conf\n")
fmt.Fprintf(script, "TMUX_MANAGED_START=%s\n", shellQuote(tmuxManagedBlockStart))
fmt.Fprintf(script, "TMUX_MANAGED_END=%s\n", shellQuote(tmuxManagedBlockEnd))
script.WriteString("tmp_tmux_conf=$(mktemp)\n")
script.WriteString("if [[ -f \"$TMUX_CONF\" ]]; then\n")
script.WriteString(" awk -v begin=\"$TMUX_MANAGED_START\" -v end=\"$TMUX_MANAGED_END\" '$0 == begin { skip = 1; next } $0 == end { skip = 0; next } !skip { print }' \"$TMUX_CONF\" > \"$tmp_tmux_conf\"\n")
script.WriteString("else\n")
script.WriteString(" : > \"$tmp_tmux_conf\"\n")
script.WriteString("fi\n")
script.WriteString("if [[ -s \"$tmp_tmux_conf\" ]]; then\n")
script.WriteString(" printf '\\n' >> \"$tmp_tmux_conf\"\n")
script.WriteString("fi\n")
script.WriteString("cat >> \"$tmp_tmux_conf\" <<'EOF'\n")
script.WriteString(tmuxManagedBlockStart + "\n")
script.WriteString("set -g @plugin 'tmux-plugins/tpm'\n")
script.WriteString("set -g @plugin 'tmux-plugins/tmux-resurrect'\n")
script.WriteString("set -g @plugin 'tmux-plugins/tmux-continuum'\n")
script.WriteString("set -g @continuum-save-interval '15'\n")
script.WriteString("set -g @continuum-restore 'off'\n")
script.WriteString("set -g @resurrect-dir '/root/.tmux/resurrect'\n")
script.WriteString("run '~/.tmux/plugins/tpm/tpm'\n")
script.WriteString(tmuxManagedBlockEnd + "\n")
script.WriteString("EOF\n")
script.WriteString("mv \"$tmp_tmux_conf\" \"$TMUX_CONF\"\n")
script.WriteString("chmod 0644 \"$TMUX_CONF\"\n")
}
func appendVSockPingSetup(script *bytes.Buffer) {
script.WriteString("mkdir -p /etc/modules-load.d /etc/systemd/system\n")
script.WriteString("cat > /etc/modules-load.d/banger-vsock.conf <<'EOF'\n")
script.WriteString(vsockagent.ModulesLoadConfig())
script.WriteString("EOF\n")
script.WriteString("chmod 0644 /etc/modules-load.d/banger-vsock.conf\n")
script.WriteString("cat > /etc/systemd/system/" + vsockagent.ServiceName + " <<'EOF'\n")
script.WriteString(vsockagent.ServiceUnit())
script.WriteString("EOF\n")
script.WriteString("chmod 0644 /etc/systemd/system/" + vsockagent.ServiceName + "\n")
script.WriteString("if command -v systemctl >/dev/null 2>&1; then systemctl daemon-reload || true; systemctl enable --now " + vsockagent.ServiceName + " || true; fi\n")
}
func appendGitRepo(script *bytes.Buffer, dir, repo string) {
fmt.Fprintf(script, "if [[ -d \"%s/.git\" ]]; then\n", dir)
fmt.Fprintf(script, " git -C \"%s\" fetch --depth 1 origin\n", dir)
fmt.Fprintf(script, " git -C \"%s\" reset --hard FETCH_HEAD\n", dir)
script.WriteString("else\n")
fmt.Fprintf(script, " rm -rf \"%s\"\n", dir)
fmt.Fprintf(script, " git clone --depth 1 %s \"%s\"\n", shellQuote(repo), dir)
script.WriteString("fi\n")
}
func appendGuestCleanup(script *bytes.Buffer) {
script.WriteString("rm -f /root/get-docker /root/get-docker.sh /tmp/get-docker /tmp/get-docker.sh\n")
}
func appendLineIfMissing(script *bytes.Buffer, path, line string) {
fmt.Fprintf(script, "touch %s\n", shellQuote(path))
fmt.Fprintf(script, "if ! grep -Fqx %s %s; then\n", shellQuote(line), shellQuote(path))
fmt.Fprintf(script, " printf '\\n%%s\\n' %s >> %s\n", shellQuote(line), shellQuote(path))
script.WriteString("fi\n")
}
func shellArray(values []string) string {
quoted := make([]string, 0, len(values))
for _, value := range values {
quoted = append(quoted, shellQuote(value))
}
return "(" + strings.Join(quoted, " ") + ")"
}
func shellQuote(value string) string {
return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'"
}
func writeBuildLog(w io.Writer, message string) error {
if w == nil {
return nil
}
_, err := fmt.Fprintf(w, "[image.build] %s\n", message)
return err
}
func packagesHash(lines []string) string {
return imagepreset.Hash(lines)
}