banger/internal/daemon/imagebuild.go
Thales Maciel 942d242c03
Move avoidable daemon shell-outs into Go
Reduce the control plane's dependency on helper scripts while keeping the hard Linux integration points in the approved shell-out layer.

Replace the bash-driven image build path with a native Go builder that clones and optionally resizes the rootfs, boots a temporary Firecracker VM, provisions the guest over SSH, installs packages and modules, and preserves the package-manifest sidecar.

Also replace a few small convenience shell-outs with Go helpers: read process stats from /proc, use os.Truncate for ext4 image growth, add file-clone and normalized-line helpers, drop the sh -c work-disk flattening path, and launch Firecracker via a direct sudo command.

Add tests for the new SSH/archive and system helpers, plus a policy test that keeps os/exec imports confined to cli/firecracker/system. Update the docs to describe customize.sh as a manual helper rather than the daemon's image-build backend.

Validated with go mod tidy, go test ./..., and make build.
2026-03-17 17:13:07 -03:00

275 lines
9.4 KiB
Go

package daemon
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"banger/internal/firecracker"
"banger/internal/guest"
"banger/internal/hostnat"
"banger/internal/model"
"banger/internal/system"
)
// imageBuildSpec describes one native image build: the base rootfs to clone,
// where the new rootfs goes, the kernel/initrd/modules used to boot the
// temporary build VM, and what to install inside the guest.
type imageBuildSpec struct {
	ID            string    // build identifier; shortened (system.ShortID) to derive VM/tap/socket names
	Name          string    // image name; not referenced by the native builder in this file — presumably used by callers
	BaseRootfs    string    // path of the base rootfs image to clone
	RootfsPath    string    // destination path for the cloned (and optionally resized) rootfs
	BuildLog      io.Writer // sink for "[image.build] ..." progress lines; writeBuildLog tolerates nil
	KernelPath    string    // kernel image used to boot the build VM
	InitrdPath    string    // initrd passed to firecracker.MachineConfig
	ModulesDir    string    // host directory of kernel modules streamed into the guest; empty to skip
	PackagesPath  string    // file listing packages to install (read via system.ReadNormalizedLines)
	InstallDocker bool      // when true the provision script also installs Docker (docker-ce, docker.io fallback)
	Size          string    // optional target rootfs size spec (model.ParseSize); must be >= base image size
}
// imageBuildVM holds the runtime handles of the temporary VM booted for an
// image build: its name, guest IP, host-side tap device, Firecracker API
// socket, and the VMM process ID.
type imageBuildVM struct {
	Name      string // VM name ("image-build-" + short build ID); also used as the guest hostname
	GuestIP   string // guest IP address; SSH target is GuestIP:22
	TapDevice string // host tap device ("tap-img-" + short build ID)
	APISock   string // Firecracker API socket path under the runtime dir
	PID       int    // Firecracker VMM PID; cleanup only signals it when > 0
}
// runImageBuild dispatches an image build: an injected build function on the
// daemon, when set, takes precedence over the native Go implementation.
func (d *Daemon) runImageBuild(ctx context.Context, spec imageBuildSpec) error {
	build := d.imageBuild
	if build == nil {
		build = d.runImageBuildNative
	}
	return build(ctx, spec)
}
// runImageBuildNative is the native Go image-build path: it clones (and
// optionally grows) the base rootfs, boots a temporary Firecracker VM on it,
// provisions the guest over SSH, optionally streams kernel modules in,
// syncs, and shuts the VM down gracefully. The named result err lets the
// deferred cleanup join its error into the function's return value.
func (d *Daemon) runImageBuildNative(ctx context.Context, spec imageBuildSpec) (err error) {
	// Read the package manifest up front so a missing/bad manifest fails
	// before any expensive rootfs copying.
	packages, err := system.ReadNormalizedLines(spec.PackagesPath)
	if err != nil {
		return err
	}
	// Clone the base image into place (prefers a file clone per the helper's name).
	if err := system.CopyFilePreferClone(spec.BaseRootfs, spec.RootfsPath); err != nil {
		return err
	}
	// Optional grow-only resize; resizeRootfs rejects sizes below the base image.
	if spec.Size != "" {
		if err := resizeRootfs(spec.BaseRootfs, spec.RootfsPath, spec.Size); err != nil {
			return err
		}
	}
	vm, cleanup, err := d.startImageBuildVM(ctx, spec)
	if err != nil {
		return err
	}
	// Run cleanup with a fresh context so it still executes when ctx is
	// already cancelled; any cleanup error is joined into err.
	defer func() {
		cleanupErr := cleanup(context.Background())
		if cleanupErr != nil {
			err = errors.Join(err, cleanupErr)
		}
	}()
	sshAddress := vm.GuestIP + ":22"
	// Log via writeBuildLog like every other progress line so a nil BuildLog
	// stays a no-op instead of panicking inside fmt.Fprintf.
	if err := writeBuildLog(spec.BuildLog, fmt.Sprintf("waiting for ssh on %s", sshAddress)); err != nil {
		return err
	}
	// Give the guest up to 60s to bring sshd up, probing once per second.
	waitCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()
	if err := guest.WaitForSSH(waitCtx, sshAddress, d.config.SSHKeyPath, time.Second); err != nil {
		return err
	}
	client, err := guest.Dial(ctx, sshAddress, d.config.SSHKeyPath)
	if err != nil {
		return err
	}
	defer client.Close()
	if err := writeBuildLog(spec.BuildLog, "configuring guest"); err != nil {
		return err
	}
	// Hostname/DNS/fstab/package provisioning rendered as a single bash script.
	if err := client.RunScript(ctx, buildProvisionScript(vm.Name, d.config.DefaultDNS, packages, spec.InstallDocker), spec.BuildLog); err != nil {
		return err
	}
	if strings.TrimSpace(spec.ModulesDir) != "" {
		if err := writeBuildLog(spec.BuildLog, "copying kernel modules"); err != nil {
			return err
		}
		// Stream the modules directory as a tar archive into the guest-side command.
		if err := client.StreamTar(ctx, spec.ModulesDir, buildModulesCommand(filepath.Base(spec.ModulesDir)), spec.BuildLog); err != nil {
			return err
		}
	}
	if err := writeBuildLog(spec.BuildLog, "shutting down guest"); err != nil {
		return err
	}
	// Flush guest filesystem buffers before powering off so the rootfs image
	// is left consistent.
	if err := client.RunScript(ctx, "set -e\nsync\n", spec.BuildLog); err != nil {
		return err
	}
	return d.shutdownImageBuildVM(ctx, vm)
}
// resizeRootfs grows the cloned rootfs at rootfsPath to the size given by
// sizeSpec (parsed by model.ParseSize). Shrinking below the base image size
// is rejected, since truncating the cloned filesystem would corrupt it; the
// error now reports both sizes so the caller can see what was requested.
func resizeRootfs(baseRootfs, rootfsPath, sizeSpec string) error {
	sizeBytes, err := model.ParseSize(sizeSpec)
	if err != nil {
		return err
	}
	info, err := os.Stat(baseRootfs)
	if err != nil {
		return err
	}
	if sizeBytes < info.Size() {
		return fmt.Errorf("size %s (%d bytes) must be >= base image size (%d bytes)", sizeSpec, sizeBytes, info.Size())
	}
	return system.ResizeExt4Image(context.Background(), system.NewRunner(), rootfsPath, sizeBytes)
}
// startImageBuildVM provisions networking and boots a throwaway Firecracker
// VM for an image build. On success it returns the VM descriptor plus a
// cleanup function that kills the VMM (if still running) and tears down the
// tap/NAT plumbing; cleanup is safe to call after a graceful shutdown too.
// On failure, whatever setup already happened is undone before returning.
// The previously duplicated failure-path teardown is factored into the
// deleteTap/teardownNet closures.
func (d *Daemon) startImageBuildVM(ctx context.Context, spec imageBuildSpec) (imageBuildVM, func(context.Context) error, error) {
	if err := d.ensureBridge(ctx); err != nil {
		return imageBuildVM{}, nil, err
	}
	if err := d.ensureSocketDir(); err != nil {
		return imageBuildVM{}, nil, err
	}
	fcPath, err := d.firecrackerBinary()
	if err != nil {
		return imageBuildVM{}, nil, err
	}
	shortID := system.ShortID(spec.ID)
	guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
	if err != nil {
		return imageBuildVM{}, nil, err
	}
	vm := imageBuildVM{
		Name:      "image-build-" + shortID,
		GuestIP:   guestIP,
		TapDevice: "tap-img-" + shortID,
		APISock:   filepath.Join(d.layout.RuntimeDir, "img-"+shortID+".sock"),
	}
	// A stale API socket from a previous run would prevent Firecracker from binding.
	if err := os.RemoveAll(vm.APISock); err != nil && !os.IsNotExist(err) {
		return imageBuildVM{}, nil, err
	}
	// deleteTap removes the tap device, best-effort (it may already be gone).
	deleteTap := func() {
		_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.TapDevice)
	}
	// teardownNet reverses the NAT rules and tap device after a partial failure.
	teardownNet := func() {
		_ = hostnat.Ensure(ctx, d.runner, vm.GuestIP, vm.TapDevice, false)
		deleteTap()
	}
	if err := d.createTap(ctx, vm.TapDevice); err != nil {
		return imageBuildVM{}, nil, err
	}
	if err := hostnat.Ensure(ctx, d.runner, vm.GuestIP, vm.TapDevice, true); err != nil {
		// NAT setup failed, so only the tap device needs removing.
		deleteTap()
		return imageBuildVM{}, nil, err
	}
	// The VM must outlive a cancelled build request; its lifetime is managed
	// by the returned cleanup function, so machine operations use a fresh context.
	firecrackerCtx := context.Background()
	machine, err := firecracker.NewMachine(firecrackerCtx, firecracker.MachineConfig{
		BinaryPath:      fcPath,
		VMID:            spec.ID,
		SocketPath:      vm.APISock,
		LogPath:         spec.RootfsPath + ".firecracker.log",
		MetricsPath:     filepath.Join(filepath.Dir(spec.RootfsPath), "metrics.json"),
		KernelImagePath: spec.KernelPath,
		InitrdPath:      spec.InitrdPath,
		KernelArgs:      system.BuildBootArgs(vm.Name, vm.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
		RootDrivePath:   spec.RootfsPath,
		TapDevice:       vm.TapDevice,
		VCPUCount:       model.DefaultVCPUCount,
		MemoryMiB:       model.DefaultMemoryMiB,
		Logger:          d.logger,
	})
	if err != nil {
		teardownNet()
		return imageBuildVM{}, nil, err
	}
	if err := machine.Start(firecrackerCtx); err != nil {
		teardownNet()
		return imageBuildVM{}, nil, err
	}
	vm.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, vm.APISock)
	if err := d.ensureSocketAccess(ctx, vm.APISock); err != nil {
		_ = d.killVMProcess(context.Background(), vm.PID)
		teardownNet()
		return imageBuildVM{}, nil, err
	}
	cleanup := func(cleanupCtx context.Context) error {
		// Only signal the VMM if it is still running — a graceful shutdown
		// may already have reaped it.
		if vm.PID > 0 && system.ProcessRunning(vm.PID, vm.APISock) {
			_ = d.killVMProcess(cleanupCtx, vm.PID)
			_ = d.waitForExit(cleanupCtx, vm.PID, vm.APISock, 10*time.Second)
		}
		_ = hostnat.Ensure(cleanupCtx, d.runner, vm.GuestIP, vm.TapDevice, false)
		if vm.TapDevice != "" {
			_, _ = d.runner.RunSudo(cleanupCtx, "ip", "link", "del", vm.TapDevice)
		}
		if vm.APISock != "" {
			_ = os.Remove(vm.APISock)
		}
		return nil
	}
	return vm, cleanup, nil
}
// shutdownImageBuildVM asks the guest to power off gracefully by sending
// Ctrl-Alt-Del through the Firecracker API socket, then waits up to 15s for
// the VMM process to exit.
func (d *Daemon) shutdownImageBuildVM(ctx context.Context, vm imageBuildVM) error {
	// sendCtrlAltDel only needs the API socket path, so a minimal record is built.
	record := model.VMRecord{Runtime: model.VMRuntime{APISockPath: vm.APISock}}
	if err := d.sendCtrlAltDel(ctx, record); err != nil {
		return err
	}
	return d.waitForExit(ctx, vm.PID, vm.APISock, 15*time.Second)
}
// buildProvisionScript renders the bash script run inside the build VM over
// SSH. It configures DNS, hostname and /etc/hosts, normalizes /etc/fstab
// (drops stale per-VM /dev/vdb//dev/vdc mounts, ensures tmpfs for /run and
// /tmp), upgrades the system, installs the requested packages and optionally
// Docker (docker-ce with a docker.io fallback), and sets the system-wide git
// default branch. All interpolated values go through shellQuote so arbitrary
// names/DNS strings cannot break out of the script.
func buildProvisionScript(vmName, dnsServer string, packages []string, installDocker bool) string {
	var script bytes.Buffer
	script.WriteString("set -euo pipefail\n")
	fmt.Fprintf(&script, "printf 'nameserver %%s\\n' %s > /etc/resolv.conf\n", shellQuote(dnsServer))
	fmt.Fprintf(&script, "printf '%%s\\n' %s > /etc/hostname\n", shellQuote(vmName))
	fmt.Fprintf(&script, "printf '127.0.0.1 localhost\\n127.0.1.1 %%s\\n' %s > /etc/hosts\n", shellQuote(vmName))
	script.WriteString("touch /etc/fstab\n")
	script.WriteString("sed -i '\\|^/dev/vdb[[:space:]]\\+/home[[:space:]]|d; \\|^/dev/vdc[[:space:]]\\+/var[[:space:]]|d' /etc/fstab\n")
	script.WriteString("if ! grep -q '^tmpfs /run ' /etc/fstab; then echo 'tmpfs /run tmpfs defaults,nodev,nosuid,mode=0755 0 0' >> /etc/fstab; fi\n")
	script.WriteString("if ! grep -q '^tmpfs /tmp ' /etc/fstab; then echo 'tmpfs /tmp tmpfs defaults,nodev,nosuid,mode=1777 0 0' >> /etc/fstab; fi\n")
	script.WriteString("apt-get update\n")
	script.WriteString("DEBIAN_FRONTEND=noninteractive apt-get -y upgrade\n")
	// Skip the install step entirely when no packages were requested: under
	// `set -u`, expanding an empty array is an "unbound variable" error on
	// bash < 4.4, and apt-get would otherwise run with no package arguments.
	if len(packages) > 0 {
		fmt.Fprintf(&script, "PACKAGES=%s\n", shellArray(packages))
		script.WriteString("DEBIAN_FRONTEND=noninteractive apt-get -y install \"${PACKAGES[@]}\"\n")
	}
	if installDocker {
		// Remove the distro containerd first (best-effort), presumably because
		// it conflicts with containerd.io; fall back to docker.io when the
		// docker-ce suite is unavailable in the configured repos.
		script.WriteString("DEBIAN_FRONTEND=noninteractive apt-get -y remove containerd || true\n")
		script.WriteString("if ! DEBIAN_FRONTEND=noninteractive apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin; then\n")
		script.WriteString(" DEBIAN_FRONTEND=noninteractive apt-get -y install docker.io\n")
		script.WriteString("fi\n")
		script.WriteString("if command -v systemctl >/dev/null 2>&1; then systemctl enable --now docker || true; fi\n")
	}
	script.WriteString("git config --system init.defaultBranch main\n")
	return script.String()
}
// buildModulesCommand returns the guest-side shell command paired with
// client.StreamTar: it reads a tar archive of kernel modules from stdin,
// unpacks it under /lib/modules, runs depmod for the given kernel release
// directory (modulesBase, shell-quoted), registers the netfilter/veth/
// overlay modules for load at boot, and writes Docker-oriented sysctl
// settings (applied best-effort — note the trailing `|| true`).
func buildModulesCommand(modulesBase string) string {
	return fmt.Sprintf("bash -se <<'EOF'\nset -euo pipefail\nmkdir -p /lib/modules\ntar -C /lib/modules -xf -\ndepmod -a %s\nmkdir -p /etc/modules-load.d\nprintf 'nf_tables\\nnft_chain_nat\\nveth\\nbr_netfilter\\noverlay\\n' > /etc/modules-load.d/docker-netfilter.conf\nmkdir -p /etc/sysctl.d\ncat > /etc/sysctl.d/99-docker.conf <<'SYSCTL'\nnet.bridge.bridge-nf-call-iptables = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward = 1\nSYSCTL\nsysctl --system >/dev/null 2>&1 || true\nEOF", shellQuote(modulesBase))
}
// shellArray renders values as a bash array literal, e.g. ('a' 'b'), with
// each element single-quoted via shellQuote. An empty input yields "()".
func shellArray(values []string) string {
	var b strings.Builder
	b.WriteByte('(')
	for i, value := range values {
		if i > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(shellQuote(value))
	}
	b.WriteByte(')')
	return b.String()
}
// shellQuote wraps value in single quotes for safe interpolation into a
// shell script; each embedded single quote is emitted as the standard
// close-quote/escaped-quote/reopen sequence '"'"'.
func shellQuote(value string) string {
	const escapedQuote = `'"'"'`
	parts := strings.Split(value, "'")
	return "'" + strings.Join(parts, escapedQuote) + "'"
}
// writeBuildLog emits one "[image.build] ..." progress line to w. A nil
// writer disables logging and is not treated as an error.
func writeBuildLog(w io.Writer, message string) error {
	if w == nil {
		return nil
	}
	line := "[image.build] " + message + "\n"
	_, err := io.WriteString(w, line)
	return err
}
// packagesHash returns the hex-encoded SHA-256 digest of the package list,
// hashing the lines joined by '\n' plus a trailing newline — the same layout
// as a one-package-per-line manifest file.
func packagesHash(lines []string) string {
	manifest := strings.Join(lines, "\n") + "\n"
	digest := sha256.Sum256([]byte(manifest))
	return fmt.Sprintf("%x", digest[:])
}