From 7f402e74d3f538137e26970af876bbf04c1e508e Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 10 Mar 2026 00:33:32 +0000 Subject: [PATCH] Retry updater container creation on SMB mount failure The CIFS volume mount occurs during container creation, and the SMB service inside the storage container may not be fully ready even though port 445 is listening. Instead of relying on a sleep after port detection, attempt the container creation up to 3 times, waiting 2 seconds between attempts, whenever a mount error occurs. Also remove the 1-second sleep in waitForPort that was a workaround for the same issue. --- internal/infra/updater.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/internal/infra/updater.go b/internal/infra/updater.go index dd3a4955..a565553b 100644 --- a/internal/infra/updater.go +++ b/internal/infra/updater.go @@ -116,9 +116,20 @@ func NewUpdater(ctx context.Context, cli *client.Client, net *Networks, params * }, } - updaterContainer, err := cli.ContainerCreate(ctx, containerCfg, hostCfg, netCfg, nil, "") - if err != nil { - return nil, fmt.Errorf("failed to create updater container: %w", err) + // Retry container creation to handle transient SMB mount failures. + // The CIFS volumes are mounted during container creation, and the SMB service + // may not be fully ready even though its port is listening. 
+ var updaterContainer container.CreateResponse + for attempt := range 3 { + updaterContainer, err = cli.ContainerCreate(ctx, containerCfg, hostCfg, netCfg, nil, "") + if err == nil { + break + } + if !strings.Contains(err.Error(), "mount") || attempt == 2 { + return nil, fmt.Errorf("failed to create updater container: %w", err) + } + log.Printf("retrying updater container creation due to mount error: %v", err) + time.Sleep(2 * time.Second) } updater := &Updater{ @@ -568,10 +579,6 @@ func waitForPort(ctx context.Context, cli *client.Client, containerID string, po if execInspect.ExitCode == 0 { // port is listening log.Printf(" port %d is listening after %d attempts", port, i+1) - - // in a few instances, the port is open but the service isn't yet ready for connections - // no more reliable method has been found, other than a short delay - time.Sleep(sleepDuration) return nil }