veertuinc · NorseGaud · Jan 14, 2026 · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026
diff --git a/README.md b/README.md
@@ -45,7 +45,7 @@ Accepted values for booleans are: "1", "t", "T", "true", "TRUE", "True", "0", "f
 | ANKA_CLOUD_CONTROLLER_URL | ✅ | String | Anka Build Cloud's Controller URL, including the `http[s]` prefix. Port optional |
 | ANKA_CLOUD_TEMPLATE_ID | ✅* | String | VM Template ID to use. Takes precedence over `ANKA_CLOUD_TEMPLATE_NAME`. **Required if `ANKA_CLOUD_TEMPLATE_NAME` not provided** |
 | ANKA_CLOUD_TEMPLATE_NAME | ✅* | String | VM Template Name to use. Since template names are not guaranteed to be unique, it is recommended to use `ANKA_CLOUD_TEMPLATE_ID`. **Required if `ANKA_CLOUD_TEMPLATE_ID` not provided** |
-| ANKA_CLOUD_DEBUG |     ❌ | Boolean | Output Anka Cloud debug info |
+| ANKA_CLOUD_DEBUG | ❌ | Boolean | Output Anka Cloud debug info |
 | ANKA_CLOUD_TEMPLATE_TAG | ❌ | String | Template tag to use |
 | ANKA_CLOUD_NODE_ID | ❌ | String | Run VM on this specific node |
 | ANKA_CLOUD_PRIORITY | ❌ | Number | Priority in range 1-10000 (lower is more urgent) |

diff --git a/RELEASING.md b/RELEASING.md
@@ -4,4 +4,4 @@
 2. Merge into main branch.
 3. Create a new release on Github.
 4. Build and Watch https://jenkins/job/anka-cloud-gitlab-executor-release/ to see the release build.
-5. Update the release description and title if needed. The workflow will attach artifacts to the release.
+5. Update the release description and title if needed. The workflow will attach artifacts to the release.
diff --git a/internal/ankacloud/client.go b/internal/ankacloud/client.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -18,6 +19,9 @@ import (
 const (
 	defaultMaxIdleConnsPerHost = 20
 	defaultRequestTimeout      = 10 * time.Second
+	DefaultRetryAttempts       = 3                // MaxAttempts used by DefaultRetryConfig
+	DefaultRetryInitialDelay   = 5 * time.Second  // InitialDelay used by DefaultRetryConfig
+	DefaultRetryMaxDelay       = 30 * time.Second // MaxDelay used by DefaultRetryConfig
 )
 
 type APIClient struct {
@@ -26,6 +30,86 @@ type APIClient struct {
 	CustomHttpHeaders map[string]string
 }
 
+// RetryConfig holds the attempt budget and exponential-backoff timing used by WithRetry.
+type RetryConfig struct {
+	MaxAttempts  int           // total number of attempts, counting the first call
+	InitialDelay time.Duration // wait before the first retry; doubled after each wait
+	MaxDelay     time.Duration // upper bound applied to the doubled delay
+}
+
+// DefaultRetryConfig returns the default retry configuration
+func DefaultRetryConfig() RetryConfig {
+	return RetryConfig{
+		MaxAttempts:  DefaultRetryAttempts,
+		InitialDelay: DefaultRetryInitialDelay,
+		MaxDelay:     DefaultRetryMaxDelay,
+	}
+}
+
+// IsRetryableError reports whether err is worth retrying.
+//
+// A nil error is never retryable. Otherwise err is retryable when any of the
+// following holds:
+//   - it matches gitlab.ErrTransient via errors.Is;
+//   - its chain contains a *url.Error whose Timeout method reports true;
+//   - its message contains "deadline exceeded" or "Client.Timeout".
+func IsRetryableError(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	var urlErr *url.Error
+	switch {
+	case errors.Is(err, gitlab.ErrTransient):
+		return true
+	case errors.As(err, &urlErr) && urlErr.Timeout():
+		return true
+	}
+
+	// Fall back to matching well-known timeout phrases in the message text.
+	msg := err.Error()
+	for _, marker := range []string{"deadline exceeded", "Client.Timeout"} {
+		if strings.Contains(msg, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// WithRetry runs operation until it succeeds, fails with a non-retryable
+// error, or the attempt budget is spent, waiting with exponential backoff
+// between attempts.
+//
+// The first retry waits config.InitialDelay; after each wait the delay is
+// doubled and capped at config.MaxDelay. A config.MaxAttempts below 1 is
+// treated as 1 so that operation always runs at least once.
+//
+// On success the operation's result is returned. An error that
+// IsRetryableError rejects is returned unchanged. If ctx is done during a
+// backoff wait, ctx.Err() is returned. When every attempt fails, the last
+// error is returned wrapped together with the attempt count.
+func WithRetry[T any](ctx context.Context, config RetryConfig, operation func() (T, error)) (T, error) {
+	var zero T
+
+	// Guard against a zero or negative budget: without this the loop body
+	// would never run and the final error would wrap a nil cause.
+	maxAttempts := config.MaxAttempts
+	if maxAttempts < 1 {
+		maxAttempts = 1
+	}
+
+	var lastErr error
+	delay := config.InitialDelay
+
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		result, err := operation()
+		if err == nil {
+			return result, nil
+		}
+		if !IsRetryableError(err) {
+			return zero, err
+		}
+		lastErr = err
+
+		// No wait after the final attempt; fall through to the summary error.
+		if attempt == maxAttempts {
+			break
+		}
+
+		log.Printf("Request timed out (attempt %d/%d), retrying in %v...\n", attempt, maxAttempts, delay)
+
+		// Use an explicit timer so it can be released immediately when the
+		// context ends the wait early, instead of lingering until it fires.
+		timer := time.NewTimer(delay)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			return zero, ctx.Err()
+		case <-timer.C:
+		}
+
+		// Exponential backoff: double the delay for next attempt, capped at MaxDelay
+		delay *= 2
+		if delay > config.MaxDelay {
+			delay = config.MaxDelay
+		}
+	}
+
+	return zero, fmt.Errorf("operation failed after %d attempts: %w", maxAttempts, lastErr)
+}
+
+// WithRetryNoResult is the value-less form of WithRetry: it adapts an
+// operation that only returns an error to WithRetry's signature and returns
+// whatever error WithRetry produces.
+func WithRetryNoResult(ctx context.Context, config RetryConfig, operation func() error) error {
+	// Wrap the operation so it yields an empty placeholder result.
+	adapted := func() (struct{}, error) {
+		return struct{}{}, operation()
+	}
+	_, err := WithRetry(ctx, config, adapted)
+	return err
+}
+
 func (c *APIClient) parse(body []byte) (response, error) {
 	var r response
 	err := json.Unmarshal(body, &r)

diff --git a/internal/ankacloud/client_test.go b/internal/ankacloud/client_test.go
@@ -3,9 +3,14 @@ package ankacloud
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"testing"
+	"time"
+
+	"github.com/veertuinc/anka-cloud-gitlab-executor/internal/gitlab"
 )
 
 func TestCustomHeaders(t *testing.T) {
@@ -46,3 +51,245 @@ func TestCustomHeaders(t *testing.T) {
 		t.Error(err)
 	}
 }
+
+func TestIsRetryableError(t *testing.T) {
+	tests := []struct {
+		name     string
+		err      error
+		expected bool
+	}{
+		{
+			name:     "nil error",
+			err:      nil,
+			expected: false,
+		},
+		{
+			name:     "regular error",
+			err:      errors.New("some error"),
+			expected: false,
+		},
+		{
+			name:     "transient error",
+			err:      gitlab.TransientError(errors.New("timeout")),
+			expected: true,
+		},
+		{
+			name:     "error with deadline exceeded",
+			err:      errors.New("context deadline exceeded"),
+			expected: true,
+		},
+		{
+			name:     "error with Client.Timeout",
+			err:      errors.New("Client.Timeout exceeded while awaiting headers"),
+			expected: true,
+		},
+		{
+			name:     "wrapped deadline exceeded",
+			err:      errors.New("failed to terminate: context deadline exceeded (Client.Timeout exceeded)"),
+			expected: true,
+		},
+		{
+			name:     "url.Error with timeout",
+			err:      &url.Error{Op: "Get", URL: "http://test", Err: errors.New("timeout")},
+			expected: false, // url.Error.Timeout() returns false for generic errors
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := IsRetryableError(tt.err)
+			if result != tt.expected {
+				t.Errorf("IsRetryableError(%v) = %v, expected %v", tt.err, result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestWithRetry_Success(t *testing.T) {
+	callCount := 0
+	config := RetryConfig{
+		MaxAttempts:  3,
+		InitialDelay: 10 * time.Millisecond,
+		MaxDelay:     100 * time.Millisecond,
+	}
+
+	result, err := WithRetry(context.Background(), config, func() (string, error) {
+		callCount++
+		return "success", nil
+	})
+
+	if err != nil {
+		t.Errorf("expected no error, got %v", err)
+	}
+	if result != "success" {
+		t.Errorf("expected 'success', got %q", result)
+	}
+	if callCount != 1 {
+		t.Errorf("expected 1 call, got %d", callCount)
+	}
+}
+
+func TestWithRetry_NonRetryableError(t *testing.T) {
+	callCount := 0
+	config := RetryConfig{
+		MaxAttempts:  3,
+		InitialDelay: 10 * time.Millisecond,
+		MaxDelay:     100 * time.Millisecond,
+	}
+
+	_, err := WithRetry(context.Background(), config, func() (string, error) {
+		callCount++
+		return "", errors.New("permanent error")
+	})
+
+	if err == nil {
+		t.Error("expected error, got nil")
+	}
+	if callCount != 1 {
+		t.Errorf("expected 1 call (no retry for non-retryable error), got %d", callCount)
+	}
+}
+
+func TestWithRetry_RetryableError_EventualSuccess(t *testing.T) {
+	callCount := 0
+	config := RetryConfig{
+		MaxAttempts:  3,
+		InitialDelay: 10 * time.Millisecond,
+		MaxDelay:     100 * time.Millisecond,
+	}
+
+	result, err := WithRetry(context.Background(), config, func() (string, error) {
+		callCount++
+		if callCount < 3 {
+			return "", gitlab.TransientError(errors.New("timeout"))
+		}
+		return "success", nil
+	})
+
+	if err != nil {
+		t.Errorf("expected no error, got %v", err)
+	}
+	if result != "success" {
+		t.Errorf("expected 'success', got %q", result)
+	}
+	if callCount != 3 {
+		t.Errorf("expected 3 calls, got %d", callCount)
+	}
+}
+
+func TestWithRetry_RetryableError_AllAttemptsFail(t *testing.T) {
+	callCount := 0
+	config := RetryConfig{
+		MaxAttempts:  3,
+		InitialDelay: 10 * time.Millisecond,
+		MaxDelay:     100 * time.Millisecond,
+	}
+
+	_, err := WithRetry(context.Background(), config, func() (string, error) {
+		callCount++
+		return "", gitlab.TransientError(errors.New("timeout"))
+	})
+
+	if err == nil {
+		t.Error("expected error, got nil")
+	}
+	if callCount != 3 {
+		t.Errorf("expected 3 calls, got %d", callCount)
+	}
+	if !errors.Is(err, gitlab.ErrTransient) {
+		t.Errorf("expected error to wrap ErrTransient, got %v", err)
+	}
+}
+
+// TestWithRetry_ExponentialBackoff verifies the backoff schedule by timing:
+// with four attempts, a 50ms initial delay and a 150ms cap, the waits are
+// 50ms, 100ms and 150ms, so the whole run must take at least roughly 300ms
+// and the second gap must not be shorter than the first.
+func TestWithRetry_ExponentialBackoff(t *testing.T) {
+	var (
+		calls     int
+		callTimes []time.Time
+	)
+	cfg := RetryConfig{
+		MaxAttempts:  4,
+		InitialDelay: 50 * time.Millisecond,
+		MaxDelay:     150 * time.Millisecond,
+	}
+
+	begin := time.Now()
+	_, _ = WithRetry(context.Background(), cfg, func() (string, error) {
+		callTimes = append(callTimes, time.Now())
+		calls++
+		return "", gitlab.TransientError(errors.New("timeout"))
+	})
+
+	if calls != 4 {
+		t.Errorf("expected 4 calls, got %d", calls)
+	}
+
+	// The three waits sum to 300ms; 250ms leaves some tolerance.
+	elapsed := time.Since(begin)
+	const minExpected = 250 * time.Millisecond
+	if elapsed < minExpected {
+		t.Errorf("expected at least %v elapsed, got %v", minExpected, elapsed)
+	}
+
+	// Comparing gaps needs at least three recorded call times.
+	if len(callTimes) < 3 {
+		return
+	}
+	firstGap := callTimes[1].Sub(callTimes[0])
+	secondGap := callTimes[2].Sub(callTimes[1])
+	// The second gap should be about double the first; only ordering is checked.
+	if secondGap < firstGap {
+		t.Errorf("expected exponential backoff: delay2 (%v) should be >= delay1 (%v)", secondGap, firstGap)
+	}
+}
+
+func TestWithRetry_ContextCancellation(t *testing.T) {
+	callCount := 0
+	config := RetryConfig{
+		MaxAttempts:  5,
+		InitialDelay: 100 * time.Millisecond,
+		MaxDelay:     1 * time.Second,
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	go func() {
+		time.Sleep(50 * time.Millisecond)
+		cancel()
+	}()
+
+	_, err := WithRetry(ctx, config, func() (string, error) {
+		callCount++
+		return "", gitlab.TransientError(errors.New("timeout"))
+	})
+
+	if !errors.Is(err, context.Canceled) {
+		t.Errorf("expected context.Canceled error, got %v", err)
+	}
+	// Should have been cancelled before all retries completed
+	if callCount >= 5 {
+		t.Errorf("expected fewer than 5 calls due to cancellation, got %d", callCount)
+	}
+}
+
+func TestWithRetryNoResult(t *testing.T) {
+	callCount := 0
+	config := RetryConfig{
+		MaxAttempts:  3,
+		InitialDelay: 10 * time.Millisecond,
+		MaxDelay:     100 * time.Millisecond,
+	}
+
+	err := WithRetryNoResult(context.Background(), config, func() error {
+		callCount++
+		if callCount < 2 {
+			return gitlab.TransientError(errors.New("timeout"))
+		}
+		return nil
+	})
+
+	if err != nil {
+		t.Errorf("expected no error, got %v", err)
+	}
+	if callCount != 2 {
+		t.Errorf("expected 2 calls, got %d", callCount)
+	}
+}