Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions packages/gateway-v2/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,9 @@ func handlePing(ctx context.Context, conn *tls.Conn, reader *bufio.Reader) error
conn.Write([]byte("PONG\n"))
return nil
}

// handleHealth answers a health probe by writing a single newline-terminated
// JSON object of the form {"status":"ok","heartbeatTTL":<heartbeatTTL>} to conn.
//
// ctx and reader are accepted for signature parity with the other connection
// handlers but are not used here.
//
// It returns a non-nil error when the response could not be written, so the
// caller does not report a successful health check for a reply that never
// reached the peer.
func handleHealth(ctx context.Context, conn *tls.Conn, reader *bufio.Reader, heartbeatTTL int) error {
	response := fmt.Sprintf(`{"status":"ok","heartbeatTTL":%d}`, heartbeatTTL)
	if _, err := conn.Write([]byte(response + "\n")); err != nil {
		return fmt.Errorf("writing health response: %w", err)
	}
	return nil
}
21 changes: 18 additions & 3 deletions packages/gateway-v2/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
ForwardModePAMCancellation ForwardMode = "PAM_CANCELLATION"
ForwardModePAMCapabilities ForwardMode = "PAM_CAPABILITIES"
ForwardModePing ForwardMode = "PING"
ForwardModeHealth ForwardMode = "HEALTH"
)

type ActorType string
Expand All @@ -47,6 +48,8 @@
ActorTypeUser ActorType = "user"
)

// heartbeatInterval is the period of the regular (phase 2) heartbeat ticker.
// Its value in whole seconds is also what the health handler reports to
// clients as "heartbeatTTL".
const heartbeatInterval = 3 * time.Minute

// Dotted-decimal object identifiers, all under the 1.3.6.1.4.1.12345.100 arc.
// NOTE(review): presumably these identify custom certificate extensions that
// carry routing, actor, and PAM information — confirm against the code that
// reads them, which is not visible here.
const GATEWAY_ROUTING_INFO_OID = "1.3.6.1.4.1.12345.100.1"
const GATEWAY_ACTOR_OID = "1.3.6.1.4.1.12345.100.2"
const PAM_INFO_OID = "1.3.6.1.4.1.12345.100.3"
Expand Down Expand Up @@ -362,14 +365,14 @@
return
}
}
}
}()

// Phase 2: Regular heartbeat every 30 minutes
regularTicker := time.NewTicker(30 * time.Minute)
// Phase 2: Regular heartbeat
regularTicker := time.NewTicker(heartbeatInterval)
defer regularTicker.Stop()

for {

Check failure on line 375 in packages/gateway-v2/gateway.go

View check run for this annotation

Claude / Claude Code Review

Phase 2 heartbeat has no retry, single transient failure exceeds 5min dead-detection window

Phase 2 of `registerHeartBeat` (gateway.go:375-382) calls `sendHeartbeat()` on the regular ticker but ignores the returned error and never retries — with the new `heartbeatInterval = 3 * time.Minute` and the PR's stated ~5 min backend dead-detection window, a single transient heartbeat failure produces a ~6 minute gap (heartbeat at t=0 succeeds → t=3m fails → next attempt t=6m), which exceeds the 5 min TTL and causes the backend to mark a healthy gateway dead and pull it from the HA pool. This i
Comment thread
saifsmailbox98 marked this conversation as resolved.
select {
case <-ctx.Done():
return
Expand Down Expand Up @@ -669,7 +672,7 @@
ClientCAs: clientCAPool,
ClientAuth: tls.RequireAndVerifyClientCert,
MinVersion: tls.VersionTLS12,
NextProtos: []string{"infisical-http-proxy", "infisical-tcp-proxy", "infisical-ping", "infisical-pam-proxy", "infisical-pam-session-cancellation", "infisical-pam-capabilities"},
NextProtos: []string{"infisical-http-proxy", "infisical-tcp-proxy", "infisical-health", "infisical-ping", "infisical-pam-proxy", "infisical-pam-session-cancellation", "infisical-pam-capabilities"},
}

return nil
Expand Down Expand Up @@ -873,6 +876,14 @@
log.Info().Msg("Ping handler completed")
}
return
} else if forwardConfig.Mode == ForwardModeHealth {
log.Info().Msg("Starting health handler")
if err := handleHealth(g.ctx, tlsConn, reader, int(heartbeatInterval.Seconds())); err != nil {
log.Error().Err(err).Msg("Health handler ended with error")
} else {
log.Info().Msg("Health handler completed")
}
return
}
}

Expand Down Expand Up @@ -919,6 +930,10 @@
config.Mode = ForwardModePing
return config, nil

case "infisical-health":
config.Mode = ForwardModeHealth
return config, nil

default:
return nil, fmt.Errorf("unsupported ALPN protocol: %s", negotiatedProtocol)
}
Expand Down
Loading