Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions src/inference_endpoint/endpoint_client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,13 @@ class _SocketConfig:
# Connection keepalive-probe settings for long-lived connections
# client kernel sends probe, server's kernel ACKs - no application overhead
#
# TODO(vir): verify impact on failure-detection, we want to fail fast
# detection time: KEEPIDLE + (KEEPCNT × KEEPINTVL) = 1 + 5×1 = 6s
SO_KEEPALIVE: int = 1 # Enable keepalive at socket level
TCP_KEEPIDLE: int = 1 # Probe after 1s idle
TCP_KEEPCNT: int = 5 # 5 failed probes = dead
TCP_KEEPINTVL: int = 1 # 1s between probes
# NOTE(vir):
# We hit many "connection timed out" errors in offline and high-concurrency modes,
# so keepalive is disabled; dead connections are handled in http.py via connection_lost/eof_received.
Comment on lines +55 to +57
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Disabling TCP keepalive can lead to "zombie" connections in the pool if a connection is silently dropped by a network middlebox (e.g., a firewall or NAT gateway). While `connection_lost` and `eof_received` handle active closures (FIN/RST), they cannot detect silent drops. Since the protocol's `read_headers` and `read_body` methods lack timeouts, a request using such a connection could hang indefinitely. It is recommended to ensure that an application-level timeout is implemented elsewhere, or to consider a non-zero `TCP_USER_TIMEOUT` (on Linux) as a safeguard.

SO_KEEPALIVE: int = 0 # Disabled
TCP_KEEPIDLE: int = 1 # Probe after 1s idle (disabled)
TCP_KEEPCNT: int = 5 # 5 failed probes = dead (disabled)
TCP_KEEPINTVL: int = 1 # 1s between probes (disabled)
Comment on lines +55 to +61
Comment on lines +59 to +61

# Socket buffer sizing: sliding windows, not full-message buffers.
# The event loop reads eagerly so the buffer only holds data between
Expand All @@ -79,11 +80,9 @@ def apply(cls, sock: socket.socket) -> None:
# Low-latency optimizations for streaming
sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, cls.TCP_NODELAY)

# Connection keepalive settings for long-lived SSE connections
# Connection keepalive (disabled by default, tune via SO_KEEPALIVE)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, cls.SO_KEEPALIVE)

# Fine-tune keepalive timing
if hasattr(socket, "TCP_KEEPIDLE"):
if cls.SO_KEEPALIVE and hasattr(socket, "TCP_KEEPIDLE"):
sock.setsockopt(socket.SOL_TCP, socket.TCP_KEEPIDLE, cls.TCP_KEEPIDLE)
sock.setsockopt(socket.SOL_TCP, socket.TCP_KEEPINTVL, cls.TCP_KEEPINTVL)
sock.setsockopt(socket.SOL_TCP, socket.TCP_KEEPCNT, cls.TCP_KEEPCNT)
Expand Down
Loading