From 40b83bf94c5b89d3c5c8ffb6f0a7ec96afccc293 Mon Sep 17 00:00:00 2001 From: Mikita Hradovich Date: Wed, 13 May 2026 14:30:25 +0200 Subject: [PATCH] fix: extend NodeDiagnostics with node state, distance, datacenter and pool size (DRIVER-540) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add four additional fields to DriverTimeoutException.NodeDiagnostics captured at timeout time: - nodeState: UP/DOWN/FORCED_DOWN/UNKNOWN — immediately explains timeouts to downed nodes - nodeDistance: LOCAL/REMOTE/IGNORED — contextualizes latency expectations - datacenter: node DC — helps diagnose cross-DC routing issues - poolSize: active connection count — reveals degraded pools (fewer connections than expected) All four are available from Node and ChannelPool already in scope at each buildNodeDiagnostics() call site. No new infrastructure required. Updated toString() example: /10.0.0.1:9042 [state: UP, distance: LOCAL, dc: dc1, channel in-flight: 5, pool size: 3, pool in-flight: 12, pool available ids: 988, pool orphaned ids: 2] --- .../ContinuousRequestHandlerBase.java | 8 ++ .../core/graph/GraphRequestHandler.java | 4 + .../api/core/DriverTimeoutException.java | 132 ++++++++++++++++-- .../internal/core/cql/CqlPrepareHandler.java | 4 + .../internal/core/cql/CqlRequestHandler.java | 4 + 5 files changed, 143 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/com/datastax/dse/driver/internal/core/cql/continuous/ContinuousRequestHandlerBase.java b/core/src/main/java/com/datastax/dse/driver/internal/core/cql/continuous/ContinuousRequestHandlerBase.java index 8d10172c27b..0107ebd1176 100644 --- a/core/src/main/java/com/datastax/dse/driver/internal/core/cql/continuous/ContinuousRequestHandlerBase.java +++ b/core/src/main/java/com/datastax/dse/driver/internal/core/cql/continuous/ContinuousRequestHandlerBase.java @@ -414,7 +414,11 @@ private NodeDiagnostics buildNodeDiagnostics() { ChannelPool pool = session.getPools().get(cb.node); return NodeDiagnostics.of( cb.node.getEndPoint(), + cb.node.getState(), + cb.node.getDistance(), + cb.node.getDatacenter(), channelInFlight, + pool != null ? pool.size() : UNAVAILABLE, pool != null ? pool.getInFlight() : UNAVAILABLE, pool != null ? pool.getAvailableIds() : UNAVAILABLE, pool != null ? pool.getOrphanedIds() : UNAVAILABLE); @@ -748,7 +752,11 @@ private void onPageTimeout(int expectedPage) { "Timed out waiting for page " + expectedPage, NodeDiagnostics.of( node.getEndPoint(), + node.getState(), + node.getDistance(), + node.getDatacenter(), channelInFlight, + pool != null ? pool.size() : UNAVAILABLE, pool != null ? pool.getInFlight() : UNAVAILABLE, pool != null ? pool.getAvailableIds() : UNAVAILABLE, pool != null ? pool.getOrphanedIds() : UNAVAILABLE)), diff --git a/core/src/main/java/com/datastax/dse/driver/internal/core/graph/GraphRequestHandler.java b/core/src/main/java/com/datastax/dse/driver/internal/core/graph/GraphRequestHandler.java index 1e7dcd0f0f9..015467a66fa 100644 --- a/core/src/main/java/com/datastax/dse/driver/internal/core/graph/GraphRequestHandler.java +++ b/core/src/main/java/com/datastax/dse/driver/internal/core/graph/GraphRequestHandler.java @@ -248,7 +248,11 @@ private NodeDiagnostics buildNodeDiagnostics() { ChannelPool pool = session.getPools().get(cb.node); return NodeDiagnostics.of( cb.node.getEndPoint(), + cb.node.getState(), + cb.node.getDistance(), + cb.node.getDatacenter(), channelInFlight, + pool != null ? pool.size() : UNAVAILABLE, pool != null ? pool.getInFlight() : UNAVAILABLE, pool != null ? pool.getAvailableIds() : UNAVAILABLE, pool != null ? pool.getOrphanedIds() : UNAVAILABLE); diff --git a/core/src/main/java/com/datastax/oss/driver/api/core/DriverTimeoutException.java b/core/src/main/java/com/datastax/oss/driver/api/core/DriverTimeoutException.java index 81bf6c7e2f5..b98976ad0b5 100644 --- a/core/src/main/java/com/datastax/oss/driver/api/core/DriverTimeoutException.java +++ b/core/src/main/java/com/datastax/oss/driver/api/core/DriverTimeoutException.java @@ -18,7 +18,9 @@ package com.datastax.oss.driver.api.core; import com.datastax.oss.driver.api.core.cql.ExecutionInfo; +import com.datastax.oss.driver.api.core.loadbalancing.NodeDistance; import com.datastax.oss.driver.api.core.metadata.EndPoint; +import com.datastax.oss.driver.api.core.metadata.NodeState; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; @@ -43,8 +45,14 @@ public class DriverTimeoutException extends DriverException { *

Fields: * *

*/ public static final class NodeDiagnostics { @NonNull private final EndPoint endPoint; + @Nullable private final NodeState nodeState; + @Nullable private final NodeDistance nodeDistance; + @Nullable private final String datacenter; private final int channelInFlight; + private final int poolSize; private final int poolInFlight; private final int poolAvailableIds; private final int poolOrphanedIds; @@ -77,19 +91,31 @@ public static final class NodeDiagnostics { * Creates a full diagnostic snapshot (pool was available at timeout time). * * @param endPoint the endpoint of the node. + * @param nodeState the state of the node at timeout time. + * @param nodeDistance the distance assigned to the node by the load-balancing policy. + * @param datacenter the datacenter the node belongs to. * @param channelInFlight in-flight count on the specific channel. + * @param poolSize number of active connections in the pool. * @param poolInFlight total in-flight across the pool for this host. * @param poolAvailableIds remaining stream IDs available in the pool. * @param poolOrphanedIds orphaned stream IDs in the pool. */ public NodeDiagnostics( @NonNull EndPoint endPoint, + @Nullable NodeState nodeState, + @Nullable NodeDistance nodeDistance, + @Nullable String datacenter, int channelInFlight, + int poolSize, int poolInFlight, int poolAvailableIds, int poolOrphanedIds) { this.endPoint = endPoint; + this.nodeState = nodeState; + this.nodeDistance = nodeDistance; + this.datacenter = datacenter; this.channelInFlight = channelInFlight; + this.poolSize = poolSize; this.poolInFlight = poolInFlight; this.poolAvailableIds = poolAvailableIds; this.poolOrphanedIds = poolOrphanedIds; @@ -97,14 +123,32 @@ public NodeDiagnostics( /** * Creates a partial diagnostic snapshot for when the pool was unavailable at timeout time. The - * pool-related fields ({@link #getPoolInFlight()}, {@link #getPoolAvailableIds()}, {@link - * #getPoolOrphanedIds()}) will be {@link DriverTimeoutException#UNAVAILABLE}. + * pool-related fields ({@link #getPoolSize()}, {@link #getPoolInFlight()}, {@link + * #getPoolAvailableIds()}, {@link #getPoolOrphanedIds()}) will be {@link + * DriverTimeoutException#UNAVAILABLE}. * * @param endPoint the endpoint of the node. + * @param nodeState the state of the node at timeout time. + * @param nodeDistance the distance assigned to the node by the load-balancing policy. + * @param datacenter the datacenter the node belongs to. * @param channelInFlight in-flight count on the specific channel. */ - public NodeDiagnostics(@NonNull EndPoint endPoint, int channelInFlight) { - this(endPoint, channelInFlight, UNAVAILABLE, UNAVAILABLE, UNAVAILABLE); + public NodeDiagnostics( + @NonNull EndPoint endPoint, + @Nullable NodeState nodeState, + @Nullable NodeDistance nodeDistance, + @Nullable String datacenter, + int channelInFlight) { + this( + endPoint, + nodeState, + nodeDistance, + datacenter, + channelInFlight, + UNAVAILABLE, + UNAVAILABLE, + UNAVAILABLE, + UNAVAILABLE); } /** @@ -113,7 +157,12 @@ public NodeDiagnostics(@NonNull EndPoint endPoint, int channelInFlight) { * timeout time. * * @param endPoint the endpoint of the node. + * @param nodeState the state of the node at timeout time. + * @param nodeDistance the distance assigned to the node by the load-balancing policy. + * @param datacenter the datacenter the node belongs to. * @param channelInFlight in-flight count on the specific channel. + * @param poolSize number of active connections in the pool, or {@link + * DriverTimeoutException#UNAVAILABLE}. * @param poolInFlight total in-flight across the pool, or {@link * DriverTimeoutException#UNAVAILABLE}. * @param poolAvailableIds remaining stream IDs in the pool, or {@link @@ -124,12 +173,24 @@ public NodeDiagnostics(@NonNull EndPoint endPoint, int channelInFlight) { @NonNull public static NodeDiagnostics of( @NonNull EndPoint endPoint, + @Nullable NodeState nodeState, + @Nullable NodeDistance nodeDistance, + @Nullable String datacenter, int channelInFlight, + int poolSize, int poolInFlight, int poolAvailableIds, int poolOrphanedIds) { return new NodeDiagnostics( - endPoint, channelInFlight, poolInFlight, poolAvailableIds, poolOrphanedIds); + endPoint, + nodeState, + nodeDistance, + datacenter, + channelInFlight, + poolSize, + poolInFlight, + poolAvailableIds, + poolOrphanedIds); } /** Returns the endpoint of the node that had in-flight requests at timeout time. */ @@ -138,6 +199,30 @@ public EndPoint getEndPoint() { return endPoint; } + /** + * Returns the state of the node at timeout time (e.g. UP, DOWN, FORCED_DOWN), or {@code null} + * if not available. + */ + @Nullable + public NodeState getNodeState() { + return nodeState; + } + + /** + * Returns the distance assigned to this node by the load-balancing policy at timeout time (e.g. + * LOCAL, REMOTE, IGNORED), or {@code null} if not available. + */ + @Nullable + public NodeDistance getNodeDistance() { + return nodeDistance; + } + + /** Returns the datacenter this node belongs to, or {@code null} if not available. */ + @Nullable + public String getDatacenter() { + return datacenter; + } + /** * Returns the number of in-flight requests on the specific connection at timeout time, or * {@link DriverTimeoutException#UNAVAILABLE} if not available. @@ -146,6 +231,14 @@ public int getChannelInFlight() { return channelInFlight; } + /** + * Returns the number of active connections in the pool at timeout time, or {@link + * DriverTimeoutException#UNAVAILABLE} if the pool was no longer available. + */ + public int getPoolSize() { + return poolSize; + } + /** * Returns the total number of in-flight requests across all connections to this host at timeout * time, or {@link DriverTimeoutException#UNAVAILABLE} if the pool was no longer available. @@ -174,12 +267,33 @@ public int getPoolOrphanedIds() { @Override public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(endPoint); + sb.append(" ["); + if (nodeState != null) { + sb.append("state: ").append(nodeState).append(", "); + } + if (nodeDistance != null) { + sb.append("distance: ").append(nodeDistance).append(", "); + } + if (datacenter != null) { + sb.append("dc: ").append(datacenter).append(", "); + } + sb.append("channel in-flight: ").append(channelInFlight).append(", "); if (poolInFlight == UNAVAILABLE) { - return String.format("%s [channel in-flight: %d, pool: n/a]", endPoint, channelInFlight); + sb.append("pool: n/a"); + } else { + sb.append("pool size: ") + .append(poolSize) + .append(", pool in-flight: ") + .append(poolInFlight) + .append(", pool available ids: ") + .append(poolAvailableIds) + .append(", pool orphaned ids: ") + .append(poolOrphanedIds); } - return String.format( - "%s [channel in-flight: %d, pool in-flight: %d, pool available ids: %d, pool orphaned ids: %d]", - endPoint, channelInFlight, poolInFlight, poolAvailableIds, poolOrphanedIds); + sb.append("]"); + return sb.toString(); } } diff --git a/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlPrepareHandler.java b/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlPrepareHandler.java index 4c99726a8b4..69e98ca5197 100644 --- a/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlPrepareHandler.java +++ b/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlPrepareHandler.java @@ -195,7 +195,11 @@ private NodeDiagnostics buildNodeDiagnostics() { ChannelPool pool = session.getPools().get(cb.node); return NodeDiagnostics.of( cb.node.getEndPoint(), + cb.node.getState(), + cb.node.getDistance(), + cb.node.getDatacenter(), channelInFlight, + pool != null ? pool.size() : UNAVAILABLE, pool != null ? pool.getInFlight() : UNAVAILABLE, pool != null ? pool.getAvailableIds() : UNAVAILABLE, pool != null ? pool.getOrphanedIds() : UNAVAILABLE); diff --git a/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlRequestHandler.java b/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlRequestHandler.java index e3391161c59..25f27e5a39d 100644 --- a/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlRequestHandler.java +++ b/core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlRequestHandler.java @@ -263,7 +263,11 @@ private NodeDiagnostics buildNodeDiagnostics() { ChannelPool pool = session.getPools().get(cb.node); return NodeDiagnostics.of( cb.node.getEndPoint(), + cb.node.getState(), + cb.node.getDistance(), + cb.node.getDatacenter(), channelInFlight, + pool != null ? pool.size() : UNAVAILABLE, pool != null ? pool.getInFlight() : UNAVAILABLE, pool != null ? pool.getAvailableIds() : UNAVAILABLE, pool != null ? pool.getOrphanedIds() : UNAVAILABLE);