From 305debe8a44f89fc581efd8a10d9c2b814e12bba Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 23 Dec 2025 15:25:51 -0800 Subject: [PATCH 01/35] test --- .../v1/SessionEvictionIntegrationTest.java | 77 +++++++++++++++++++ .../testresource/StargateTestResource.java | 10 ++- 2 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java new file mode 100644 index 0000000000..c7e3ac9d9e --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -0,0 +1,77 @@ +package io.stargate.sgv2.jsonapi.api.v1; + +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; + +import io.quarkus.test.common.QuarkusTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import io.stargate.sgv2.jsonapi.testresource.StargateTestResource; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.GenericContainer; + +@QuarkusIntegrationTest +@QuarkusTestResource(DseTestResource.class) +public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { + + @Test + public void testSessionEvictionOnAllNodesFailed() throws InterruptedException { + + insertDoc( + """ + { + "insertOne": { + "document": { + "name": "before_crash" + } + } + } + """); + + GenericContainer dbContainer = StargateTestResource.getCassandraContainer(); + if (dbContainer == null) { + throw new RuntimeException("Cannot find Cassandra container."); + } + + System.out.println("Stopping Database Container to simulate failure..."); + dbContainer.stop(); + + // Should get AllNodeFailedException + givenHeadersPostJsonThen( + """ + { + "insertOne": { + "document": { + "name": "after_crash" + } + } + } + """) + .statusCode(500) + .body("$", responseIsError()); + + // restart container + System.out.println("Starting Database Container to simulate recovery..."); + dbContainer.start(); + + // wait + Thread.sleep(20000); + + // restore + createKeyspace(); + createDefaultCollection(); + + // verify + insertDoc( + """ + { + "insertOne": { + "document": { + "name": "after_crash" + } + } + } + """); + + System.out.println("Test Passed: Session recovered after DB restart."); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index 60627c6097..65046615ee 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -20,6 +20,9 @@ public abstract class StargateTestResource private static final Logger LOG = LoggerFactory.getLogger(StargateTestResource.class); + /** The backend database container (Cassandra, DSE, or HCD). */ + private static GenericContainer cassandraContainer; + /** * Network ID injected by Quarkus when running tests inside a container (e.g., CI/CD). Used to * connect the database container to the same network as the test runner. @@ -32,9 +35,6 @@ public abstract class StargateTestResource */ private Network network; - /** The backend database container (Cassandra, DSE, or HCD). */ - private GenericContainer cassandraContainer; - /** * Called by Quarkus to inject the DevServicesContext, allowing us to detect if we are running * inside a container network. @@ -110,6 +110,10 @@ public static String getPersistenceModule() { "testing.containers.cluster-persistence", "persistence-cassandra-4.0"); } + public static GenericContainer getCassandraContainer() { + return cassandraContainer; + } + public static boolean isDse() { String dse = System.getProperty("testing.containers.cluster-dse", null); return "true".equals(dse); From 8f4a7c472f702c4743f3695cd2e2b3d28f01b360 Mon Sep 17 00:00:00 2001 From: Hazel Date: Mon, 5 Jan 2026 16:47:45 -0800 Subject: [PATCH 02/35] test --- .../v1/SessionEvictionIntegrationTest.java | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index c7e3ac9d9e..7c9eaca68b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -1,7 +1,10 @@ package io.stargate.sgv2.jsonapi.api.v1; import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; +import static org.hamcrest.Matchers.containsString; +import com.github.dockerjava.api.DockerClient; +import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; @@ -14,7 +17,7 @@ public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { @Test - public void testSessionEvictionOnAllNodesFailed() throws InterruptedException { + public void testSessionEvictionOnAllNodesFailed() throws Exception { insertDoc( """ @@ -28,12 +31,10 @@ public void testSessionEvictionOnAllNodesFailed() throws InterruptedException { """); GenericContainer dbContainer = StargateTestResource.getCassandraContainer(); - if (dbContainer == null) { - throw new RuntimeException("Cannot find Cassandra container."); - } - - System.out.println("Stopping Database Container to simulate failure..."); - dbContainer.stop(); + DockerClient dockerClient = dbContainer.getDockerClient(); + String containerId = dbContainer.getContainerId(); + Log.info("Pausing Database Container to simulate failure (Freeze)..."); + dockerClient.pauseContainerCmd(containerId).exec(); // Should get AllNodeFailedException givenHeadersPostJsonThen( @@ -47,18 +48,21 @@ public void testSessionEvictionOnAllNodesFailed() throws InterruptedException { } """) .statusCode(500) - .body("$", responseIsError()); + .body("$", responseIsErrorWithStatus()) + .body("errors[0].message", containsString("No node was available")); // restart container - System.out.println("Starting Database Container to simulate recovery..."); - dbContainer.start(); + Log.info("Unpausing Database Container to simulate recovery..."); + dockerClient.unpauseContainerCmd(containerId).exec(); // wait - Thread.sleep(20000); + Log.info("start to sleep"); + Thread.sleep(30000); + Log.info("end sleep"); // restore - createKeyspace(); - createDefaultCollection(); + // createKeyspace(); + // createDefaultCollection(); // verify insertDoc( @@ -66,12 +70,12 @@ public void testSessionEvictionOnAllNodesFailed() throws InterruptedException { { "insertOne": { "document": { - "name": "after_crash" + "name": "after_recovery" } } } """); - System.out.println("Test Passed: Session recovered after DB restart."); + Log.info("Test Passed: Session recovered after DB restart."); } } From 764966138e08124d560bdcadfc7c1a3d158aaae2 Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 6 Jan 2026 12:05:49 -0800 Subject: [PATCH 03/35] test - isolate the test --- .../v1/SessionEvictionIntegrationTest.java | 149 ++++++++++++++---- .../testresource/IsolatedDseTestResource.java | 26 +++ .../testresource/StargateTestResource.java | 8 +- 3 files changed, 151 insertions(+), 32 deletions(-) create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 7c9eaca68b..c701a6ffd0 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -1,24 +1,24 @@ package io.stargate.sgv2.jsonapi.api.v1; import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; -import static org.hamcrest.Matchers.containsString; import com.github.dockerjava.api.DockerClient; import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; -import io.stargate.sgv2.jsonapi.testresource.DseTestResource; -import io.stargate.sgv2.jsonapi.testresource.StargateTestResource; +import io.stargate.sgv2.jsonapi.testresource.IsolatedDseTestResource; +import java.io.IOException; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; @QuarkusIntegrationTest -@QuarkusTestResource(DseTestResource.class) +@QuarkusTestResource(IsolatedDseTestResource.class) public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { @Test public void testSessionEvictionOnAllNodesFailed() throws Exception { + // 1. Insert initial data to ensure the database is healthy before the test insertDoc( """ { @@ -30,41 +30,43 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { } """); - GenericContainer dbContainer = StargateTestResource.getCassandraContainer(); + // 2. Pause/stop the container to simulate DB failure + GenericContainer dbContainer = IsolatedDseTestResource.getIsolatedContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); Log.info("Pausing Database Container to simulate failure (Freeze)..."); dockerClient.pauseContainerCmd(containerId).exec(); - // Should get AllNodeFailedException - givenHeadersPostJsonThen( - """ - { - "insertOne": { - "document": { - "name": "after_crash" - } - } - } - """) - .statusCode(500) - .body("$", responseIsErrorWithStatus()) - .body("errors[0].message", containsString("No node was available")); + try { + // 3. Verify failure: The application should receive a 500 error/AllNodeFailedException + givenHeadersPostJsonThen( + """ + { + "insertOne": { + "document": { + "name": "after_crash" + } + } + } + """) + .statusCode(500) + .body("$", responseIsErrorWithStatus()); + // .body("errors[0].message", containsString("AllNodesFailedException")); + // .body("errors[0].message", containsString("No node was available")); - // restart container - Log.info("Unpausing Database Container to simulate recovery..."); - dockerClient.unpauseContainerCmd(containerId).exec(); + } finally { + // 4. Always unpause the container in finally block to ensure cleanup + Log.info("Unpausing Database Container to simulate recovery..."); + dockerClient.unpauseContainerCmd(containerId).exec(); + } - // wait + // 5. Wait for the database to become responsive again Log.info("start to sleep"); Thread.sleep(30000); Log.info("end sleep"); - // restore - // createKeyspace(); - // createDefaultCollection(); - - // verify + // 6. Verify Session Recovery: The application should have evicted the bad session + // and created a new one automatically. insertDoc( """ { @@ -78,4 +80,95 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { Log.info("Test Passed: Session recovered after DB restart."); } + + /** Pauses the container using either 'podman' or 'docker' command, depending on availability. */ + private void pauseContainer(String containerId) throws IOException, InterruptedException { + if (isCommandAvailable("podman")) { + runCommand("podman", "pause", containerId); + } else if (isCommandAvailable("docker")) { + runCommand("docker", "pause", containerId); + } else { + throw new RuntimeException("Neither 'podman' nor 'docker' command found to pause container."); + } + } + + /** Unpauses the container using either 'podman' or 'docker' command. */ + private void unpauseContainer(String containerId) throws IOException, InterruptedException { + if (isCommandAvailable("podman")) { + runCommand("podman", "unpause", containerId); + } else if (isCommandAvailable("docker")) { + runCommand("docker", "unpause", containerId); + } else { + // Best effort warning if unpause fails because no command is found + System.err.println("WARNING: Could not unpause container, no container runtime found."); + } + } + + /** Checks if a shell command is available in the current environment. */ + private boolean isCommandAvailable(String command) { + try { + // Checking version is a safe, side-effect-free way to test existence + new ProcessBuilder(command, "--version").start().waitFor(); + return true; + } catch (Exception e) { + return false; + } + } + + /** Executes a shell command and waits for it to finish. */ + private void runCommand(String... command) throws IOException, InterruptedException { + ProcessBuilder pb = new ProcessBuilder(command); + pb.inheritIO(); // Prints stdout/stderr to the console for debugging + Process process = pb.start(); + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RuntimeException( + "Command failed with exit code " + exitCode + ": " + String.join(" ", command)); + } + } + + /** Polls the database until it becomes responsive again. */ + private void waitForDbRecovery() { + System.out.println("Waiting for DB to recover..."); + long start = System.currentTimeMillis(); + long timeout = 30000; // 30 seconds timeout + + while (System.currentTimeMillis() - start < timeout) { + try { + // Perform a lightweight check (e.g., an empty find) to see if DB responds + String json = + """ + { + "find": { + "filter": {"name": "check_recovery"} + } + } + """; + + int statusCode = + io.restassured.RestAssured.given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(io.restassured.http.ContentType.JSON) + .body(json) + .when() + .post(GeneralResource.BASE_PATH + "/" + keyspaceName + "/" + collectionName) + .getStatusCode(); + + // 200 OK means the DB handled the request (even if empty result) + if (statusCode == 200) { + System.out.println("DB recovered!"); + return; + } + } catch (Exception e) { + // Ignore connection errors and continue retrying + } + + try { + Thread.sleep(500); // Poll every 500ms + } catch (InterruptedException ignored) { + } + } + throw new RuntimeException("DB failed to recover within " + timeout + "ms"); + } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java new file mode 100644 index 0000000000..0cc2659e50 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java @@ -0,0 +1,26 @@ +package io.stargate.sgv2.jsonapi.testresource; + +import java.util.Map; +import org.testcontainers.containers.GenericContainer; + +public class IsolatedDseTestResource extends DseTestResource { + + private static GenericContainer isolatedContainer; + + @Override + public Map start() { + Map props = super.start(); + isolatedContainer = super.getCassandraContainer(); + return props; + } + + @Override + public void stop() { + super.stop(); + isolatedContainer = null; + } + + public static GenericContainer getIsolatedContainer() { + return isolatedContainer; + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index 65046615ee..2e45d98819 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -20,9 +20,6 @@ public abstract class StargateTestResource private static final Logger LOG = LoggerFactory.getLogger(StargateTestResource.class); - /** The backend database container (Cassandra, DSE, or HCD). */ - private static GenericContainer cassandraContainer; - /** * Network ID injected by Quarkus when running tests inside a container (e.g., CI/CD). Used to * connect the database container to the same network as the test runner. @@ -35,6 +32,9 @@ public abstract class StargateTestResource */ private Network network; + /** The backend database container (Cassandra, DSE, or HCD). */ + private GenericContainer cassandraContainer; + /** * Called by Quarkus to inject the DevServicesContext, allowing us to detect if we are running * inside a container network. @@ -110,7 +110,7 @@ public static String getPersistenceModule() { "testing.containers.cluster-persistence", "persistence-cassandra-4.0"); } - public static GenericContainer getCassandraContainer() { + public GenericContainer getCassandraContainer() { return cassandraContainer; } From f5e4364acc80dee13d54c8c86218066fd5c55ac1 Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 6 Jan 2026 12:56:17 -0800 Subject: [PATCH 04/35] test - isolate the integration tests --- .../AbstractKeyspaceIntegrationTestBase.java | 4 +-- .../v1/SessionEvictionIntegrationTest.java | 28 +++++++++++++++++++ .../testresource/IsolatedDseTestResource.java | 5 ++++ .../testresource/StargateTestResource.java | 6 +++- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java index 82a05f00bf..e5a9eaf98d 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java @@ -54,7 +54,7 @@ public abstract class AbstractKeyspaceIntegrationTestBase { /** * Access is protected via {@link #createDriverSession()} method and closed in {@link #cleanUp()}. */ - private CqlSession cqlSession; + protected CqlSession cqlSession; @BeforeAll public static void enableLog() { @@ -350,7 +350,7 @@ protected boolean executeCqlStatement(SimpleStatement... statements) { * Synchronized to avoid creating multiple sessions, performance is not a concern. Session is * closed in {@link #cleanUp()} method. */ - private synchronized CqlSession createDriverSession() { + protected synchronized CqlSession createDriverSession() { if (cqlSession == null) { int port = Integer.getInteger(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP); String dc; diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index c701a6ffd0..2e13040e83 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -2,12 +2,16 @@ import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.CqlSessionBuilder; import com.github.dockerjava.api.DockerClient; import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.stargate.sgv2.jsonapi.testresource.IsolatedDseTestResource; +import io.stargate.sgv2.jsonapi.testresource.StargateTestResource; import java.io.IOException; +import java.net.InetSocketAddress; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; @@ -15,6 +19,30 @@ @QuarkusTestResource(IsolatedDseTestResource.class) public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { + @Override + protected synchronized CqlSession createDriverSession() { + if (cqlSession == null) { + GenericContainer container = IsolatedDseTestResource.getIsolatedContainer(); + if (container == null) { + throw new IllegalStateException("Isolated container not started!"); + } + int port = container.getMappedPort(9042); + String dc; + if (StargateTestResource.isDse() || StargateTestResource.isHcd()) { + dc = "dc1"; + } else { + dc = "datacenter1"; + } + var builder = + new CqlSessionBuilder() + .withLocalDatacenter(dc) + .addContactPoint(new InetSocketAddress("localhost", port)) + .withAuthCredentials("cassandra", "cassandra"); // default admin password :) + cqlSession = builder.build(); + } + return cqlSession; + } + @Test public void testSessionEvictionOnAllNodesFailed() throws Exception { diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java index 0cc2659e50..0f07a25e90 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java @@ -14,6 +14,11 @@ public Map start() { return props; } + @Override + protected void exposeSystemProperties(Map props) { + // Do not expose system properties to avoid interfering with other tests running in parallel + } + @Override public void stop() { super.stop(); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index 2e45d98819..a48af9bafc 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -83,11 +83,15 @@ public Map start() { propsBuilder.put("stargate.jsonapi.operations.vectorize-enabled", "true"); ImmutableMap props = propsBuilder.build(); - props.forEach(System::setProperty); + exposeSystemProperties(props); LOG.info("Using props map for the integration tests: %s".formatted(props)); return props; } + protected void exposeSystemProperties(Map props) { + props.forEach(System::setProperty); + } + @Override public void stop() { if (null != cassandraContainer && !cassandraContainer.isShouldBeReused()) { From f71659abf40aa7e5ec6f489208fe95481850b3e7 Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 6 Jan 2026 17:43:39 -0800 Subject: [PATCH 05/35] test - isolate the integration tests again --- .../sgv2/jsonapi/testresource/DseTestResource.java | 13 +++++++++---- .../testresource/IsolatedDseTestResource.java | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java index 61e7fb685b..41d2be62e3 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java @@ -151,12 +151,17 @@ public Map start() { propsBuilder.put( "stargate.jsonapi.embedding.providers.vertexai.models[0].parameters[0].required", "true"); if (this.containerNetworkId.isPresent()) { - String host = System.getProperty("stargate.int-test.cassandra.host"); + String host = + env.getOrDefault( + "stargate.int-test.cassandra.host", + System.getProperty("stargate.int-test.cassandra.host")); propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-end-points", host); } else { - int port = Integer.getInteger(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP); - propsBuilder.put( - "stargate.jsonapi.operations.database-config.cassandra-port", String.valueOf(port)); + String port = + env.getOrDefault( + IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP, + System.getProperty(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP)); + propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-port", port); } if (isDse() || isHcd()) { propsBuilder.put("stargate.jsonapi.operations.database-config.local-datacenter", "dc1"); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java index 0f07a25e90..a21398a4f3 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java @@ -11,7 +11,9 @@ public class IsolatedDseTestResource extends DseTestResource { public Map start() { Map props = super.start(); isolatedContainer = super.getCassandraContainer(); - return props; + java.util.HashMap mutableProps = new java.util.HashMap<>(props); + mutableProps.remove("stargate.int-test.cassandra.cql-port"); + return mutableProps; } @Override From ce1cf5e439bf022c66b8dd9e27d4c49968eb0431 Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 7 Jan 2026 12:32:30 -0800 Subject: [PATCH 06/35] test - isolate the integration tests again --- .../sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java | 3 ++- .../sgv2/jsonapi/testresource/IsolatedDseTestResource.java | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 2e13040e83..84edd36563 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -1,6 +1,7 @@ package io.stargate.sgv2.jsonapi.api.v1; import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; +import static org.hamcrest.Matchers.*; import com.datastax.oss.driver.api.core.CqlSession; import com.datastax.oss.driver.api.core.CqlSessionBuilder; @@ -77,7 +78,7 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { } } """) - .statusCode(500) + .statusCode(anyOf(is(500), is(504))) .body("$", responseIsErrorWithStatus()); // .body("errors[0].message", containsString("AllNodesFailedException")); // .body("errors[0].message", containsString("No node was available")); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java index a21398a4f3..0f07a25e90 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java @@ -11,9 +11,7 @@ public class IsolatedDseTestResource extends DseTestResource { public Map start() { Map props = super.start(); isolatedContainer = super.getCassandraContainer(); - java.util.HashMap mutableProps = new java.util.HashMap<>(props); - mutableProps.remove("stargate.int-test.cassandra.cql-port"); - return mutableProps; + return props; } @Override From c394e5654fd425ddc1ef1b733214db81b2970966 Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 7 Jan 2026 14:22:44 -0800 Subject: [PATCH 07/35] test - isolate the integration tests again --- .../v1/SessionEvictionIntegrationTest.java | 38 +++++++++++++++++-- .../testresource/IsolatedDseTestResource.java | 31 --------------- 2 files changed, 34 insertions(+), 35 deletions(-) delete mode 100644 src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 84edd36563..5c7f78b30b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -9,21 +9,51 @@ import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; -import io.stargate.sgv2.jsonapi.testresource.IsolatedDseTestResource; +import io.stargate.sgv2.jsonapi.testresource.DseTestResource; import io.stargate.sgv2.jsonapi.testresource.StargateTestResource; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.Map; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; @QuarkusIntegrationTest -@QuarkusTestResource(IsolatedDseTestResource.class) +@QuarkusTestResource( + value = SessionEvictionIntegrationTest.SessionEvictionTestResource.class, + restrictToAnnotatedClass = true) public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { + public static class SessionEvictionTestResource extends DseTestResource { + + private static GenericContainer isolatedContainer; + + @Override + public Map start() { + Map props = super.start(); + isolatedContainer = super.getCassandraContainer(); + return props; + } + + @Override + protected void exposeSystemProperties(Map props) { + // Do not expose system properties to avoid interfering with other tests running in parallel + } + + @Override + public void stop() { + super.stop(); + isolatedContainer = null; + } + + public static GenericContainer getIsolatedContainer() { + return isolatedContainer; + } + } + @Override protected synchronized CqlSession createDriverSession() { if (cqlSession == null) { - GenericContainer container = IsolatedDseTestResource.getIsolatedContainer(); + GenericContainer container = SessionEvictionTestResource.getIsolatedContainer(); if (container == null) { throw new IllegalStateException("Isolated container not started!"); } @@ -60,7 +90,7 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { """); // 2. Pause/stop the container to simulate DB failure - GenericContainer dbContainer = IsolatedDseTestResource.getIsolatedContainer(); + GenericContainer dbContainer = SessionEvictionTestResource.getIsolatedContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); Log.info("Pausing Database Container to simulate failure (Freeze)..."); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java deleted file mode 100644 index 0f07a25e90..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/IsolatedDseTestResource.java +++ /dev/null @@ -1,31 +0,0 @@ -package io.stargate.sgv2.jsonapi.testresource; - -import java.util.Map; -import org.testcontainers.containers.GenericContainer; - -public class IsolatedDseTestResource extends DseTestResource { - - private static GenericContainer isolatedContainer; - - @Override - public Map start() { - Map props = super.start(); - isolatedContainer = super.getCassandraContainer(); - return props; - } - - @Override - protected void exposeSystemProperties(Map props) { - // Do not expose system properties to avoid interfering with other tests running in parallel - } - - @Override - public void stop() { - super.stop(); - isolatedContainer = null; - } - - public static GenericContainer getIsolatedContainer() { - return isolatedContainer; - } -} From 45e6a9d43012582bd0754da9d34753ee05f006c2 Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 7 Jan 2026 17:28:07 -0800 Subject: [PATCH 08/35] add java docs and make codes clear --- .../v1/SessionEvictionIntegrationTest.java | 54 ++++++++++++++----- .../jsonapi/testresource/DseTestResource.java | 5 ++ .../testresource/StargateTestResource.java | 15 +++++- 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 5c7f78b30b..8c39f808df 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -23,37 +23,64 @@ restrictToAnnotatedClass = true) public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { + /** + * A specialized TestResource that spins up a new HCD/DSE container exclusively for this test + * class. + * + *

Unlike the standard {@link DseTestResource} used by other tests, this resource ensures a + * dedicated database instance. This isolation is crucial because this test involves destructive + * operations that would negatively impact other tests sharing a common database. + */ public static class SessionEvictionTestResource extends DseTestResource { - private static GenericContainer isolatedContainer; + /** + * Holds the reference to the container started by this resource. + * + *

This field is {@code static} to act as a bridge between the {@link QuarkusTestResource} + * lifecycle (which manages the resource instance) and the test instance (where we need to + * access the container to perform operations). + */ + private static GenericContainer sessionEvictionCassandraContainer; + /** + * Starts the container and captures the reference. + * + *

We override this method to capture the container instance created by the superclass into + * our static {@link #sessionEvictionCassandraContainer} field, making it accessible to the test + * methods. + */ @Override public Map start() { Map props = super.start(); - isolatedContainer = super.getCassandraContainer(); + sessionEvictionCassandraContainer = super.getCassandraContainer(); return props; } + /** + * Overridden to strictly prevent system property pollution. + * + *

The standard {@link DseTestResource} publishes connection details (like CQL port) to + * global System Properties. Since this test runs in parallel with others, publishing our + * isolated container's details would overwrite the shared container's configuration, causing + * other tests to connect to this container (which we are about to kill), leading to random + * failures in the test suite. + */ @Override protected void exposeSystemProperties(Map props) { - // Do not expose system properties to avoid interfering with other tests running in parallel + // No-op: Do not expose system properties to avoid interfering with other tests running in + // parallel } - @Override - public void stop() { - super.stop(); - isolatedContainer = null; - } - - public static GenericContainer getIsolatedContainer() { - return isolatedContainer; + public static GenericContainer getSessionEvictionCassandraContainer() { + return sessionEvictionCassandraContainer; } } @Override protected synchronized CqlSession createDriverSession() { if (cqlSession == null) { - GenericContainer container = SessionEvictionTestResource.getIsolatedContainer(); + GenericContainer container = + SessionEvictionTestResource.getSessionEvictionCassandraContainer(); if (container == null) { throw new IllegalStateException("Isolated container not started!"); } @@ -90,7 +117,8 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { """); // 2. Pause/stop the container to simulate DB failure - GenericContainer dbContainer = SessionEvictionTestResource.getIsolatedContainer(); + GenericContainer dbContainer = + SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); Log.info("Pausing Database Container to simulate failure (Freeze)..."); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java index 41d2be62e3..f234d9445d 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java @@ -150,6 +150,10 @@ public Map start() { propsBuilder.put("stargate.jsonapi.embedding.providers.vertexai.enabled", "true"); propsBuilder.put( "stargate.jsonapi.embedding.providers.vertexai.models[0].parameters[0].required", "true"); + + // Prefer instance-specific configuration from 'env' to support parallel execution and + // isolation. Fall back to global system properties only if instance-specific values are + // missing. if (this.containerNetworkId.isPresent()) { String host = env.getOrDefault( @@ -163,6 +167,7 @@ public Map start() { System.getProperty(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP)); propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-port", port); } + if (isDse() || isHcd()) { propsBuilder.put("stargate.jsonapi.operations.database-config.local-datacenter", "dc1"); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index a48af9bafc..f049ebc149 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -88,6 +88,12 @@ public Map start() { return props; } + /** + * Exposes the provided configuration properties as system properties. This makes the database + * connection and test-specific settings available to the whole test environment. + * + * @param props A map containing the properties to be set as system properties. + */ protected void exposeSystemProperties(Map props) { props.forEach(System::setProperty); } @@ -114,7 +120,14 @@ public static String getPersistenceModule() { "testing.containers.cluster-persistence", "persistence-cassandra-4.0"); } - public GenericContainer getCassandraContainer() { + /** + * Provides access to the backend database container. This allows subclasses to perform operations + * directly on the container, such as stop/start or pause/unpause it to simulate failure + * scenarios. + * + * @return The generic container instance for the backend database. + */ + protected GenericContainer getCassandraContainer() { return cassandraContainer; } From 6dc24d51e2780d360fd442955cc7b4c615c1b979 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 8 Jan 2026 16:36:29 -0800 Subject: [PATCH 09/35] test - delete maybe unused code --- .../jsonapi/testresource/DseTestResource.java | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java index f234d9445d..4a77b4fdd2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java @@ -1,7 +1,6 @@ package io.stargate.sgv2.jsonapi.testresource; import com.google.common.collect.ImmutableMap; -import io.stargate.sgv2.jsonapi.api.v1.util.IntegrationTestUtils; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -151,23 +150,6 @@ public Map start() { propsBuilder.put( "stargate.jsonapi.embedding.providers.vertexai.models[0].parameters[0].required", "true"); - // Prefer instance-specific configuration from 'env' to support parallel execution and - // isolation. Fall back to global system properties only if instance-specific values are - // missing. - if (this.containerNetworkId.isPresent()) { - String host = - env.getOrDefault( - "stargate.int-test.cassandra.host", - System.getProperty("stargate.int-test.cassandra.host")); - propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-end-points", host); - } else { - String port = - env.getOrDefault( - IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP, - System.getProperty(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP)); - propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-port", port); - } - if (isDse() || isHcd()) { propsBuilder.put("stargate.jsonapi.operations.database-config.local-datacenter", "dc1"); } From ea49c4d2312bec628dba0570dc94cf578fd443da Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 8 Jan 2026 16:54:19 -0800 Subject: [PATCH 10/35] rollback previous commit --- .../jsonapi/testresource/DseTestResource.java | 18 ++++++++++++++++++ .../testresource/StargateTestResource.java | 4 +++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java index 4a77b4fdd2..f234d9445d 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/DseTestResource.java @@ -1,6 +1,7 @@ package io.stargate.sgv2.jsonapi.testresource; import com.google.common.collect.ImmutableMap; +import io.stargate.sgv2.jsonapi.api.v1.util.IntegrationTestUtils; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -150,6 +151,23 @@ public Map start() { propsBuilder.put( "stargate.jsonapi.embedding.providers.vertexai.models[0].parameters[0].required", "true"); + // Prefer instance-specific configuration from 'env' to support parallel execution and + // isolation. Fall back to global system properties only if instance-specific values are + // missing. + if (this.containerNetworkId.isPresent()) { + String host = + env.getOrDefault( + "stargate.int-test.cassandra.host", + System.getProperty("stargate.int-test.cassandra.host")); + propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-end-points", host); + } else { + String port = + env.getOrDefault( + IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP, + System.getProperty(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP)); + propsBuilder.put("stargate.jsonapi.operations.database-config.cassandra-port", port); + } + if (isDse() || isHcd()) { propsBuilder.put("stargate.jsonapi.operations.database-config.local-datacenter", "dc1"); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index f049ebc149..d758a30eb1 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableMap; import io.quarkus.test.common.DevServicesContext; import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; +import io.stargate.sgv2.jsonapi.api.v1.util.IntegrationTestUtils; import java.time.Duration; import java.util.Collections; import java.util.Map; @@ -60,7 +61,8 @@ public Map start() { "stargate.int-test.cassandra.host", cassandraContainer.getCurrentContainerInfo().getConfig().getHostName()); propsBuilder.put( - "stargate.int-test.cassandra.cql-port", cassandraContainer.getMappedPort(9042).toString()); + IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP, + cassandraContainer.getMappedPort(9042).toString()); propsBuilder.put("stargate.int-test.cluster.persistence", getPersistenceModule()); // 2. Runtime Limits Config: Override default limits for testing purposes From eee8cf605d897866acddf56369e2b617de278d4c Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 8 Jan 2026 17:15:47 -0800 Subject: [PATCH 11/35] some code refactor --- .../AbstractKeyspaceIntegrationTestBase.java | 10 ++++- .../v1/SessionEvictionIntegrationTest.java | 38 +++++++------------ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java index e5a9eaf98d..5b8e8485c4 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java @@ -352,7 +352,7 @@ protected boolean executeCqlStatement(SimpleStatement... statements) { */ protected synchronized CqlSession createDriverSession() { if (cqlSession == null) { - int port = Integer.getInteger(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP); + int port = getCassandraCqlPort(); String dc; if (StargateTestResource.isDse() || StargateTestResource.isHcd()) { dc = "dc1"; @@ -369,6 +369,14 @@ protected synchronized CqlSession createDriverSession() { return cqlSession; } + /** + * Gets the Cassandra CQL port. Subclasses can override this if the port is not available via + * standard system properties. + */ + protected int getCassandraCqlPort() { + return Integer.getInteger(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP); + } + /** Helper method for determining if lexical search is available for the database backend */ protected boolean isLexicalAvailableForDB() { return !"true".equals(System.getProperty("testing.db.lexical-disabled")); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 8c39f808df..919add658e 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -3,16 +3,12 @@ import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; import static org.hamcrest.Matchers.*; -import com.datastax.oss.driver.api.core.CqlSession; -import com.datastax.oss.driver.api.core.CqlSessionBuilder; import com.github.dockerjava.api.DockerClient; import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; -import io.stargate.sgv2.jsonapi.testresource.StargateTestResource; import java.io.IOException; -import java.net.InetSocketAddress; import java.util.Map; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; @@ -76,29 +72,21 @@ public static GenericContainer getSessionEvictionCassandraContainer() { } } + /** + * Overridden to ensure we connect to the isolated container created for this test. + * + *

The base class implementation relies on global system properties, which point to the shared + * database container. To ensure this test correctly interacts with (and pauses) its dedicated + * container, we must retrieve the port directly from the isolated container instance. + */ @Override - protected synchronized CqlSession createDriverSession() { - if (cqlSession == null) { - GenericContainer container = - SessionEvictionTestResource.getSessionEvictionCassandraContainer(); - if (container == null) { - throw new IllegalStateException("Isolated container not started!"); - } - int port = container.getMappedPort(9042); - String dc; - if (StargateTestResource.isDse() || StargateTestResource.isHcd()) { - dc = "dc1"; - } else { - dc = "datacenter1"; - } - var builder = - new CqlSessionBuilder() - .withLocalDatacenter(dc) - .addContactPoint(new InetSocketAddress("localhost", port)) - .withAuthCredentials("cassandra", "cassandra"); // default admin password :) - cqlSession = builder.build(); + protected int getCassandraCqlPort() { + GenericContainer container = + SessionEvictionTestResource.getSessionEvictionCassandraContainer(); + if (container == null) { + throw new IllegalStateException("Session eviction IT Cassandra container not started!"); } - return cqlSession; + return container.getMappedPort(9042); } @Test From 29cebeb632bee210ae0aeb88369b7bb34b218890 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 8 Jan 2026 17:41:09 -0800 Subject: [PATCH 12/35] rollback some changes --- .../jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java index 5b8e8485c4..43f3380698 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java @@ -54,7 +54,7 @@ public abstract class AbstractKeyspaceIntegrationTestBase { /** * Access is protected via {@link #createDriverSession()} method and closed in {@link #cleanUp()}. */ - protected CqlSession cqlSession; + private CqlSession cqlSession; @BeforeAll public static void enableLog() { @@ -350,7 +350,7 @@ protected boolean executeCqlStatement(SimpleStatement... statements) { * Synchronized to avoid creating multiple sessions, performance is not a concern. Session is * closed in {@link #cleanUp()} method. */ - protected synchronized CqlSession createDriverSession() { + private synchronized CqlSession createDriverSession() { if (cqlSession == null) { int port = getCassandraCqlPort(); String dc; From 7c41134888538209e9ae051473d63aa6c9e3aa7c Mon Sep 17 00:00:00 2001 From: Hazel Date: Fri, 9 Jan 2026 15:19:12 -0800 Subject: [PATCH 13/35] test db failure simulation --- .../v1/SessionEvictionIntegrationTest.java | 97 +++++-------------- 1 file changed, 26 insertions(+), 71 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 919add658e..d57fbede11 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -1,6 +1,6 @@ package io.stargate.sgv2.jsonapi.api.v1; -import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.*; +import static io.restassured.RestAssured.given; import static org.hamcrest.Matchers.*; import com.github.dockerjava.api.DockerClient; @@ -8,7 +8,6 @@ import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; -import java.io.IOException; import java.util.Map; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; @@ -76,8 +75,8 @@ public static GenericContainer getSessionEvictionCassandraContainer() { * Overridden to ensure we connect to the isolated container created for this test. * *

The base class implementation relies on global system properties, which point to the shared - * database container. To ensure this test correctly interacts with (and pauses) its dedicated - * container, we must retrieve the port directly from the isolated container instance. + * database container. To ensure this test correctly interacts with its dedicated container, we + * must retrieve the port directly from the isolated container instance. */ @Override protected int getCassandraCqlPort() { @@ -104,16 +103,22 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { } """); - // 2. Pause/stop the container to simulate DB failure + // 2. Stop the container to simulate DB failure + // IMPORTANT: We use dockerClient.stopContainerCmd() instead of container.stop(). + // - container.stop() (Testcontainers) removes the container, so a restart would create a NEW + // container with a NEW random port. + // - dockerClient.stopContainerCmd() (Docker API) simply halts the process but keeps the + // container (and its port mapping) intact. GenericContainer dbContainer = SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); - Log.info("Pausing Database Container to simulate failure (Freeze)..."); - dockerClient.pauseContainerCmd(containerId).exec(); + + Log.info("Stopping Database Container to simulate failure..."); + dockerClient.stopContainerCmd(containerId).exec(); try { - // 3. Verify failure: The application should receive a 500 error/AllNodeFailedException + // 3. Verify failure: The application should receive a 500 error/AllNodesFailedException givenHeadersPostJsonThen( """ { @@ -124,21 +129,17 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { } } """) - .statusCode(anyOf(is(500), is(504))) - .body("$", responseIsErrorWithStatus()); - // .body("errors[0].message", containsString("AllNodesFailedException")); - // .body("errors[0].message", containsString("No node was available")); + .statusCode(500) + .body("errors[0].message", containsString("AllNodesFailedException")); } finally { - // 4. Always unpause the container in finally block to ensure cleanup - Log.info("Unpausing Database Container to simulate recovery..."); - dockerClient.unpauseContainerCmd(containerId).exec(); + // 4. Restart the container to simulate recovery + Log.info("Restarting Database Container to simulate recovery..."); + dockerClient.startContainerCmd(containerId).exec(); } // 5. Wait for the database to become responsive again - Log.info("start to sleep"); - Thread.sleep(30000); - Log.info("end sleep"); + waitForDbRecovery(); // 6. Verify Session Recovery: The application should have evicted the bad session // and created a new one automatically. @@ -156,57 +157,11 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { Log.info("Test Passed: Session recovered after DB restart."); } - /** Pauses the container using either 'podman' or 'docker' command, depending on availability. */ - private void pauseContainer(String containerId) throws IOException, InterruptedException { - if (isCommandAvailable("podman")) { - runCommand("podman", "pause", containerId); - } else if (isCommandAvailable("docker")) { - runCommand("docker", "pause", containerId); - } else { - throw new RuntimeException("Neither 'podman' nor 'docker' command found to pause container."); - } - } - - /** Unpauses the container using either 'podman' or 'docker' command. */ - private void unpauseContainer(String containerId) throws IOException, InterruptedException { - if (isCommandAvailable("podman")) { - runCommand("podman", "unpause", containerId); - } else if (isCommandAvailable("docker")) { - runCommand("docker", "unpause", containerId); - } else { - // Best effort warning if unpause fails because no command is found - System.err.println("WARNING: Could not unpause container, no container runtime found."); - } - } - - /** Checks if a shell command is available in the current environment. */ - private boolean isCommandAvailable(String command) { - try { - // Checking version is a safe, side-effect-free way to test existence - new ProcessBuilder(command, "--version").start().waitFor(); - return true; - } catch (Exception e) { - return false; - } - } - - /** Executes a shell command and waits for it to finish. */ - private void runCommand(String... command) throws IOException, InterruptedException { - ProcessBuilder pb = new ProcessBuilder(command); - pb.inheritIO(); // Prints stdout/stderr to the console for debugging - Process process = pb.start(); - int exitCode = process.waitFor(); - if (exitCode != 0) { - throw new RuntimeException( - "Command failed with exit code " + exitCode + ": " + String.join(" ", command)); - } - } - /** Polls the database until it becomes responsive again. */ private void waitForDbRecovery() { - System.out.println("Waiting for DB to recover..."); + Log.info("Waiting for DB to recover..."); long start = System.currentTimeMillis(); - long timeout = 30000; // 30 seconds timeout + long timeout = 60000; // 60 seconds timeout while (System.currentTimeMillis() - start < timeout) { try { @@ -215,24 +170,24 @@ private void waitForDbRecovery() { """ { "find": { - "filter": {"name": "check_recovery"} + "filter": {} } } """; int statusCode = - io.restassured.RestAssured.given() + given() .port(getTestPort()) .headers(getHeaders()) .contentType(io.restassured.http.ContentType.JSON) .body(json) .when() - .post(GeneralResource.BASE_PATH + "/" + keyspaceName + "/" + collectionName) + .post(CollectionResource.BASE_PATH, keyspaceName, collectionName) .getStatusCode(); // 200 OK means the DB handled the request (even if empty result) if (statusCode == 200) { - System.out.println("DB recovered!"); + Log.info("DB recovered!"); return; } } catch (Exception e) { @@ -240,7 +195,7 @@ private void waitForDbRecovery() { } try { - Thread.sleep(500); // Poll every 500ms + Thread.sleep(1000); // Poll every 1s } catch (InterruptedException ignored) { } } From 69d7f65bd61bc3c44912395d334bb631bc9ef783 Mon Sep 17 00:00:00 2001 From: Hazel Date: Fri, 9 Jan 2026 16:04:51 -0800 Subject: [PATCH 14/35] test db failure simulation again --- .../api/v1/SessionEvictionIntegrationTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index d57fbede11..b144bccb6d 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -114,7 +114,7 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); - Log.info("Stopping Database Container to simulate failure..."); + Log.error("Stopping Database Container to simulate failure..."); dockerClient.stopContainerCmd(containerId).exec(); try { @@ -130,11 +130,11 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { } """) .statusCode(500) - .body("errors[0].message", containsString("AllNodesFailedException")); + .body("errors[0].message", containsString("No node was available")); } finally { // 4. Restart the container to simulate recovery - Log.info("Restarting Database Container to simulate recovery..."); + Log.error("Restarting Database Container to simulate recovery..."); dockerClient.startContainerCmd(containerId).exec(); } @@ -154,12 +154,12 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { } """); - Log.info("Test Passed: Session recovered after DB restart."); + Log.error("Test Passed: Session recovered after DB restart."); } /** Polls the database until it becomes responsive again. */ private void waitForDbRecovery() { - Log.info("Waiting for DB to recover..."); + Log.error("Waiting for DB to recover..."); long start = System.currentTimeMillis(); long timeout = 60000; // 60 seconds timeout @@ -187,7 +187,7 @@ private void waitForDbRecovery() { // 200 OK means the DB handled the request (even if empty result) if (statusCode == 200) { - Log.info("DB recovered!"); + Log.error("DB recovered!"); return; } } catch (Exception e) { From 2e7dd5c49c209156298e685d7207c530b2077b76 Mon Sep 17 00:00:00 2001 From: Hazel Date: Mon, 12 Jan 2026 15:26:22 -0800 Subject: [PATCH 15/35] test db failure simulation again --- .../v1/SessionEvictionIntegrationTest.java | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index b144bccb6d..1a297ad6fa 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -1,6 +1,7 @@ package io.stargate.sgv2.jsonapi.api.v1; import static io.restassured.RestAssured.given; +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsFindSuccess; import static org.hamcrest.Matchers.*; import com.github.dockerjava.api.DockerClient; @@ -89,20 +90,31 @@ protected int getCassandraCqlPort() { } @Test - public void testSessionEvictionOnAllNodesFailed() throws Exception { + public void testSessionEvictionOnAllNodesFailed() { - // 1. Insert initial data to ensure the database is healthy before the test + // 1. Insert and find initial data to ensure the database is healthy before the test insertDoc( """ { "insertOne": { "document": { - "name": "before_crash" + "_id": "before_crash" } } } """); + givenHeadersPostJsonThenOkNoErrors( + """ + { + "findOne": { + "filter" : {"_id" : "before_crash"} + } + } + """) + .body("$", responseIsFindSuccess()) + .body("data.document._id", is("before_crash")); + // 2. Stop the container to simulate DB failure // IMPORTANT: We use dockerClient.stopContainerCmd() instead of container.stop(). // - container.stop() (Testcontainers) removes the container, so a restart would create a NEW @@ -121,14 +133,10 @@ public void testSessionEvictionOnAllNodesFailed() throws Exception { // 3. Verify failure: The application should receive a 500 error/AllNodesFailedException givenHeadersPostJsonThen( """ - { - "insertOne": { - "document": { - "name": "after_crash" - } - } - } - """) + { + "findOne": {} + } + """) .statusCode(500) .body("errors[0].message", containsString("No node was available")); @@ -169,9 +177,7 @@ private void waitForDbRecovery() { String json = """ { - "find": { - "filter": {} - } + "findOne": {} } """; From 14940d19bb890ad1508b599e308c5e95801dcc08 Mon Sep 17 00:00:00 2001 From: Hazel Date: Mon, 12 Jan 2026 16:04:10 -0800 Subject: [PATCH 16/35] fix test error --- .../v1/SessionEvictionIntegrationTest.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 1a297ad6fa..f9445ac978 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -96,11 +96,7 @@ public void testSessionEvictionOnAllNodesFailed() { insertDoc( """ { - "insertOne": { - "document": { - "_id": "before_crash" - } - } + "_id": "before_crash" } """); @@ -153,14 +149,21 @@ public void testSessionEvictionOnAllNodesFailed() { // and created a new one automatically. insertDoc( """ - { - "insertOne": { - "document": { - "name": "after_recovery" - } + { + "_id": "after_crash" } + """); + + givenHeadersPostJsonThenOkNoErrors( + """ + { + "findOne": { + "filter" : {"_id" : "after_crash"} } - """); + } + """) + .body("$", responseIsFindSuccess()) + .body("data.document._id", is("after_crash")); Log.error("Test Passed: Session recovered after DB restart."); } From 49b6541b07e4deaa9616cb749ac226e7e1c2d080 Mon Sep 17 00:00:00 2001 From: Hazel Date: Mon, 12 Jan 2026 16:41:36 -0800 Subject: [PATCH 17/35] test again --- .../sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index f9445ac978..3d24fbdbe5 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -8,6 +8,7 @@ import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.restassured.http.ContentType; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; import java.util.Map; import org.junit.jupiter.api.Test; @@ -186,9 +187,8 @@ private void waitForDbRecovery() { int statusCode = given() - .port(getTestPort()) .headers(getHeaders()) - .contentType(io.restassured.http.ContentType.JSON) + .contentType(ContentType.JSON) .body(json) .when() .post(CollectionResource.BASE_PATH, keyspaceName, collectionName) From 68d316570dda899549eedb02b3f149f6106c2268 Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 13 Jan 2026 13:29:44 -0800 Subject: [PATCH 18/35] add debug log --- .../api/v1/SessionEvictionIntegrationTest.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 3d24fbdbe5..c2870ba762 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -173,7 +173,8 @@ public void testSessionEvictionOnAllNodesFailed() { private void waitForDbRecovery() { Log.error("Waiting for DB to recover..."); long start = System.currentTimeMillis(); - long timeout = 60000; // 60 seconds timeout + long timeout = 120000; // 120 seconds timeout + String lastError = null; while (System.currentTimeMillis() - start < timeout) { try { @@ -185,22 +186,26 @@ private void waitForDbRecovery() { } """; - int statusCode = + var response = given() .headers(getHeaders()) .contentType(ContentType.JSON) .body(json) .when() - .post(CollectionResource.BASE_PATH, keyspaceName, collectionName) - .getStatusCode(); + .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); + + int statusCode = response.getStatusCode(); // 200 OK means the DB handled the request (even if empty result) if (statusCode == 200) { Log.error("DB recovered!"); return; + } else { + lastError = "Status: " + statusCode + ", Body: " + response.getBody().asString(); } } catch (Exception e) { // Ignore connection errors and continue retrying + lastError = "Recovery Exception: " + e.getMessage(); } try { @@ -208,6 +213,7 @@ private void waitForDbRecovery() { } catch (InterruptedException ignored) { } } - throw new RuntimeException("DB failed to recover within " + timeout + "ms"); + throw new RuntimeException( + "DB failed to recover within " + timeout + "ms. Last error: " + lastError); } } From 666c1db742358aff6c7093d627f4a212924a039a Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 13 Jan 2026 14:39:32 -0800 Subject: [PATCH 19/35] change resource limit, test again --- .../api/v1/SessionEvictionIntegrationTest.java | 18 +++++++++++++++--- .../testresource/StargateTestResource.java | 3 ++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index c2870ba762..333af78418 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -48,9 +48,21 @@ public static class SessionEvictionTestResource extends DseTestResource { */ @Override public Map start() { - Map props = super.start(); - sessionEvictionCassandraContainer = super.getCassandraContainer(); - return props; + // Reduce memory usage for this dedicated container to avoid OOM in CI. + // We set it temporarily just for this container's creation. + String originalHeap = System.getProperty("testing.containers.cassandra-heap-max"); + System.setProperty("testing.containers.cassandra-heap-max", "1024M"); + try { + Map props = super.start(); + sessionEvictionCassandraContainer = super.getCassandraContainer(); + return props; + } finally { + if (originalHeap != null) { + System.setProperty("testing.containers.cassandra-heap-max", originalHeap); + } else { + System.clearProperty("testing.containers.cassandra-heap-max"); + } + } } /** diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index d758a30eb1..a637ffaa44 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -214,7 +214,8 @@ private GenericContainer baseCassandraContainer(boolean reuse) { container .withEnv("HEAP_NEWSIZE", "512M") - .withEnv("MAX_HEAP_SIZE", "2048M") + .withEnv( + "MAX_HEAP_SIZE", System.getProperty("testing.containers.cassandra-heap-max", "2048M")) .withEnv("CASSANDRA_CGROUP_MEMORY_LIMIT", "true") .withEnv("JVM_EXTRA_OPTS", JVM_EXTRA_OPTS) .withNetworkAliases(new String[] {"cassandra"}) From 6d793a9df5ea7d6d79d32600dbf6bf5639b78e78 Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 13 Jan 2026 15:08:40 -0800 Subject: [PATCH 20/35] rollback resource limit and add log, test again --- .../v1/SessionEvictionIntegrationTest.java | 25 ++++++++----------- .../testresource/StargateTestResource.java | 3 +-- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 333af78418..a40a4790d0 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -48,21 +48,9 @@ public static class SessionEvictionTestResource extends DseTestResource { */ @Override public Map start() { - // Reduce memory usage for this dedicated container to avoid OOM in CI. - // We set it temporarily just for this container's creation. - String originalHeap = System.getProperty("testing.containers.cassandra-heap-max"); - System.setProperty("testing.containers.cassandra-heap-max", "1024M"); - try { - Map props = super.start(); - sessionEvictionCassandraContainer = super.getCassandraContainer(); - return props; - } finally { - if (originalHeap != null) { - System.setProperty("testing.containers.cassandra-heap-max", originalHeap); - } else { - System.clearProperty("testing.containers.cassandra-heap-max"); - } - } + Map props = super.start(); + sessionEvictionCassandraContainer = super.getCassandraContainer(); + return props; } /** @@ -136,6 +124,8 @@ public void testSessionEvictionOnAllNodesFailed() { String containerId = dbContainer.getContainerId(); Log.error("Stopping Database Container to simulate failure..."); + Log.info( + "Container ID: " + containerId + ", Port Before Stop: " + dbContainer.getMappedPort(9042)); dockerClient.stopContainerCmd(containerId).exec(); try { @@ -153,6 +143,11 @@ public void testSessionEvictionOnAllNodesFailed() { // 4. Restart the container to simulate recovery Log.error("Restarting Database Container to simulate recovery..."); dockerClient.startContainerCmd(containerId).exec(); + Log.info( + "Container ID: " + + containerId + + ", Port After Start: " + + dbContainer.getMappedPort(9042)); } // 5. Wait for the database to become responsive again diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index a637ffaa44..d758a30eb1 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -214,8 +214,7 @@ private GenericContainer baseCassandraContainer(boolean reuse) { container .withEnv("HEAP_NEWSIZE", "512M") - .withEnv( - "MAX_HEAP_SIZE", System.getProperty("testing.containers.cassandra-heap-max", "2048M")) + .withEnv("MAX_HEAP_SIZE", "2048M") .withEnv("CASSANDRA_CGROUP_MEMORY_LIMIT", "true") .withEnv("JVM_EXTRA_OPTS", JVM_EXTRA_OPTS) .withNetworkAliases(new String[] {"cassandra"}) From e92d044fd7e53b23b197b1f2a26b02bf0c8bbb5f Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 13 Jan 2026 15:31:58 -0800 Subject: [PATCH 21/35] add log, test again --- .../sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index a40a4790d0..10ccea2283 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -124,7 +124,7 @@ public void testSessionEvictionOnAllNodesFailed() { String containerId = dbContainer.getContainerId(); Log.error("Stopping Database Container to simulate failure..."); - Log.info( + Log.error( "Container ID: " + containerId + ", Port Before Stop: " + dbContainer.getMappedPort(9042)); dockerClient.stopContainerCmd(containerId).exec(); @@ -143,7 +143,7 @@ public void testSessionEvictionOnAllNodesFailed() { // 4. Restart the container to simulate recovery Log.error("Restarting Database Container to simulate recovery..."); dockerClient.startContainerCmd(containerId).exec(); - Log.info( + Log.error( "Container ID: " + containerId + ", Port After Start: " From fe9d5d354c1fc43cb54d571187aa1262fd90fb8c Mon Sep 17 00:00:00 2001 From: Hazel Date: Tue, 13 Jan 2026 16:41:11 -0800 Subject: [PATCH 22/35] more logs, test again --- .../v1/SessionEvictionIntegrationTest.java | 102 ++++++++++++++++-- 1 file changed, 92 insertions(+), 10 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 10ccea2283..558d114873 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -113,11 +113,6 @@ public void testSessionEvictionOnAllNodesFailed() { .body("data.document._id", is("before_crash")); // 2. Stop the container to simulate DB failure - // IMPORTANT: We use dockerClient.stopContainerCmd() instead of container.stop(). - // - container.stop() (Testcontainers) removes the container, so a restart would create a NEW - // container with a NEW random port. - // - dockerClient.stopContainerCmd() (Docker API) simply halts the process but keeps the - // container (and its port mapping) intact. GenericContainer dbContainer = SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); @@ -148,6 +143,18 @@ public void testSessionEvictionOnAllNodesFailed() { + containerId + ", Port After Start: " + dbContainer.getMappedPort(9042)); + + // check status + var inspectAfter = dockerClient.inspectContainerCmd(containerId).exec(); + var state = inspectAfter.getState(); + Log.error( + "Container Status After Start: " + + dockerClient + .inspectContainerCmd(containerId) + .exec() + .getState() + .getStatus()); // 应该是 "running" + Log.error("Container Running: " + state.getRunning()); } // 5. Wait for the database to become responsive again @@ -179,13 +186,31 @@ public void testSessionEvictionOnAllNodesFailed() { /** Polls the database until it becomes responsive again. */ private void waitForDbRecovery() { Log.error("Waiting for DB to recover..."); + GenericContainer dbContainer = + SessionEvictionTestResource.getSessionEvictionCassandraContainer(); + DockerClient dockerClient = dbContainer.getDockerClient(); + String containerId = dbContainer.getContainerId(); + long start = System.currentTimeMillis(); long timeout = 120000; // 120 seconds timeout String lastError = null; while (System.currentTimeMillis() - start < timeout) { try { - // Perform a lightweight check (e.g., an empty find) to see if DB responds + // 1. Log real-time container status from Docker + var state = dockerClient.inspectContainerCmd(containerId).exec().getState(); + boolean isRunning = Boolean.TRUE.equals(state.getRunning()); + Log.error( + "Polling - Container Status: " + state.getStatus() + " (Running: " + isRunning + ")"); + + if (isRunning) { + // 2. Check internal Cassandra status via nodetool + boolean isNodeUp = isCassandraUp(dockerClient, containerId); + Log.error( + "Polling - Cassandra Nodetool Status: " + (isNodeUp ? "UP" : "DOWN/Initializing")); + } + + // 3. Perform a lightweight check (e.g., an empty find) to see if DB responds String json = """ { @@ -205,22 +230,79 @@ private void waitForDbRecovery() { // 200 OK means the DB handled the request (even if empty result) if (statusCode == 200) { - Log.error("DB recovered!"); + Log.error("DB recovered! Received 200 OK."); return; } else { lastError = "Status: " + statusCode + ", Body: " + response.getBody().asString(); + Log.error("DB responded but not ready: " + lastError); } } catch (Exception e) { - // Ignore connection errors and continue retrying - lastError = "Recovery Exception: " + e.getMessage(); + // Log connection errors + lastError = "Exception: " + e.getMessage(); + Log.error("DB connection error during polling: " + lastError); } try { - Thread.sleep(1000); // Poll every 1s + Thread.sleep(2000); // Poll every 2s to reduce log noise } catch (InterruptedException ignored) { } } throw new RuntimeException( "DB failed to recover within " + timeout + "ms. Last error: " + lastError); } + + /** Checks if Cassandra is up and normal by running "nodetool status" inside the container. */ + private boolean isCassandraUp(DockerClient dockerClient, String containerId) { + try { + var execCreateCmdResponse = + dockerClient + .execCreateCmd(containerId) + .withAttachStdout(true) + .withAttachStderr(true) + .withCmd("nodetool", "status") + .exec(); + + var callback = + new com.github.dockerjava.api.async.ResultCallback.Adapter< + com.github.dockerjava.api.model.Frame>() { + @Override + public void onNext(com.github.dockerjava.api.model.Frame object) { + // No-op: we don't strictly need to capture output here as we rely on exit code. + // In a more advanced version, we could collect bytes here to check for "UN". + } + }; + + dockerClient.execStartCmd(execCreateCmdResponse.getId()).exec(callback).awaitCompletion(); + + // Note: In a real implementation we would capture stdout to check for "UN" + // But WaitContainerResultCallback doesn't easily expose stdout capture without custom logic. + // For now, if the command returns exit code 0, it means nodetool connected successfully, + // which is a very strong indicator that JMX and the JVM are up. + // A failed node usually causes nodetool to throw a ConnectException and exit with non-zero. + + // However, to be more robust for "UN" check, let's use a simpler approach: + // If exit code is 0, we assume UP for debugging purposes. + // (Capturing output requires a custom Adapter which adds complexity to this test class). + + // Let's rely on exit code for now. + // 0 = Success (JMX connected, likely UP) + // 1+ = Failed (JMX not ready) + + // We can improve this if needed by implementing a proper ResultCallback that collects bytes. + + // Actually, let's use a standard adapter to be sure: + // But since we can't easily add inner classes, let's just use the exit code as a proxy. + + // Correction: WaitContainerResultCallback does not provide exit code directly in all + // versions. + // Let's use InspectExecCmd to get the exit code after execution. + var inspectExecResponse = dockerClient.inspectExecCmd(execCreateCmdResponse.getId()).exec(); + long exitCode = inspectExecResponse.getExitCodeLong(); + + return exitCode == 0; + } catch (Exception e) { + Log.error("Failed to run nodetool status: " + e.getMessage()); + return false; + } + } } From ff8bfe2ef0ba29b3fa08701dfbe6a8f20b81032d Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 14 Jan 2026 14:16:01 -0800 Subject: [PATCH 23/35] check if it's OOM, test again --- .../v1/SessionEvictionIntegrationTest.java | 61 +++++++++++++------ .../testresource/StargateTestResource.java | 3 +- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 558d114873..66e058a7ea 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -48,9 +48,21 @@ public static class SessionEvictionTestResource extends DseTestResource { */ @Override public Map start() { - Map props = super.start(); - sessionEvictionCassandraContainer = super.getCassandraContainer(); - return props; + // Reduce memory usage for this dedicated container to avoid OOM in CI. + // We set it temporarily just for this container's creation. + String originalHeap = System.getProperty("testing.containers.cassandra-heap-max"); + System.setProperty("testing.containers.cassandra-heap-max", "1024M"); + try { + Map props = super.start(); + sessionEvictionCassandraContainer = super.getCassandraContainer(); + return props; + } finally { + if (originalHeap != null) { + System.setProperty("testing.containers.cassandra-heap-max", originalHeap); + } else { + System.clearProperty("testing.containers.cassandra-heap-max"); + } + } } /** @@ -190,34 +202,52 @@ private void waitForDbRecovery() { SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); - long start = System.currentTimeMillis(); long timeout = 120000; // 120 seconds timeout String lastError = null; - while (System.currentTimeMillis() - start < timeout) { try { - // 1. Log real-time container status from Docker - var state = dockerClient.inspectContainerCmd(containerId).exec().getState(); + var inspect = dockerClient.inspectContainerCmd(containerId).exec(); + var state = inspect.getState(); boolean isRunning = Boolean.TRUE.equals(state.getRunning()); Log.error( "Polling - Container Status: " + state.getStatus() + " (Running: " + isRunning + ")"); - + if (!isRunning) { + Log.error("CRITICAL: Container is NOT running. Performing post-mortem..."); + Log.error(" Exit Code: " + state.getExitCode()); // 137 = OOM Killed + Log.error(" OOMKilled: " + state.getOOMKilled()); + try { + Log.error("--- CONTAINER LOGS (LAST 50 LINES) ---"); + dockerClient + .logContainerCmd(containerId) + .withStdOut(true) + .withStdErr(true) + .withTail(50) + .exec( + new com.github.dockerjava.api.async.ResultCallback.Adapter< + com.github.dockerjava.api.model.Frame>() { + @Override + public void onNext(com.github.dockerjava.api.model.Frame frame) { + Log.error(new String(frame.getPayload())); + } + }) + .awaitCompletion(); + Log.error("--- END CONTAINER LOGS ---"); + } catch (Exception logEx) { + Log.error("Failed to fetch container logs: " + logEx.getMessage()); + } + } if (isRunning) { - // 2. Check internal Cassandra status via nodetool boolean isNodeUp = isCassandraUp(dockerClient, containerId); Log.error( "Polling - Cassandra Nodetool Status: " + (isNodeUp ? "UP" : "DOWN/Initializing")); } - - // 3. Perform a lightweight check (e.g., an empty find) to see if DB responds String json = """ { "findOne": {} } """; - var response = given() .headers(getHeaders()) @@ -225,10 +255,7 @@ private void waitForDbRecovery() { .body(json) .when() .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); - int statusCode = response.getStatusCode(); - - // 200 OK means the DB handled the request (even if empty result) if (statusCode == 200) { Log.error("DB recovered! Received 200 OK."); return; @@ -237,13 +264,11 @@ private void waitForDbRecovery() { Log.error("DB responded but not ready: " + lastError); } } catch (Exception e) { - // Log connection errors lastError = "Exception: " + e.getMessage(); Log.error("DB connection error during polling: " + lastError); } - try { - Thread.sleep(2000); // Poll every 2s to reduce log noise + Thread.sleep(2000); // Poll every 2s } catch (InterruptedException ignored) { } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index d758a30eb1..a637ffaa44 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -214,7 +214,8 @@ private GenericContainer baseCassandraContainer(boolean reuse) { container .withEnv("HEAP_NEWSIZE", "512M") - .withEnv("MAX_HEAP_SIZE", "2048M") + .withEnv( + "MAX_HEAP_SIZE", System.getProperty("testing.containers.cassandra-heap-max", "2048M")) .withEnv("CASSANDRA_CGROUP_MEMORY_LIMIT", "true") .withEnv("JVM_EXTRA_OPTS", JVM_EXTRA_OPTS) .withNetworkAliases(new String[] {"cassandra"}) From 7df60422739dca8b0eef7d4fe2cabdaa351497ed Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 14 Jan 2026 15:34:06 -0800 Subject: [PATCH 24/35] not OOM, change initial_token, test again --- .../api/v1/SessionEvictionIntegrationTest.java | 18 +++--------------- .../testresource/StargateTestResource.java | 5 ++--- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 66e058a7ea..c50850296f 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -48,21 +48,9 @@ public static class SessionEvictionTestResource extends DseTestResource { */ @Override public Map start() { - // Reduce memory usage for this dedicated container to avoid OOM in CI. - // We set it temporarily just for this container's creation. - String originalHeap = System.getProperty("testing.containers.cassandra-heap-max"); - System.setProperty("testing.containers.cassandra-heap-max", "1024M"); - try { - Map props = super.start(); - sessionEvictionCassandraContainer = super.getCassandraContainer(); - return props; - } finally { - if (originalHeap != null) { - System.setProperty("testing.containers.cassandra-heap-max", originalHeap); - } else { - System.clearProperty("testing.containers.cassandra-heap-max"); - } - } + Map props = super.start(); + sessionEvictionCassandraContainer = super.getCassandraContainer(); + return props; } /** diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index a637ffaa44..5e3203f889 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -176,7 +176,7 @@ private GenericContainer baseCassandraContainer(boolean reuse) { // Some JVM options are same for all backends, start with those: String JVM_EXTRA_OPTS = - "-Dcassandra.skip_wait_for_gossip_to_settle=0 -Dcassandra.load_ring_state=false -Dcassandra.initial_token=1 -Dcassandra.sai.max_string_term_size_kb=8" + "-Dcassandra.skip_wait_for_gossip_to_settle=0 -Dcassandra.load_ring_state=false -Dcassandra.sai.max_string_term_size_kb=8" // 18-Mar-2025, tatu: to work around [https://github.com/riptano/cndb/issues/13330], // need to temporarily add this for HCD: + " -Dcassandra.cluster_version_provider.min_stable_duration_ms=-1" @@ -214,8 +214,7 @@ private GenericContainer baseCassandraContainer(boolean reuse) { container .withEnv("HEAP_NEWSIZE", "512M") - .withEnv( - "MAX_HEAP_SIZE", System.getProperty("testing.containers.cassandra-heap-max", "2048M")) + .withEnv("MAX_HEAP_SIZE", "2048M") .withEnv("CASSANDRA_CGROUP_MEMORY_LIMIT", "true") .withEnv("JVM_EXTRA_OPTS", JVM_EXTRA_OPTS) .withNetworkAliases(new String[] {"cassandra"}) From 6c5f87595ba5b7d3fa548cfd7a3c821ce3cbbd98 Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 14 Jan 2026 16:18:43 -0800 Subject: [PATCH 25/35] add java comments --- .../sgv2/jsonapi/testresource/StargateTestResource.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index 5e3203f889..14d7623b78 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -175,6 +175,9 @@ private GenericContainer baseCassandraContainer(boolean reuse) { GenericContainer container; // Some JVM options are same for all backends, start with those: + // 14-Jan-2026, hazel: [data-api#2263] Removed '-Dcassandra.initial_token=1' as it prevents + // restarting containers with existing data (ConfigurationException: Cannot change tokens 1 to + // 16) String JVM_EXTRA_OPTS = "-Dcassandra.skip_wait_for_gossip_to_settle=0 -Dcassandra.load_ring_state=false -Dcassandra.sai.max_string_term_size_kb=8" // 18-Mar-2025, tatu: to work around [https://github.com/riptano/cndb/issues/13330], From adc66d767fd63dbad068b959d3d2d70076916c81 Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 14 Jan 2026 16:35:10 -0800 Subject: [PATCH 26/35] initial_token solved one problem, another problem surfaces, add more log... --- .../v1/SessionEvictionIntegrationTest.java | 75 ++++++++++--------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index c50850296f..0fef5be4c0 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -153,7 +153,7 @@ public void testSessionEvictionOnAllNodesFailed() { .inspectContainerCmd(containerId) .exec() .getState() - .getStatus()); // 应该是 "running" + .getStatus()); // should be "running" Log.error("Container Running: " + state.getRunning()); } @@ -227,15 +227,19 @@ public void onNext(com.github.dockerjava.api.model.Frame frame) { } if (isRunning) { boolean isNodeUp = isCassandraUp(dockerClient, containerId); + boolean isNativeTransportActive = isNativeTransportActive(dockerClient, containerId); Log.error( - "Polling - Cassandra Nodetool Status: " + (isNodeUp ? "UP" : "DOWN/Initializing")); + "Polling - Cassandra Status: NodeUp=" + + isNodeUp + + ", NativeTransport=" + + isNativeTransportActive); } String json = """ - { - "findOne": {} - } - """; + { + "findOne": {} + } + """; var response = given() .headers(getHeaders()) @@ -279,42 +283,43 @@ private boolean isCassandraUp(DockerClient dockerClient, String containerId) { new com.github.dockerjava.api.async.ResultCallback.Adapter< com.github.dockerjava.api.model.Frame>() { @Override - public void onNext(com.github.dockerjava.api.model.Frame object) { - // No-op: we don't strictly need to capture output here as we rely on exit code. - // In a more advanced version, we could collect bytes here to check for "UN". - } + public void onNext(com.github.dockerjava.api.model.Frame object) {} }; dockerClient.execStartCmd(execCreateCmdResponse.getId()).exec(callback).awaitCompletion(); + var inspectExecResponse = dockerClient.inspectExecCmd(execCreateCmdResponse.getId()).exec(); + return inspectExecResponse.getExitCodeLong() == 0; + } catch (Exception e) { + Log.error("Failed to run nodetool status: " + e.getMessage()); + return false; + } + } - // Note: In a real implementation we would capture stdout to check for "UN" - // But WaitContainerResultCallback doesn't easily expose stdout capture without custom logic. - // For now, if the command returns exit code 0, it means nodetool connected successfully, - // which is a very strong indicator that JMX and the JVM are up. - // A failed node usually causes nodetool to throw a ConnectException and exit with non-zero. - - // However, to be more robust for "UN" check, let's use a simpler approach: - // If exit code is 0, we assume UP for debugging purposes. - // (Capturing output requires a custom Adapter which adds complexity to this test class). - - // Let's rely on exit code for now. - // 0 = Success (JMX connected, likely UP) - // 1+ = Failed (JMX not ready) - - // We can improve this if needed by implementing a proper ResultCallback that collects bytes. - - // Actually, let's use a standard adapter to be sure: - // But since we can't easily add inner classes, let's just use the exit code as a proxy. + /** Checks if Native Transport is active by running "nodetool info" inside the container. */ + private boolean isNativeTransportActive(DockerClient dockerClient, String containerId) { + try { + var execCreateCmdResponse = + dockerClient + .execCreateCmd(containerId) + .withAttachStdout(true) + .withAttachStderr(true) + .withCmd("nodetool", "info") + .exec(); - // Correction: WaitContainerResultCallback does not provide exit code directly in all - // versions. - // Let's use InspectExecCmd to get the exit code after execution. - var inspectExecResponse = dockerClient.inspectExecCmd(execCreateCmdResponse.getId()).exec(); - long exitCode = inspectExecResponse.getExitCodeLong(); + StringBuilder output = new StringBuilder(); + var callback = + new com.github.dockerjava.api.async.ResultCallback.Adapter< + com.github.dockerjava.api.model.Frame>() { + @Override + public void onNext(com.github.dockerjava.api.model.Frame frame) { + output.append(new String(frame.getPayload())); + } + }; - return exitCode == 0; + dockerClient.execStartCmd(execCreateCmdResponse.getId()).exec(callback).awaitCompletion(); + return output.toString().contains("Native Transport active: true"); } catch (Exception e) { - Log.error("Failed to run nodetool status: " + e.getMessage()); + Log.error("Failed to run nodetool info: " + e.getMessage()); return false; } } From e30ddf07911f7fdb92b2a3923cb153367ef1aae5 Mon Sep 17 00:00:00 2001 From: Hazel Date: Wed, 14 Jan 2026 17:33:17 -0800 Subject: [PATCH 27/35] cannot find the problem, add more log... --- .../v1/SessionEvictionIntegrationTest.java | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 0fef5be4c0..79c627ae36 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -198,8 +198,26 @@ private void waitForDbRecovery() { var inspect = dockerClient.inspectContainerCmd(containerId).exec(); var state = inspect.getState(); boolean isRunning = Boolean.TRUE.equals(state.getRunning()); + var ports = inspect.getNetworkSettings().getPorts().getBindings(); + int mappedPort = dbContainer.getMappedPort(9042); Log.error( - "Polling - Container Status: " + state.getStatus() + " (Running: " + isRunning + ")"); + "Polling - Container Status: " + + state.getStatus() + + " (Running: " + + isRunning + + "), Ports: " + + ports + + ", CurrentMappedPort: " + + mappedPort); + + // 2. TCP Socket Probe + try (java.net.Socket socket = new java.net.Socket()) { + socket.connect(new java.net.InetSocketAddress("localhost", mappedPort), 2000); + Log.error("Polling - Socket Probe: SUCCESS (TCP Handshake OK)"); + } catch (Exception e) { + Log.error("Polling - Socket Probe: FAILED - " + e.getMessage()); + } + if (!isRunning) { Log.error("CRITICAL: Container is NOT running. Performing post-mortem..."); Log.error(" Exit Code: " + state.getExitCode()); // 137 = OOM Killed From 5617e2a25033505fbfe95d908ddb25a473bdc5b0 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 15 Jan 2026 11:42:56 -0800 Subject: [PATCH 28/35] Force port binding, test again --- .../api/v1/SessionEvictionIntegrationTest.java | 13 +++++++++++++ .../jsonapi/testresource/StargateTestResource.java | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 79c627ae36..af3c033a94 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -10,6 +10,7 @@ import io.quarkus.test.junit.QuarkusIntegrationTest; import io.restassured.http.ContentType; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import java.util.Collections; import java.util.Map; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; @@ -39,6 +40,18 @@ public static class SessionEvictionTestResource extends DseTestResource { */ private static GenericContainer sessionEvictionCassandraContainer; + @Override + protected GenericContainer baseCassandraContainer(boolean reuse) { + GenericContainer container = super.baseCassandraContainer(reuse); + try (java.net.ServerSocket socket = new java.net.ServerSocket(0)) { + int port = socket.getLocalPort(); + container.setPortBindings(Collections.singletonList(port + ":9042")); + } catch (java.io.IOException e) { + throw new RuntimeException("Failed to find open port", e); + } + return container; + } + /** * Starts the container and captures the reference. * diff --git a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java index 14d7623b78..a535b60f68 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/testresource/StargateTestResource.java @@ -170,7 +170,7 @@ private ImmutableMap.Builder startContainer( return ImmutableMap.builder(); } - private GenericContainer baseCassandraContainer(boolean reuse) { + protected GenericContainer baseCassandraContainer(boolean reuse) { String image = getCassandraImage(); GenericContainer container; From 0174c07214bd7123643b1da6874aaeadbb4df593 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 15 Jan 2026 15:30:32 -0800 Subject: [PATCH 29/35] Add java doc --- .../v1/SessionEvictionIntegrationTest.java | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index af3c033a94..a2d501e389 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -10,6 +10,7 @@ import io.quarkus.test.junit.QuarkusIntegrationTest; import io.restassured.http.ContentType; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import java.net.ServerSocket; import java.util.Collections; import java.util.Map; import org.junit.jupiter.api.Test; @@ -40,10 +41,21 @@ public static class SessionEvictionTestResource extends DseTestResource { */ private static GenericContainer sessionEvictionCassandraContainer; + /** + * Overridden to enforce a fixed port binding for the Cassandra container. + * + *

Standard Testcontainers use random port mapping. However, this test manually stops and + * restarts the container to simulate failure. In some environments, a restarted container might + * not retain its original random port mapping, or the port forwarding may break. + * + *

By using a fixed port binding (finding an available local port and mapping it explicitly), + * we ensure the database is always accessible on the same port after a restart, allowing the + * Java driver to successfully reconnect. + */ @Override protected GenericContainer baseCassandraContainer(boolean reuse) { GenericContainer container = super.baseCassandraContainer(reuse); - try (java.net.ServerSocket socket = new java.net.ServerSocket(0)) { + try (ServerSocket socket = new ServerSocket(0)) { int port = socket.getLocalPort(); container.setPortBindings(Collections.singletonList(port + ":9042")); } catch (java.io.IOException e) { @@ -105,7 +117,6 @@ protected int getCassandraCqlPort() { @Test public void testSessionEvictionOnAllNodesFailed() { - // 1. Insert and find initial data to ensure the database is healthy before the test insertDoc( """ @@ -130,10 +141,9 @@ public void testSessionEvictionOnAllNodesFailed() { SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); - - Log.error("Stopping Database Container to simulate failure..."); - Log.error( - "Container ID: " + containerId + ", Port Before Stop: " + dbContainer.getMappedPort(9042)); + // Use low-level dockerClient to stop the container without triggering Testcontainers' + // cleanup/termination logic (which dbContainer.stop() would do). + // This effectively "pulls the plug" while keeping the container instance intact for restart. dockerClient.stopContainerCmd(containerId).exec(); try { @@ -192,11 +202,14 @@ public void testSessionEvictionOnAllNodesFailed() { """) .body("$", responseIsFindSuccess()) .body("data.document._id", is("after_crash")); - - Log.error("Test Passed: Session recovered after DB restart."); } - /** Polls the database until it becomes responsive again. */ + /** + * Polls the Data API until it returns a successful response, indicating the database has + * recovered. + * + * @throws RuntimeException if the system does not recover within the timeout period. + */ private void waitForDbRecovery() { Log.error("Waiting for DB to recover..."); GenericContainer dbContainer = From ee555b21dde2d0553a74d0c6a43d7082831aec91 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 15 Jan 2026 18:19:26 -0800 Subject: [PATCH 30/35] Makes the core function clean --- .../v1/SessionEvictionIntegrationTest.java | 77 +++++++------------ 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index a2d501e389..41d3f91836 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -5,7 +5,6 @@ import static org.hamcrest.Matchers.*; import com.github.dockerjava.api.DockerClient; -import io.quarkus.logging.Log; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.restassured.http.ContentType; @@ -14,6 +13,8 @@ import java.util.Collections; import java.util.Map; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.testcontainers.containers.GenericContainer; @QuarkusIntegrationTest @@ -21,6 +22,8 @@ value = SessionEvictionIntegrationTest.SessionEvictionTestResource.class, restrictToAnnotatedClass = true) public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { + private static final Logger logger = + LoggerFactory.getLogger(SessionEvictionIntegrationTest.class); /** * A specialized TestResource that spins up a new HCD/DSE container exclusively for this test @@ -146,45 +149,23 @@ public void testSessionEvictionOnAllNodesFailed() { // This effectively "pulls the plug" while keeping the container instance intact for restart. dockerClient.stopContainerCmd(containerId).exec(); - try { - // 3. Verify failure: The application should receive a 500 error/AllNodesFailedException - givenHeadersPostJsonThen( - """ + // 3. Verify failure: The request should receive a 500 error/AllNodesFailedException + givenHeadersPostJsonThen( + """ { "findOne": {} } """) - .statusCode(500) - .body("errors[0].message", containsString("No node was available")); + .statusCode(500) + .body("errors[0].message", containsString("No node was available")); - } finally { - // 4. Restart the container to simulate recovery - Log.error("Restarting Database Container to simulate recovery..."); - dockerClient.startContainerCmd(containerId).exec(); - Log.error( - "Container ID: " - + containerId - + ", Port After Start: " - + dbContainer.getMappedPort(9042)); - - // check status - var inspectAfter = dockerClient.inspectContainerCmd(containerId).exec(); - var state = inspectAfter.getState(); - Log.error( - "Container Status After Start: " - + dockerClient - .inspectContainerCmd(containerId) - .exec() - .getState() - .getStatus()); // should be "running" - Log.error("Container Running: " + state.getRunning()); - } + // 4. Restart the container to simulate recovery + dockerClient.startContainerCmd(containerId).exec(); // 5. Wait for the database to become responsive again waitForDbRecovery(); - // 6. Verify Session Recovery: The application should have evicted the bad session - // and created a new one automatically. + // 6. Verify Session Recovery: insert and find data again, the request should succeed insertDoc( """ { @@ -211,7 +192,7 @@ public void testSessionEvictionOnAllNodesFailed() { * @throws RuntimeException if the system does not recover within the timeout period. */ private void waitForDbRecovery() { - Log.error("Waiting for DB to recover..."); + logger.info("Waiting for DB to recover..."); GenericContainer dbContainer = SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); @@ -226,7 +207,7 @@ private void waitForDbRecovery() { boolean isRunning = Boolean.TRUE.equals(state.getRunning()); var ports = inspect.getNetworkSettings().getPorts().getBindings(); int mappedPort = dbContainer.getMappedPort(9042); - Log.error( + logger.info( "Polling - Container Status: " + state.getStatus() + " (Running: " @@ -239,17 +220,17 @@ private void waitForDbRecovery() { // 2. TCP Socket Probe try (java.net.Socket socket = new java.net.Socket()) { socket.connect(new java.net.InetSocketAddress("localhost", mappedPort), 2000); - Log.error("Polling - Socket Probe: SUCCESS (TCP Handshake OK)"); + logger.info("Polling - Socket Probe: SUCCESS (TCP Handshake OK)"); } catch (Exception e) { - Log.error("Polling - Socket Probe: FAILED - " + e.getMessage()); + logger.info("Polling - Socket Probe: FAILED - " + e.getMessage()); } if (!isRunning) { - Log.error("CRITICAL: Container is NOT running. Performing post-mortem..."); - Log.error(" Exit Code: " + state.getExitCode()); // 137 = OOM Killed - Log.error(" OOMKilled: " + state.getOOMKilled()); + logger.info("CRITICAL: Container is NOT running. Performing post-mortem..."); + logger.info(" Exit Code: " + state.getExitCode()); // 137 = OOM Killed + logger.info(" OOMKilled: " + state.getOOMKilled()); try { - Log.error("--- CONTAINER LOGS (LAST 50 LINES) ---"); + logger.info("--- CONTAINER LOGS (LAST 50 LINES) ---"); dockerClient .logContainerCmd(containerId) .withStdOut(true) @@ -260,19 +241,19 @@ private void waitForDbRecovery() { com.github.dockerjava.api.model.Frame>() { @Override public void onNext(com.github.dockerjava.api.model.Frame frame) { - Log.error(new String(frame.getPayload())); + logger.info(new String(frame.getPayload())); } }) .awaitCompletion(); - Log.error("--- END CONTAINER LOGS ---"); + logger.info("--- END CONTAINER LOGS ---"); } catch (Exception logEx) { - Log.error("Failed to fetch container logs: " + logEx.getMessage()); + logger.info("Failed to fetch container logs: " + logEx.getMessage()); } } if (isRunning) { boolean isNodeUp = isCassandraUp(dockerClient, containerId); boolean isNativeTransportActive = isNativeTransportActive(dockerClient, containerId); - Log.error( + logger.info( "Polling - Cassandra Status: NodeUp=" + isNodeUp + ", NativeTransport=" @@ -293,15 +274,15 @@ public void onNext(com.github.dockerjava.api.model.Frame frame) { .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); int statusCode = response.getStatusCode(); if (statusCode == 200) { - Log.error("DB recovered! Received 200 OK."); + logger.info("DB recovered! Received 200 OK."); return; } else { lastError = "Status: " + statusCode + ", Body: " + response.getBody().asString(); - Log.error("DB responded but not ready: " + lastError); + logger.info("DB responded but not ready: " + lastError); } } catch (Exception e) { lastError = "Exception: " + e.getMessage(); - Log.error("DB connection error during polling: " + lastError); + logger.info("DB connection error during polling: " + lastError); } try { Thread.sleep(2000); // Poll every 2s @@ -334,7 +315,7 @@ public void onNext(com.github.dockerjava.api.model.Frame object) {} var inspectExecResponse = dockerClient.inspectExecCmd(execCreateCmdResponse.getId()).exec(); return inspectExecResponse.getExitCodeLong() == 0; } catch (Exception e) { - Log.error("Failed to run nodetool status: " + e.getMessage()); + logger.info("Failed to run nodetool status: " + e.getMessage()); return false; } } @@ -363,7 +344,7 @@ public void onNext(com.github.dockerjava.api.model.Frame frame) { dockerClient.execStartCmd(execCreateCmdResponse.getId()).exec(callback).awaitCompletion(); return output.toString().contains("Native Transport active: true"); } catch (Exception e) { - Log.error("Failed to run nodetool info: " + e.getMessage()); + logger.info("Failed to run nodetool info: " + e.getMessage()); return false; } } From 517ebfb5df99198e9501d83f7e5e0db316e0c0b4 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 15 Jan 2026 19:07:59 -0800 Subject: [PATCH 31/35] Clean up and test --- .../v1/SessionEvictionIntegrationTest.java | 169 ++++++------------ 1 file changed, 52 insertions(+), 117 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 41d3f91836..669198607d 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -5,10 +5,14 @@ import static org.hamcrest.Matchers.*; import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.async.ResultCallback; +import com.github.dockerjava.api.model.Frame; import io.quarkus.test.common.QuarkusTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.restassured.http.ContentType; +import io.restassured.response.Response; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import java.io.IOException; import java.net.ServerSocket; import java.util.Collections; import java.util.Map; @@ -22,6 +26,7 @@ value = SessionEvictionIntegrationTest.SessionEvictionTestResource.class, restrictToAnnotatedClass = true) public class SessionEvictionIntegrationTest extends AbstractCollectionIntegrationTestBase { + private static final Logger logger = LoggerFactory.getLogger(SessionEvictionIntegrationTest.class); @@ -61,7 +66,7 @@ protected GenericContainer baseCassandraContainer(boolean reuse) { try (ServerSocket socket = new ServerSocket(0)) { int port = socket.getLocalPort(); container.setPortBindings(Collections.singletonList(port + ":9042")); - } catch (java.io.IOException e) { + } catch (IOException e) { throw new RuntimeException("Failed to find open port", e); } return container; @@ -186,111 +191,72 @@ public void testSessionEvictionOnAllNodesFailed() { } /** - * Polls the Data API until it returns a successful response, indicating the database has - * recovered. + * Polls the DB container until the container is running, Cassandra is up, and the API request + * returns 200. * - * @throws RuntimeException if the system does not recover within the timeout period. + * @throws RuntimeException if recovery does not occur within the timeout period. */ private void waitForDbRecovery() { - logger.info("Waiting for DB to recover..."); GenericContainer dbContainer = SessionEvictionTestResource.getSessionEvictionCassandraContainer(); DockerClient dockerClient = dbContainer.getDockerClient(); String containerId = dbContainer.getContainerId(); long start = System.currentTimeMillis(); long timeout = 120000; // 120 seconds timeout - String lastError = null; + boolean isContainerRunning = false; + boolean isCassandraUp = false; + Response response = null; + + logger.info("Waiting for database recovery..."); + while (System.currentTimeMillis() - start < timeout) { try { - var inspect = dockerClient.inspectContainerCmd(containerId).exec(); - var state = inspect.getState(); - boolean isRunning = Boolean.TRUE.equals(state.getRunning()); - var ports = inspect.getNetworkSettings().getPorts().getBindings(); - int mappedPort = dbContainer.getMappedPort(9042); - logger.info( - "Polling - Container Status: " - + state.getStatus() - + " (Running: " - + isRunning - + "), Ports: " - + ports - + ", CurrentMappedPort: " - + mappedPort); + // 1. Check if the container is running + isContainerRunning = + Boolean.TRUE.equals( + dockerClient.inspectContainerCmd(containerId).exec().getState().getRunning()); - // 2. TCP Socket Probe - try (java.net.Socket socket = new java.net.Socket()) { - socket.connect(new java.net.InetSocketAddress("localhost", mappedPort), 2000); - logger.info("Polling - Socket Probe: SUCCESS (TCP Handshake OK)"); - } catch (Exception e) { - logger.info("Polling - Socket Probe: FAILED - " + e.getMessage()); - } + // 2. Check if Cassandra is up + isCassandraUp = isCassandraUp(dockerClient, containerId); - if (!isRunning) { - logger.info("CRITICAL: Container is NOT running. Performing post-mortem..."); - logger.info(" Exit Code: " + state.getExitCode()); // 137 = OOM Killed - logger.info(" OOMKilled: " + state.getOOMKilled()); - try { - logger.info("--- CONTAINER LOGS (LAST 50 LINES) ---"); - dockerClient - .logContainerCmd(containerId) - .withStdOut(true) - .withStdErr(true) - .withTail(50) - .exec( - new com.github.dockerjava.api.async.ResultCallback.Adapter< - com.github.dockerjava.api.model.Frame>() { - @Override - public void onNext(com.github.dockerjava.api.model.Frame frame) { - logger.info(new String(frame.getPayload())); - } - }) - .awaitCompletion(); - logger.info("--- END CONTAINER LOGS ---"); - } catch (Exception logEx) { - logger.info("Failed to fetch container logs: " + logEx.getMessage()); + // 3. Verify Data API connectivity via simple findOne command + if (isContainerRunning && isCassandraUp) { + response = + given() + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body("{\"findOne\": {}}") + .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); + + if (response.getStatusCode() == 200) { + logger.info( + "Database and API have recovered after " + + (System.currentTimeMillis() - start) + + "ms."); + return; } } - if (isRunning) { - boolean isNodeUp = isCassandraUp(dockerClient, containerId); - boolean isNativeTransportActive = isNativeTransportActive(dockerClient, containerId); - logger.info( - "Polling - Cassandra Status: NodeUp=" - + isNodeUp - + ", NativeTransport=" - + isNativeTransportActive); - } - String json = - """ - { - "findOne": {} - } - """; - var response = - given() - .headers(getHeaders()) - .contentType(ContentType.JSON) - .body(json) - .when() - .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); - int statusCode = response.getStatusCode(); - if (statusCode == 200) { - logger.info("DB recovered! Received 200 OK."); - return; - } else { - lastError = "Status: " + statusCode + ", Body: " + response.getBody().asString(); - logger.info("DB responded but not ready: " + lastError); - } } catch (Exception e) { - lastError = "Exception: " + e.getMessage(); - logger.info("DB connection error during polling: " + lastError); + logger.info("Error checking DB status: " + e.getMessage()); } + + // Poll every 1s try { - Thread.sleep(2000); // Poll every 2s + Thread.sleep(1000); } catch (InterruptedException ignored) { + Thread.currentThread().interrupt(); + break; } } throw new RuntimeException( - "DB failed to recover within " + timeout + "ms. Last error: " + lastError); + "DB failed to recover within " + + timeout + + "ms. Container status: " + + isContainerRunning + + ", Cassandra status: " + + isCassandraUp + + ", API response: " + + response); } /** Checks if Cassandra is up and normal by running "nodetool status" inside the container. */ @@ -305,46 +271,15 @@ private boolean isCassandraUp(DockerClient dockerClient, String containerId) { .exec(); var callback = - new com.github.dockerjava.api.async.ResultCallback.Adapter< - com.github.dockerjava.api.model.Frame>() { + new ResultCallback.Adapter() { @Override - public void onNext(com.github.dockerjava.api.model.Frame object) {} + public void onNext(Frame object) {} }; dockerClient.execStartCmd(execCreateCmdResponse.getId()).exec(callback).awaitCompletion(); var inspectExecResponse = dockerClient.inspectExecCmd(execCreateCmdResponse.getId()).exec(); return inspectExecResponse.getExitCodeLong() == 0; } catch (Exception e) { - logger.info("Failed to run nodetool status: " + e.getMessage()); - return false; - } - } - - /** Checks if Native Transport is active by running "nodetool info" inside the container. */ - private boolean isNativeTransportActive(DockerClient dockerClient, String containerId) { - try { - var execCreateCmdResponse = - dockerClient - .execCreateCmd(containerId) - .withAttachStdout(true) - .withAttachStderr(true) - .withCmd("nodetool", "info") - .exec(); - - StringBuilder output = new StringBuilder(); - var callback = - new com.github.dockerjava.api.async.ResultCallback.Adapter< - com.github.dockerjava.api.model.Frame>() { - @Override - public void onNext(com.github.dockerjava.api.model.Frame frame) { - output.append(new String(frame.getPayload())); - } - }; - - dockerClient.execStartCmd(execCreateCmdResponse.getId()).exec(callback).awaitCompletion(); - return output.toString().contains("Native Transport active: true"); - } catch (Exception e) { - logger.info("Failed to run nodetool info: " + e.getMessage()); return false; } } From 8e34626d97e20c1c6b42a11161058c3c482ab928 Mon Sep 17 00:00:00 2001 From: Hazel Date: Thu, 15 Jan 2026 20:29:05 -0800 Subject: [PATCH 32/35] Clean up again --- .../v1/SessionEvictionIntegrationTest.java | 72 +++++++++++-------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 669198607d..3ecb93adc2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -123,6 +123,14 @@ protected int getCassandraCqlPort() { return container.getMappedPort(9042); } + private DockerClient getDockerClient() { + return SessionEvictionTestResource.getSessionEvictionCassandraContainer().getDockerClient(); + } + + private String getContainerId() { + return SessionEvictionTestResource.getSessionEvictionCassandraContainer().getContainerId(); + } + @Test public void testSessionEvictionOnAllNodesFailed() { // 1. Insert and find initial data to ensure the database is healthy before the test @@ -145,14 +153,10 @@ public void testSessionEvictionOnAllNodesFailed() { .body("data.document._id", is("before_crash")); // 2. Stop the container to simulate DB failure - GenericContainer dbContainer = - SessionEvictionTestResource.getSessionEvictionCassandraContainer(); - DockerClient dockerClient = dbContainer.getDockerClient(); - String containerId = dbContainer.getContainerId(); // Use low-level dockerClient to stop the container without triggering Testcontainers' // cleanup/termination logic (which dbContainer.stop() would do). // This effectively "pulls the plug" while keeping the container instance intact for restart. - dockerClient.stopContainerCmd(containerId).exec(); + getDockerClient().stopContainerCmd(getContainerId()).exec(); // 3. Verify failure: The request should receive a 500 error/AllNodesFailedException givenHeadersPostJsonThen( @@ -165,7 +169,7 @@ public void testSessionEvictionOnAllNodesFailed() { .body("errors[0].message", containsString("No node was available")); // 4. Restart the container to simulate recovery - dockerClient.startContainerCmd(containerId).exec(); + getDockerClient().startContainerCmd(getContainerId()).exec(); // 5. Wait for the database to become responsive again waitForDbRecovery(); @@ -197,37 +201,29 @@ public void testSessionEvictionOnAllNodesFailed() { * @throws RuntimeException if recovery does not occur within the timeout period. */ private void waitForDbRecovery() { - GenericContainer dbContainer = - SessionEvictionTestResource.getSessionEvictionCassandraContainer(); - DockerClient dockerClient = dbContainer.getDockerClient(); - String containerId = dbContainer.getContainerId(); long start = System.currentTimeMillis(); long timeout = 120000; // 120 seconds timeout boolean isContainerRunning = false; boolean isCassandraUp = false; Response response = null; - logger.info("Waiting for database recovery..."); + logger.info( + "Waiting for DB recovery. Container status: " + + isContainerRunning + + ", Cassandra status: " + + isCassandraUp); while (System.currentTimeMillis() - start < timeout) { try { - // 1. Check if the container is running - isContainerRunning = - Boolean.TRUE.equals( - dockerClient.inspectContainerCmd(containerId).exec().getState().getRunning()); - - // 2. Check if Cassandra is up - isCassandraUp = isCassandraUp(dockerClient, containerId); - - // 3. Verify Data API connectivity via simple findOne command - if (isContainerRunning && isCassandraUp) { - response = - given() - .headers(getHeaders()) - .contentType(ContentType.JSON) - .body("{\"findOne\": {}}") - .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); + // 1. Check container + isContainerRunning = isContainerRunning(); + // 2. Check Cassandra (only after the container is running) + isCassandraUp = isContainerRunning && isCassandraUp(getDockerClient(), getContainerId()); + + // 3. Check API (only after Cassandra is up) + if (isCassandraUp) { + response = getAPIResponse(); if (response.getStatusCode() == 200) { logger.info( "Database and API have recovered after " @@ -255,8 +251,26 @@ private void waitForDbRecovery() { + isContainerRunning + ", Cassandra status: " + isCassandraUp - + ", API response: " - + response); + + ", API response body: " + + (response != null ? response.asString() : "null")); + } + + /** Checks if the Cassandra container is currently in the "running" state. */ + private boolean isContainerRunning() { + return Boolean.TRUE.equals( + getDockerClient().inspectContainerCmd(getContainerId()).exec().getState().getRunning()); + } + + /** + * Get a simple findOne response from the Data API. We will verify the response after the function + * call + */ + private Response getAPIResponse() { + return given() + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body("{\"findOne\": {}}") + .post(CollectionResource.BASE_PATH, keyspaceName, collectionName); } /** Checks if Cassandra is up and normal by running "nodetool status" inside the container. */ From 0f5a6ddb077be89a81e1139c09a97a0a0f4f6593 Mon Sep 17 00:00:00 2001 From: Hazel Date: Fri, 16 Jan 2026 12:51:44 -0800 Subject: [PATCH 33/35] tweak java doc --- .../api/v1/AbstractKeyspaceIntegrationTestBase.java | 4 ++-- .../api/v1/SessionEvictionIntegrationTest.java | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java index 43f3380698..c9124336f5 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AbstractKeyspaceIntegrationTestBase.java @@ -370,8 +370,8 @@ private synchronized CqlSession createDriverSession() { } /** - * Gets the Cassandra CQL port. Subclasses can override this if the port is not available via - * standard system properties. + * Gets the Cassandra CQL port. Subclasses can override this if their tests need isolated + * container and port. */ protected int getCassandraCqlPort() { return Integer.getInteger(IntegrationTestUtils.CASSANDRA_CQL_PORT_PROP); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 3ecb93adc2..9b6152df07 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -53,8 +53,9 @@ public static class SessionEvictionTestResource extends DseTestResource { * Overridden to enforce a fixed port binding for the Cassandra container. * *

Standard Testcontainers use random port mapping. However, this test manually stops and - * restarts the container to simulate failure. In some environments, a restarted container might - * not retain its original random port mapping, or the port forwarding may break. + * restarts the container to simulate failure. Under normal circumstances, a restarted container + * will not retain its original random port mapping, causing the initial port forwarding to + * break. * *

By using a fixed port binding (finding an available local port and mapping it explicitly), * we ensure the database is always accessible on the same port after a restart, allowing the @@ -123,10 +124,16 @@ protected int getCassandraCqlPort() { return container.getMappedPort(9042); } + /** + * @return The DockerClient for the isolated Cassandra container. + */ private DockerClient getDockerClient() { return SessionEvictionTestResource.getSessionEvictionCassandraContainer().getDockerClient(); } + /** + * @return The container ID of the isolated Cassandra container. + */ private String getContainerId() { return SessionEvictionTestResource.getSessionEvictionCassandraContainer().getContainerId(); } From 8999c935515ff1bfd6da8c9466566838a1d67dcf Mon Sep 17 00:00:00 2001 From: Hazel Date: Fri, 16 Jan 2026 12:59:21 -0800 Subject: [PATCH 34/35] change log --- .../sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 9b6152df07..8ddcc75dce 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -214,7 +214,7 @@ private void waitForDbRecovery() { boolean isCassandraUp = false; Response response = null; - logger.info( + logger.warn( "Waiting for DB recovery. Container status: " + isContainerRunning + ", Cassandra status: " @@ -232,7 +232,7 @@ private void waitForDbRecovery() { if (isCassandraUp) { response = getAPIResponse(); if (response.getStatusCode() == 200) { - logger.info( + logger.warn( "Database and API have recovered after " + (System.currentTimeMillis() - start) + "ms."); @@ -240,7 +240,7 @@ private void waitForDbRecovery() { } } } catch (Exception e) { - logger.info("Error checking DB status: " + e.getMessage()); + logger.warn("Error checking DB status: " + e.getMessage()); } // Poll every 1s From b701aad9db06069678292e3d931ac704a5a7fa4d Mon Sep 17 00:00:00 2001 From: Hazel Date: Fri, 16 Jan 2026 13:56:43 -0800 Subject: [PATCH 35/35] remove logs --- .../sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java index 8ddcc75dce..e6de62047e 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/SessionEvictionIntegrationTest.java @@ -214,12 +214,6 @@ private void waitForDbRecovery() { boolean isCassandraUp = false; Response response = null; - logger.warn( - "Waiting for DB recovery. Container status: " - + isContainerRunning - + ", Cassandra status: " - + isCassandraUp); - while (System.currentTimeMillis() - start < timeout) { try { // 1. Check container