diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java index c9b6733e7..ebd142ef2 100644 --- a/src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java +++ b/src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java @@ -770,6 +770,15 @@ static boolean shouldReturnResultSet(String query, List nonRowcountQuery return true; } + // Statements that start with a DML keyword (INSERT / UPDATE / DELETE / MERGE) return an update + // count per the JDBC spec, even when a nested subquery or CTE contains UNION / INTERSECT / + // EXCEPT. This must run before the non-anchored UNION_PATTERN et al. below, which match + // anywhere in the SQL and mis-classify DML, including column-exclusion `SELECT * EXCEPT (col)`. + // See https://github.com/databricks/databricks-jdbc/issues/1418. + if (DML_PREFIX_PATTERN.matcher(trimmedQuery).find()) { + return false; + } + + // Check if the query matches any of the patterns that return a ResultSet return SELECT_PATTERN.matcher(trimmedQuery).find() || SHOW_PATTERN.matcher(trimmedQuery).find() diff --git a/src/main/java/com/databricks/jdbc/common/DatabricksJdbcConstants.java b/src/main/java/com/databricks/jdbc/common/DatabricksJdbcConstants.java index 7849411f5..568277c42 100644 --- a/src/main/java/com/databricks/jdbc/common/DatabricksJdbcConstants.java +++ b/src/main/java/com/databricks/jdbc/common/DatabricksJdbcConstants.java @@ -177,6 +177,14 @@ public enum FakeServiceType { public static final Pattern CALL_PATTERN = Pattern.compile("^(\\s*\\()*\\s*CALL", Pattern.CASE_INSENSITIVE); + /** + * Matches statements that start with a DML keyword (INSERT / UPDATE / DELETE / MERGE). Unlike + * {@link #INSERT_PATTERN} this also accepts {@code INSERT OVERWRITE ...} (not just {@code INSERT + * INTO ...}) and does not share the batching parser's expectations, so it is safe to broaden.
+ */ + public static final Pattern DML_PREFIX_PATTERN = + Pattern.compile("^(\\s*\\()*\\s*(INSERT|UPDATE|DELETE|MERGE)\\s+", Pattern.CASE_INSENSITIVE); + /** Maximum number of parameters allowed in a single Databricks query */ public static final int MAX_QUERY_PARAMETERS = 256; diff --git a/src/test/java/com/databricks/jdbc/api/impl/DatabricksStatementTest.java b/src/test/java/com/databricks/jdbc/api/impl/DatabricksStatementTest.java index ff5d9e7ec..3b9d416f2 100644 --- a/src/test/java/com/databricks/jdbc/api/impl/DatabricksStatementTest.java +++ b/src/test/java/com/databricks/jdbc/api/impl/DatabricksStatementTest.java @@ -581,6 +581,84 @@ public void testShouldReturnResultSet_ExceptQuery() { assertTrue(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); } + // https://github.com/databricks/databricks-jdbc/issues/1418 — DML statements whose subqueries + // or CTEs contain UNION / INTERSECT / EXCEPT were being mis-classified as ResultSet-producing. + @Test + public void testShouldReturnResultSet_InsertWithUnionInSubquery() { + String query = + "INSERT INTO my_catalog.my_schema.target_table " + + "SELECT * FROM ( " + + " SELECT col1, col2 FROM src WHERE 1 = 0 " + + " UNION ALL " + + " SELECT col1, col2 FROM src WHERE 1 = 0 " + + ") t"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_InsertWithIntersectInSubquery() { + String query = "INSERT INTO t SELECT x FROM (SELECT x FROM a INTERSECT SELECT x FROM b) s"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_InsertWithExceptInSubquery() { + String query = "INSERT INTO t SELECT x FROM (SELECT x FROM a EXCEPT SELECT x FROM b) s"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void 
testShouldReturnResultSet_InsertWithSelectStarExceptColumnExclusion() { + // `SELECT * EXCEPT (col)` is Databricks column-exclusion syntax, not a set operator. + String query = "INSERT INTO t SELECT * EXCEPT (secret_col) FROM source"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_InsertOverwriteDirectoryWithIntersect() { + String query = + "INSERT OVERWRITE DIRECTORY 's3://bucket/path' USING CSV " + + "SELECT x FROM a INTERSECT SELECT x FROM b"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_UpdateWithUnionInSubquery() { + String query = "UPDATE t SET col = (SELECT x FROM a UNION SELECT x FROM b LIMIT 1)"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_DeleteWithExceptInSubquery() { + String query = "DELETE FROM t WHERE id IN (SELECT id FROM a EXCEPT SELECT id FROM b)"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_MergeWithUnionInSource() { + String query = + "MERGE INTO target t USING (SELECT id FROM a UNION SELECT id FROM b) s " + + "ON t.id = s.id WHEN MATCHED THEN DELETE"; + assertFalse(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + + @Test + public void testShouldReturnResultSet_DmlPrefixOverriddenByNonRowcountConfig() { + // The NonRowcountQueryPrefixes opt-in path must still win over the DML short-circuit. 
+ String query = "INSERT INTO t VALUES (1)"; + assertTrue( + DatabricksStatement.shouldReturnResultSet(query, Arrays.asList("INSERT")), + "NonRowcountQueryPrefixes=INSERT should force ResultSet mode"); + } + + @Test + public void testShouldReturnResultSet_TopLevelParenthesizedUnionStillMatches() { + // Regression guard: top-level set operations starting with `(` still classify as ResultSet + // via SELECT_PATTERN's `^(\s*\()*\s*SELECT` prefix. + String query = "(SELECT a FROM t1) UNION (SELECT a FROM t2)"; + assertTrue(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList())); + } + @Test public void testShouldReturnResultSet_DeclareQuery() { String query = "DECLARE @var INT;";