Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,15 @@ static boolean shouldReturnResultSet(String query, List<String> nonRowcountQuery
return true;
}

// DML statements (INSERT / UPDATE / DELETE / MERGE) always return an update count per the
// JDBC spec, even when their subqueries or CTEs contain UNION / INTERSECT / EXCEPT. Without
// this short-circuit the non-anchored UNION_PATTERN et al. below match anywhere in the SQL
// and mis-classify DML — including the column-exclusion form `SELECT * EXCEPT (col)`. See
// https://github.com/databricks/databricks-jdbc/issues/1418.
if (DML_PREFIX_PATTERN.matcher(trimmedQuery).find()) {
return false;
}

// Check if the query matches any of the patterns that return a ResultSet
return SELECT_PATTERN.matcher(trimmedQuery).find()
|| SHOW_PATTERN.matcher(trimmedQuery).find()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ public enum FakeServiceType {
public static final Pattern CALL_PATTERN =
Pattern.compile("^(\\s*\\()*\\s*CALL", Pattern.CASE_INSENSITIVE);

/**
 * Matches statements whose leading keyword is a DML (INSERT / UPDATE / DELETE / MERGE). Unlike
 * {@link #INSERT_PATTERN} this also accepts {@code INSERT OVERWRITE ...} (not just {@code INSERT
 * INTO ...}) and does not share the batching parser's expectations, so it is safe to broaden.
 *
 * <p>The expression is anchored at the start of the input and compiled case-insensitively. It
 * allows any amount of leading whitespace and any number of opening parentheses before the
 * keyword, and requires at least one whitespace character directly after the keyword. As a
 * consequence it does not match when the keyword is preceded by anything else (for example a
 * leading {@code WITH} clause or a SQL comment) or when the keyword is immediately followed by a
 * non-whitespace character.
 *
 * <p>NOTE(review): whether callers strip comments before matching is not visible here — confirm
 * against the call site.
 */
public static final Pattern DML_PREFIX_PATTERN =
Pattern.compile("^(\\s*\\()*\\s*(INSERT|UPDATE|DELETE|MERGE)\\s+", Pattern.CASE_INSENSITIVE);

/** Maximum number of parameters allowed in a single Databricks query (currently 256). */
public static final int MAX_QUERY_PARAMETERS = 256;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,84 @@ public void testShouldReturnResultSet_ExceptQuery() {
assertTrue(DatabricksStatement.shouldReturnResultSet(query, Collections.emptyList()));
}

// https://github.com/databricks/databricks-jdbc/issues/1418 — DML statements whose subqueries
// or CTEs contain UNION / INTERSECT / EXCEPT were being mis-classified as ResultSet-producing.
@Test
public void testShouldReturnResultSet_InsertWithUnionInSubquery() {
  // INSERT ... SELECT whose derived table contains UNION ALL; the statement is expected to be
  // classified as NOT producing a ResultSet.
  final String insertSql =
      String.join(
          "",
          "INSERT INTO my_catalog.my_schema.target_table ",
          "SELECT * FROM ( ",
          " SELECT col1, col2 FROM src WHERE 1 = 0 ",
          " UNION ALL ",
          " SELECT col1, col2 FROM src WHERE 1 = 0 ",
          ") t");

  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(insertSql, Collections.emptyList());

  assertFalse(returnsResultSet);
}

@Test
public void testShouldReturnResultSet_InsertWithIntersectInSubquery() {
  // INTERSECT occurs only inside the INSERT's source subquery.
  final String insertSql =
      "INSERT INTO t SELECT x FROM (SELECT x FROM a INTERSECT SELECT x FROM b) s";
  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(insertSql, Collections.emptyList());
  assertFalse(returnsResultSet);
}

@Test
public void testShouldReturnResultSet_InsertWithExceptInSubquery() {
  // EXCEPT used as a set operator inside the INSERT's source subquery.
  assertFalse(
      DatabricksStatement.shouldReturnResultSet(
          "INSERT INTO t SELECT x FROM (SELECT x FROM a EXCEPT SELECT x FROM b) s",
          Collections.emptyList()));
}

@Test
public void testShouldReturnResultSet_InsertWithSelectStarExceptColumnExclusion() {
  // Here EXCEPT is the Databricks column-exclusion form `SELECT * EXCEPT (col)`, not a set
  // operator; the INSERT is still expected to be classified as not returning a ResultSet.
  final String columnExclusionInsert = "INSERT INTO t SELECT * EXCEPT (secret_col) FROM source";
  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(columnExclusionInsert, Collections.emptyList());
  assertFalse(returnsResultSet);
}

@Test
public void testShouldReturnResultSet_InsertOverwriteDirectoryWithIntersect() {
  // INSERT OVERWRITE DIRECTORY whose query part is a top-level INTERSECT.
  final String targetClause = "INSERT OVERWRITE DIRECTORY 's3://bucket/path' USING CSV ";
  final String sourceQuery = "SELECT x FROM a INTERSECT SELECT x FROM b";
  final String overwriteSql = targetClause + sourceQuery;

  assertFalse(DatabricksStatement.shouldReturnResultSet(overwriteSql, Collections.emptyList()));
}

@Test
public void testShouldReturnResultSet_UpdateWithUnionInSubquery() {
  // UPDATE whose SET expression is a scalar subquery containing UNION.
  final String updateSql = "UPDATE t SET col = (SELECT x FROM a UNION SELECT x FROM b LIMIT 1)";
  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(updateSql, Collections.emptyList());
  assertFalse(returnsResultSet);
}

@Test
public void testShouldReturnResultSet_DeleteWithExceptInSubquery() {
  // DELETE whose WHERE ... IN subquery uses EXCEPT.
  assertFalse(
      DatabricksStatement.shouldReturnResultSet(
          "DELETE FROM t WHERE id IN (SELECT id FROM a EXCEPT SELECT id FROM b)",
          Collections.emptyList()));
}

@Test
public void testShouldReturnResultSet_MergeWithUnionInSource() {
  // MERGE whose USING source is a subquery containing UNION.
  final String mergeSql =
      String.join(
          "",
          "MERGE INTO target t USING (SELECT id FROM a UNION SELECT id FROM b) s ",
          "ON t.id = s.id WHEN MATCHED THEN DELETE");

  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(mergeSql, Collections.emptyList());

  assertFalse(returnsResultSet);
}

@Test
public void testShouldReturnResultSet_DmlPrefixOverriddenByNonRowcountConfig() {
  // When "INSERT" is supplied as a non-rowcount query prefix, that opt-in is expected to take
  // precedence over the DML short-circuit, so the statement is treated as ResultSet-producing.
  final String insertSql = "INSERT INTO t VALUES (1)";
  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(insertSql, Arrays.asList("INSERT"));
  assertTrue(returnsResultSet, "NonRowcountQueryPrefixes=INSERT should force ResultSet mode");
}

@Test
public void testShouldReturnResultSet_TopLevelParenthesizedUnionStillMatches() {
  // Regression guard: a top-level set operation that opens with `(` must keep classifying as
  // ResultSet-producing (per the original note, through SELECT_PATTERN's `^(\s*\()*\s*SELECT`
  // prefix).
  final String parenthesizedUnion = "(SELECT a FROM t1) UNION (SELECT a FROM t2)";
  final boolean returnsResultSet =
      DatabricksStatement.shouldReturnResultSet(parenthesizedUnion, Collections.emptyList());
  assertTrue(returnsResultSet);
}

@Test
public void testShouldReturnResultSet_DeclareQuery() {
String query = "DECLARE @var INT;";
Expand Down
Loading