diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index a5c655497..edb74d15f 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -19,29 +19,9 @@ - Arrow schema deserialization failures (Thrift metadata path) now surface a dedicated driver error code `ARROW_SCHEMA_PARSING_ERROR` (vendor code `22000`) and a proper SQLSTATE `22000` (Data Exception) on the thrown `SQLException`, instead of the generic `RESULT_SET_ERROR` (1004) and the enum name as SQLSTATE. The exception message is unchanged. ### Fixed +- Fixed `?` characters inside SQL comments, string literals, and quoted identifiers being incorrectly counted as parameter placeholders when `supportManyParameters=1`. `SQLInterpolator` now uses `SqlCommentParser` to locate only real placeholders. Fixes #1331. - Fixed `MetadataOperationTimeout` not being applied when metadata operations use SHOW commands. Operations like `getTables`, `getSchemas`, and `getColumns` now respect the `MetadataOperationTimeout` connection property instead of hanging indefinitely with no timeout. - -- Reclassify transient/mis-categorized server errors so callers can identify - retryable failures. The remap is applied at all Thrift error sites - (`checkResponseForErrors`, `executeAsync`, `verifySuccessStatus`, and the - polling status handler) so the same server failure surfaces with the same - SQL state regardless of which response carries it. - - Unity Catalog unavailability (`[UC_CLIENT_EXCEPTION]`, previously `XXUCC`) - and parquet read / connection-acquisition deadlines - (`[PARQUET_FAILED_READ_FOOTER]`, `DEADLINE_EXCEEDED: acquiring connection`) - are now reported with SQL state `08S01` (communication link failure). - - Server-side `java.util.ConcurrentModificationException` is now reported - with SQL state `40001` (serialization failure) instead of the misleading - `42000`. The remap only applies when the original SQL state is `42000` so - unrelated `42xxx` states (e.g. `42501` insufficient privilege) are - preserved. - Notes for callers and operators: - - Callers branching on the legacy `XXUCC`/`42000` states for these failures - must update to `08S01`/`40001`. The driver logs the original→remapped - state at `INFO` level for traceability. - - The driver's failure telemetry uses `sqlState` as the error-name field, - so dashboards/alerts keyed on `XXUCC` or `42000` for these specific - failure modes will need to be updated to the new states. +- Reclassify transient server errors to standard SQL states (08S01, 40001) across all Thrift error sites. This ensures UC unavailability and concurrent modification errors surface consistently for better retry handling. Note: Dashboards and branching logic keyed on legacy XXUCC or 42000 must be updated. --- *Note: When making changes, please add your change under the appropriate section diff --git a/src/main/java/com/databricks/jdbc/common/util/SQLInterpolator.java b/src/main/java/com/databricks/jdbc/common/util/SQLInterpolator.java index 15259cbf6..1928dfe26 100644 --- a/src/main/java/com/databricks/jdbc/common/util/SQLInterpolator.java +++ b/src/main/java/com/databricks/jdbc/common/util/SQLInterpolator.java @@ -6,8 +6,6 @@ import com.databricks.jdbc.exception.DatabricksValidationException; import com.databricks.jdbc.model.core.ColumnInfoTypeName; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; public class SQLInterpolator { protected static String escapeInputs(String input) { @@ -65,22 +63,12 @@ private static String formatObject(ImmutableSqlParameter object) { } } - private static int countPlaceholders(String sql) { - int count = 0; - for (char c : sql.toCharArray()) { - if (c == '?') { - count++; - } - } - return count; - } - /** * Interpolates the given SQL string by replacing placeholders with the provided parameters. * - *

This method splits the SQL string by placeholders (question marks) and replaces each - * placeholder with the corresponding parameter from the provided map. The map keys are 1-based - * indexes, aligning with the SQL parameter positions. + *

Only '?' characters that appear outside of comments, string literals, and quoted identifiers + * are treated as placeholders. The map keys are 1-based indexes, aligning with the SQL parameter + * positions. * * @param sql the SQL string containing placeholders ('?') to be replaced. * @param params a map of parameters where the key is the 1-based index of the placeholder in the @@ -91,37 +79,48 @@ private static int countPlaceholders(String sql) { */ public static String interpolateSQL(String sql, Map params) throws DatabricksValidationException { - String[] parts = sql.split("\\?"); - if (countPlaceholders(sql) != params.size()) { + int[] positions = SqlCommentParser.findPlaceholderPositions(sql); + if (positions.length != params.size()) { throw new DatabricksValidationException( "Parameter count does not match. Provide equal number of parameters as placeholders. SQL " + sql); } - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < parts.length; i++) { - sb.append(parts[i]); - if (i < params.size()) { - sb.append(formatObject(params.get(i + 1))); // because we have 1 based index in params - } + StringBuilder sb = new StringBuilder(sql.length() + 16); + int last = 0; + for (int i = 0; i < positions.length; i++) { + int pos = positions[i]; + sb.append(sql, last, pos); + sb.append(formatObject(params.get(i + 1))); // 1-based index in params + last = pos + 1; + } + if (last < sql.length()) { + sb.append(sql, last, sql.length()); } return sb.toString(); } /** - * Surrounds unquoted placeholders (?) with single quotes, preserving already quoted ones. This is - * crucial for DESCRIBE QUERY commands as unquoted placeholders will cause a parse_syntax_error. + * Surrounds real placeholders (?) with single quotes, leaving '?' characters that appear inside + * comments, string literals, or quoted identifiers untouched. This is crucial for DESCRIBE QUERY + * commands, where bare placeholders cause a parse_syntax_error. */ public static String surroundPlaceholdersWithQuotes(String sql) { if (sql == null || sql.isEmpty()) { return sql; } - // This pattern matches any '?' that is NOT already inside single quotes - StringBuilder sb = new StringBuilder(); - Matcher m = Pattern.compile("(? consumer.accept(state, c)); + } + + /** + * Same as {@link #forEachNonCommentChar(String, SqlCharConsumer)} but the consumer also receives + * the source-string index of each emitted character. Useful when callers need to relate emitted + * characters back to positions in the original SQL (e.g. parameter placeholder discovery). + */ + public static void forEachNonCommentChar(String sql, IndexedSqlCharConsumer consumer) { if (sql == null || sql.isEmpty()) { return; } @@ -48,22 +70,22 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) { i++; // skip '*' } else if (c == '\'') { state = State.IN_SINGLE_QUOTE; - consumer.accept(state, c); + consumer.accept(state, c, i); } else if (c == '"') { state = State.IN_DOUBLE_QUOTE; - consumer.accept(state, c); + consumer.accept(state, c, i); } else if (c == '`') { state = State.IN_BACKTICK; - consumer.accept(state, c); + consumer.accept(state, c, i); } else { - consumer.accept(state, c); + consumer.accept(state, c, i); } break; case IN_SINGLE_QUOTE: - consumer.accept(state, c); + consumer.accept(state, c, i); if (c == '\'' && next == '\'') { - consumer.accept(state, next); + consumer.accept(state, next, i + 1); i++; // skip escaped quote } else if (c == '\'') { state = State.NORMAL; @@ -71,9 +93,9 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) { break; case IN_DOUBLE_QUOTE: - consumer.accept(state, c); + consumer.accept(state, c, i); if (c == '"' && next == '"') { - consumer.accept(state, next); + consumer.accept(state, next, i + 1); i++; // skip escaped quote } else if (c == '"') { state = State.NORMAL; @@ -81,9 +103,9 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) { break; case IN_BACKTICK: - consumer.accept(state, c); + consumer.accept(state, c, i); if (c == '`' && next == '`') { - consumer.accept(state, next); + consumer.accept(state, next, i + 1); i++; // skip escaped backtick } else if (c == '`') { state = State.NORMAL; @@ -93,7 +115,7 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) { case IN_LINE_COMMENT: if (c == '\n' || c == '\r') { state = State.NORMAL; - consumer.accept(state, ' '); + consumer.accept(state, ' ', i); if (c == '\r' && next == '\n') { i++; // Treat \r\n as a single line ending } @@ -110,7 +132,7 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) { i++; // skip '/' if (blockCommentDepth == 0) { state = State.NORMAL; - consumer.accept(state, ' '); + consumer.accept(state, ' ', i); } } // else: skip character (part of the comment) @@ -119,6 +141,30 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) { } } + /** + * Finds the source-string indices of placeholder ('?') characters that appear in {@link + * State#NORMAL} state — i.e. not inside comments, string literals, or quoted identifiers. Used by + * parameter binding to locate the real placeholders in a SQL statement. + * + * @param sql the SQL string to scan + * @return source-string indices of placeholder '?' characters, in order + */ + public static int[] findPlaceholderPositions(String sql) { + List positions = new ArrayList<>(); + forEachNonCommentChar( + sql, + (state, c, sourceIndex) -> { + if (state == State.NORMAL && c == '?') { + positions.add(sourceIndex); + } + }); + int[] out = new int[positions.size()]; + for (int k = 0; k < out.length; k++) { + out[k] = positions.get(k); + } + return out; + } + /** * Removes all comments and extra whitespace from the SQL string. * diff --git a/src/test/java/com/databricks/jdbc/common/util/SQLInterpolatorTest.java b/src/test/java/com/databricks/jdbc/common/util/SQLInterpolatorTest.java index 2887db468..22b35f465 100644 --- a/src/test/java/com/databricks/jdbc/common/util/SQLInterpolatorTest.java +++ b/src/test/java/com/databricks/jdbc/common/util/SQLInterpolatorTest.java @@ -126,6 +126,78 @@ public void testStringWithNewline() throws DatabricksValidationException { assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params)); } + @Test + public void testQuestionMarkInLineCommentNotTreatedAsPlaceholder() + throws DatabricksValidationException { + String sql = "-- does this work?\nselect * from mytable where id = ?"; + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, 42, DatabricksTypeUtil.INT)); + String expected = "-- does this work?\nselect * from mytable where id = 42"; + assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params)); + } + + @Test + public void testQuestionMarkInBlockCommentNotTreatedAsPlaceholder() + throws DatabricksValidationException { + String sql = "select /* maybe? or ? */ * from t where id = ?"; + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, 1, DatabricksTypeUtil.INT)); + String expected = "select /* maybe? or ? */ * from t where id = 1"; + assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params)); + } + + @Test + public void testQuestionMarkInStringLiteralNotTreatedAsPlaceholder() + throws DatabricksValidationException { + String sql = "select 'hello?', * from mytable where id = ?"; + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, 5, DatabricksTypeUtil.INT)); + String expected = "select 'hello?', * from mytable where id = 5"; + assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params)); + } + + @Test + public void testQuestionMarkInQuotedIdentifierNotTreatedAsPlaceholder() + throws DatabricksValidationException { + String sql = "select `col?` from t where id = ? and name = \"who?\""; + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, 7, DatabricksTypeUtil.INT)); + String expected = "select `col?` from t where id = 7 and name = \"who?\""; + assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params)); + } + + @Test + public void testCombinedCommentsAndLiteralsWithQuestionMarks() + throws DatabricksValidationException { + String sql = + "-- ?\n/* ? */ select 'a?' as x, ? as y, \"b?\" as z, `c?` as w from t where id = ?"; + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, "alice", DatabricksTypeUtil.STRING)); + params.put(2, getSqlParam(2, 99, DatabricksTypeUtil.INT)); + String expected = + "-- ?\n/* ? */ select 'a?' as x, 'alice' as y, \"b?\" as z, `c?` as w from t where id = 99"; + assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params)); + } + + @Test + public void testInterpolateAdjacentPlaceholders() throws DatabricksValidationException { + String sql = "select ?,?,? from t"; + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, 1, DatabricksTypeUtil.INT)); + params.put(2, getSqlParam(2, 2, DatabricksTypeUtil.INT)); + params.put(3, getSqlParam(3, 3, DatabricksTypeUtil.INT)); + assertEquals("select 1,2,3 from t", SQLInterpolator.interpolateSQL(sql, params)); + } + + @Test + public void testInterpolatePlaceholderAtStartAndEnd() throws DatabricksValidationException { + Map params = new HashMap<>(); + params.put(1, getSqlParam(1, 7, DatabricksTypeUtil.INT)); + assertEquals("7 ", SQLInterpolator.interpolateSQL("? ", params)); + assertEquals(" 7", SQLInterpolator.interpolateSQL(" ?", params)); + assertEquals("7", SQLInterpolator.interpolateSQL("?", params)); + } + @Test public void testEscapeInputs() { // Simple apostrophe doubling @@ -177,7 +249,37 @@ private static Stream providePlaceholderQuotingTestCases() { Arguments.of( "SELECT * FROM table WHERE id = ? AND (name = ? OR age = ?) AND status = ?", "SELECT * FROM table WHERE id = '?' AND (name = '?' OR age = '?') AND status = '?'", - "Complex query with multiple conditions")); + "Complex query with multiple conditions"), + + // ? inside line comment must not be quoted + Arguments.of( + "-- is this a ?\nSELECT ? FROM t", + "-- is this a ?\nSELECT '?' FROM t", + "Question mark inside line comment is preserved"), + + // ? inside block comment must not be quoted + Arguments.of( + "SELECT /* ? */ ? FROM t", + "SELECT /* ? */ '?' FROM t", + "Question mark inside block comment is preserved"), + + // ? inside double-quoted identifier must not be quoted + Arguments.of( + "SELECT \"col?\" FROM t WHERE id = ?", + "SELECT \"col?\" FROM t WHERE id = '?'", + "Question mark inside double-quoted identifier is preserved"), + + // ? inside backtick identifier must not be quoted + Arguments.of( + "SELECT `col?` FROM t WHERE id = ?", + "SELECT `col?` FROM t WHERE id = '?'", + "Question mark inside backtick identifier is preserved"), + + // Adjacent placeholders — both must be quoted independently + Arguments.of( + "SELECT ?,?,? FROM t", + "SELECT '?','?','?' FROM t", + "Adjacent placeholders are each quoted")); } @ParameterizedTest(name = "{2}") diff --git a/src/test/java/com/databricks/jdbc/common/util/SqlCommentParserTest.java b/src/test/java/com/databricks/jdbc/common/util/SqlCommentParserTest.java index 3ce166f7f..ca3c2ff31 100644 --- a/src/test/java/com/databricks/jdbc/common/util/SqlCommentParserTest.java +++ b/src/test/java/com/databricks/jdbc/common/util/SqlCommentParserTest.java @@ -1,5 +1,6 @@ package com.databricks.jdbc.common.util; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -392,4 +393,141 @@ public void testForEachEmitsSyntheticSpaceOnLineCommentExit() { SqlCommentParser.forEachNonCommentChar("a--x\nb", (state, c) -> chars.add(c)); assertEquals(List.of('a', ' ', 'b'), chars); } + + // --- findPlaceholderPositions --- + + @Test + public void testFindPlaceholderPositionsNullAndEmpty() { + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions(null)); + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions("")); + } + + @Test + public void testFindPlaceholderPositionsBasic() { + assertArrayEquals( + new int[] {29}, + SqlCommentParser.findPlaceholderPositions("DELETE FROM log WHERE date = ?")); + assertArrayEquals( + new int[] {7, 10, 13}, SqlCommentParser.findPlaceholderPositions("select ?, ?, ? from t")); + } + + @Test + public void testFindPlaceholderPositionsSkipsLineComment() { + String sql = "-- ?\nselect ? from t"; + assertArrayEquals( + new int[] {sql.indexOf('?', 5)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsSkipsBlockComment() { + String sql = "select /* ? or ? */ ? from t"; + assertArrayEquals( + new int[] {sql.indexOf('?', 18)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsSkipsStringLiteral() { + String sql = "select 'a?b' as x, ? as y from t"; + assertArrayEquals( + new int[] {sql.indexOf('?', 12)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsSkipsQuotedIdentifiers() { + String sql = "select \"col?\", `col?` from t where x = ?"; + assertArrayEquals( + new int[] {sql.indexOf('?', 22)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsHandlesEscapedQuotes() { + String sql = "select 'it''s ?' as x, ? as y from t"; + assertArrayEquals( + new int[] {sql.indexOf('?', 16)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsHandlesCrlfLineComment() { + String sql = "-- ?\r\nselect ? from t"; + assertArrayEquals( + new int[] {sql.indexOf('?', 6)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsHandlesNestedBlockCommentWithQuestionMarks() { + String sql = "select /* outer ? /* inner ? */ outer ? */ ? from t"; + int outerEnd = sql.indexOf("*/ ?") + 3; + assertArrayEquals(new int[] {outerEnd}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsAdjacentPlaceholders() { + assertArrayEquals(new int[] {0, 1, 2}, SqlCommentParser.findPlaceholderPositions("???")); + } + + @Test + public void testFindPlaceholderPositionsLeadingAndTrailingPlaceholder() { + assertArrayEquals(new int[] {0, 8}, SqlCommentParser.findPlaceholderPositions("? from t?")); + } + + @Test + public void testFindPlaceholderPositionsHandlesEscapedBacktick() { + String sql = "select `c``ol?` from t where x = ?"; + assertArrayEquals( + new int[] {sql.indexOf('?', 17)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsCommentsOnlySql() { + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions("-- just a comment")); + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions("-- foo\n")); + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions("/* block ? */")); + assertArrayEquals( + new int[0], SqlCommentParser.findPlaceholderPositions("-- ?\n/* ? */ -- more")); + } + + @Test + public void testFindPlaceholderPositionsLineCommentImmediatelyConsumesQuestionMark() { + // `?` appearing right after the `--` line-comment opener is part of the comment. + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions("--?")); + assertArrayEquals(new int[0], SqlCommentParser.findPlaceholderPositions("--?\n")); + String sql = "--?\nselect ? from t"; + assertArrayEquals( + new int[] {sql.indexOf('?', 4)}, SqlCommentParser.findPlaceholderPositions(sql)); + } + + @Test + public void testFindPlaceholderPositionsUnclosedSingleQuote() { + // `?` inside an unterminated single-quoted literal stays inside the literal and is not a + // placeholder. + assertArrayEquals( + new int[0], SqlCommentParser.findPlaceholderPositions("select 'never ? closed")); + } + + @Test + public void testFindPlaceholderPositionsUnclosedDoubleQuote() { + assertArrayEquals( + new int[0], SqlCommentParser.findPlaceholderPositions("select \"never ? closed")); + } + + @Test + public void testFindPlaceholderPositionsUnclosedBacktick() { + assertArrayEquals( + new int[0], SqlCommentParser.findPlaceholderPositions("select `never ? closed")); + } + + @Test + public void testFindPlaceholderPositionsUnclosedBlockComment() { + assertArrayEquals( + new int[0], SqlCommentParser.findPlaceholderPositions("select /* never ? closed")); + } + + @Test + public void testFindPlaceholderPositionsBacktickWithEscapeBetweenQuestionMarks() { + // `?` then escaped backtick (``) then `?` — both `?` characters stay inside the identifier. + String sql = "select `a?``b?` from t where x = ?"; + assertArrayEquals( + new int[] {sql.indexOf('?', sql.indexOf("from"))}, + SqlCommentParser.findPlaceholderPositions(sql)); + } }