1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
@@ -9,6 +9,7 @@
### Updated

### Fixed
- Fixed `NullPointerException` / `StringIndexOutOfBoundsException` in `MetadataParser` when fetching complex types with `EnableComplexDatatypeSupport=1` and the server returns a bare type name (`ARRAY`/`MAP`/`STRUCT`) without parameterized element/field types. The driver now falls back to inferring types from the JSON body instead of crashing.
- Fixed `EnableBatchedInserts` silently falling back to individual execution when table or schema names contain special characters (e.g., hyphens) inside backtick-quoted identifiers. Added a warn log when the fallback occurs.
- Fixed `IntervalConverter` crash (`IllegalArgumentException: Invalid interval metadata`) when INTERVAL columns are returned via CloudFetch. Arrow metadata from CloudFetch uses underscored format (`INTERVAL_YEAR_MONTH`, `INTERVAL_DAY_TIME`) which the driver's regex did not accept.
- Fixed primitive types within complex types (ARRAY, MAP, STRUCT) not being correctly parsed when Arrow serialization uses alternate formats: TIMESTAMP/TIMESTAMP_NTZ as epoch microseconds or component arrays, and BINARY as base64-encoded strings.
@@ -107,13 +107,16 @@ public DatabricksStruct parseToStruct(JsonNode node, String structMetadata)
}
LOGGER.debug("Parsing struct with metadata: {}", structMetadata);
Map<String, String> fieldTypeMap = MetadataParser.parseStructMetadata(structMetadata);
// When the server did not populate parameterized field types (bare "STRUCT"), infer
// each field's type from the JSON node shape instead of defaulting every field to STRING.
String fallbackFieldType = fieldTypeMap.isEmpty() ? "" : DatabricksTypeUtil.STRING;
Map<String, Object> structMap = new LinkedHashMap<>();
Iterator<Map.Entry<String, JsonNode>> fields = node.fields();
while (fields.hasNext()) {
Map.Entry<String, JsonNode> entry = fields.next();
String fieldName = entry.getKey();
JsonNode fieldNode = entry.getValue();
String fieldType = fieldTypeMap.getOrDefault(fieldName, DatabricksTypeUtil.STRING);
String fieldType = fieldTypeMap.getOrDefault(fieldName, fallbackFieldType);
Object convertedValue = convertValueNode(fieldNode, fieldType);
structMap.put(fieldName, convertedValue);
}
@@ -125,6 +128,12 @@ private Object convertValueNode(JsonNode node, String expectedType)
if (node == null || node.isNull()) {
return null;
}
// Expected type can be empty when the server omitted parameterized complex-type metadata
// (e.g. TColumnDesc reports just ARRAY_TYPE and the arrow schema was not populated).
// Fall back to inferring types from the JSON node shape.
if (expectedType == null || expectedType.isEmpty()) {
return convertJsonNodeDynamic(node);
}
if (expectedType.startsWith(DatabricksTypeUtil.ARRAY)) {
return parseToArray(node, expectedType);
}
@@ -158,6 +167,42 @@ private Object convertValueNode(JsonNode node, String expectedType)
return convertPrimitive(node.asText(), expectedType);
}

/**
* Infers the Java representation of a JSON node when no SQL type hint is available. Arrays become
* {@link DatabricksArray}, objects become {@link DatabricksStruct}, and primitives are mapped to
* their natural Java type (Number / Boolean / String). Used as a fallback for the case where the
* server returns bare complex-type metadata like {@code ARRAY} or {@code STRUCT} without element or
* field parameters.
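*
* <p>For example (hypothetical values, not tied to any server response): the node
* {"a":1,"b":[true,"x"]} becomes a {@link DatabricksStruct} whose attribute "a" is the Number 1 and
* whose attribute "b" is a {@link DatabricksArray} holding Boolean.TRUE and the String "x"; nested
* arrays and objects recurse through this same method.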
*/
private Object convertJsonNodeDynamic(JsonNode node) {
if (node == null || node.isNull()) {
return null;
}
if (node.isArray()) {
List<Object> list = new ArrayList<>();
for (JsonNode child : node) {
list.add(convertJsonNodeDynamic(child));
}
return new DatabricksArray(list, DatabricksTypeUtil.ARRAY);
}
if (node.isObject()) {
Map<String, Object> map = new LinkedHashMap<>();
Iterator<Map.Entry<String, JsonNode>> it = node.fields();
while (it.hasNext()) {
Map.Entry<String, JsonNode> entry = it.next();
map.put(entry.getKey(), convertJsonNodeDynamic(entry.getValue()));
}
return new DatabricksStruct(map, DatabricksTypeUtil.STRUCT);
}
if (node.isBoolean()) {
return node.booleanValue();
}
if (node.isNumber()) {
return node.numberValue();
}
return node.asText();
}

private Map<String, Object> convertJsonNodeToJavaMap(
JsonNode node, String keyType, String valueType) throws DatabricksParsingException {
Map<String, Object> result = new LinkedHashMap<>();
@@ -339,7 +384,14 @@ public String formatMapString(String jsonString, String mapMetadata) {
if (node.isArray() && node.size() > 0 && node.get(0).has("key")) {
String[] kv = new String[] {"STRING", "STRING"};
if (mapMetadata != null && mapMetadata.startsWith(DatabricksTypeUtil.MAP)) {
kv = MetadataParser.parseMapMetadata(mapMetadata).split(",", 2);
String[] parsed = MetadataParser.parseMapMetadata(mapMetadata).split(",", 2);
// Only adopt the parsed key/value types when both halves are non-empty. Bare "MAP"
// metadata returns empty halves (server omitted parameterized types); in that case we
// keep the STRING/STRING default so quoting still matches the legacy complex-types-disabled
// path and we don't emit unquoted string keys/values.
if (parsed.length == 2 && !parsed[0].trim().isEmpty() && !parsed[1].trim().isEmpty()) {
kv = parsed;
}
}

String keyType = kv[0].trim();
@@ -49,6 +49,12 @@ private Object[] convertElements(List<Object> elements, String elementType) {
for (int i = 0; i < elements.size(); i++) {
Object element = elements.get(i);
try {
// When element type is unknown (e.g. server returned bare "ARRAY" without parameters),
// preserve the values as-is — they were already typed by dynamic JSON inference upstream.
if (elementType == null || elementType.isEmpty()) {
convertedElements[i] = element;
continue;
}
if (elementType.startsWith(DatabricksTypeUtil.STRUCT)) {
if (element instanceof Map) {
convertedElements[i] = new DatabricksStruct((Map<String, Object>) element, elementType);
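A minimal sketch of the pass-through above, assuming the DatabricksArray(List, String) constructor used by convertJsonNodeDynamic (the list contents are illustrative):

    List<Object> values = Arrays.asList(1, "x", Boolean.TRUE); // already typed by dynamic JSON inference
    DatabricksArray array = new DatabricksArray(values, "ARRAY"); // bare metadata, element type unknown
    Object[] elements = (Object[]) array.getArray(); // {1, "x", true}; no per-element conversion (getArray may throw SQLException)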
8 changes: 6 additions & 2 deletions src/main/java/com/databricks/jdbc/api/impl/DatabricksMap.java
@@ -45,9 +45,13 @@ private Map<K, V> convertMap(Map<K, V> originalMap, String metadata) {
String valueType = mapMetadata[1].trim();
LOGGER.debug("Parsed metadata - Key Type: {}, Value Type: {}", keyType, valueType);

boolean bareKey = keyType.isEmpty();
boolean bareValue = valueType.isEmpty();
for (Map.Entry<K, V> entry : originalMap.entrySet()) {
K key = convertSimpleValue(entry.getKey(), keyType);
V value = convertValue(entry.getValue(), valueType);
// When the server omitted parameterized key/value types (bare "MAP"), preserve the
// already-typed value produced by dynamic JSON inference upstream.
K key = bareKey ? entry.getKey() : convertSimpleValue(entry.getKey(), keyType);
V value = bareValue ? entry.getValue() : convertValue(entry.getValue(), valueType);
convertedMap.put(key, value);
LOGGER.trace("Converted entry - Key: {}, Converted Value: {}", key, value);
}
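A similar sketch for the map path, assuming a DatabricksMap(Map, String) constructor analogous to the DatabricksArray and DatabricksStruct ones (the entries are illustrative):

    Map<String, Object> entries = new LinkedHashMap<>();
    entries.put("k1", 100); // value already typed by dynamic JSON inference
    DatabricksMap<String, Object> map = new DatabricksMap<>(entries, "MAP"); // bare metadata
    map.get("k1"); // -> 100, preserved as a Number rather than converted to a String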
16 changes: 11 additions & 5 deletions src/main/java/com/databricks/jdbc/api/impl/DatabricksStruct.java
@@ -32,11 +32,17 @@ public DatabricksStruct(Map<String, Object> attributes, String metadata) {
// Parse the metadata into a map: fieldName -> fieldType
Map<String, String> typeMap = MetadataParser.parseStructMetadata(metadata);

// Capture field names (in the same order they appear in typeMap).
this.fieldNames = new ArrayList<>(typeMap.keySet());

// Convert attributes to the appropriate array of Objects.
this.attributes = convertAttributes(attributes, typeMap);
// When the server omitted parameterized field info (bare "STRUCT"), fall back to the
// field names and already-typed values from the supplied attributes map.
if (typeMap.isEmpty()) {
this.fieldNames = new ArrayList<>(attributes.keySet());
this.attributes = attributes.values().toArray();
} else {
// Capture field names (in the same order they appear in typeMap).
this.fieldNames = new ArrayList<>(typeMap.keySet());
// Convert attributes to the appropriate array of Objects.
this.attributes = convertAttributes(attributes, typeMap);
}

// Store the entire type definition for getSQLTypeName().
this.typeName = metadata;
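A sketch of the constructor fallback above, using the DatabricksStruct(Map, String) constructor exercised by convertJsonNodeDynamic (the attribute values are illustrative):

    Map<String, Object> attrs = new LinkedHashMap<>();
    attrs.put("a", 1);
    attrs.put("b", "two");
    DatabricksStruct struct = new DatabricksStruct(attrs, "STRUCT"); // bare metadata, no field types
    Object[] values = struct.getAttributes(); // [1, "two"]; field names come from the map keys (getAttributes may throw SQLException)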
34 changes: 31 additions & 3 deletions src/main/java/com/databricks/jdbc/api/impl/MetadataParser.java
@@ -12,10 +12,15 @@ public class MetadataParser {
* Parses STRUCT metadata to extract field types.
*
* @param metadata the metadata string representing a STRUCT type
* @return a map where each key is a field name, and the value is the field's data type
* @return a map where each key is a field name, and the value is the field's data type. Returns
* an empty map when the metadata lacks field parameters (e.g., bare {@code STRUCT} sent by
* older servers that did not populate parameterized type names).
*/
public static Map<String, String> parseStructMetadata(String metadata) {
Map<String, String> typeMap = new LinkedHashMap<>();
if (!hasAngleBrackets(metadata)) {
return typeMap;
}
metadata = metadata.substring("STRUCT<".length(), metadata.length() - 1);
String[] fields = splitFields(metadata);

@@ -42,20 +47,30 @@ public static Map<String, String> parseStructMetadata(String metadata) {
* Parses ARRAY metadata to retrieve the element type.
*
* @param metadata the metadata string representing an ARRAY type
* @return the element type contained within the array
* @return the element type contained within the array, or an empty string when the metadata lacks
* the element type parameter (e.g., bare {@code ARRAY}). Callers should treat an empty return
* as "unknown element type" and fall back to dynamic inference.
*/
public static String parseArrayMetadata(String metadata) {
if (!hasAngleBrackets(metadata)) {
return "";
}
return cleanTypeName(metadata.substring("ARRAY<".length(), metadata.length() - 1).trim());
}

/**
* Parses MAP metadata to retrieve key and value types.
*
* @param metadata the metadata string representing a MAP type
* @return a string formatted as "keyType, valueType"
* @return a string formatted as "keyType, valueType", or {@code ", "} when the metadata lacks
* type parameters (e.g., bare {@code MAP}). Callers should treat the empty halves as
* "unknown" and fall back to dynamic inference.
* @throws DatabricksDriverException if the MAP metadata format is invalid
*/
public static String parseMapMetadata(String metadata) {
if (!hasAngleBrackets(metadata)) {
return ", ";
}
metadata = metadata.substring("MAP<".length(), metadata.length() - 1).trim();

int depth = 0;
@@ -133,4 +148,17 @@ private static String[] splitFields(String metadata) {
private static String cleanTypeName(String typeName) {
return typeName.replaceAll(" NOT NULL", "").trim();
}

/**
* Returns true when the metadata contains angle brackets, i.e. looks parameterized. Used to skip
* the parameterized parsing logic for bare type names like {@code ARRAY}, {@code MAP}, or {@code
* STRUCT} that some servers return when the full parameterized type is unavailable (e.g., when
* the Thrift {@code TColumnDesc} carries only {@code ARRAY_TYPE} without element information and
* the arrow schema is not populated).
*/
private static boolean hasAngleBrackets(String metadata) {
return metadata != null
&& metadata.indexOf('<') >= 0
&& metadata.lastIndexOf('>') > metadata.indexOf('<');
}
}
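Taken together, the relaxed parsers now report "unknown" instead of throwing for bare type names, while parameterized metadata parses as before. A quick sketch of the return values (matching the unit tests below):

    MetadataParser.parseStructMetadata("STRUCT"); // empty map
    MetadataParser.parseArrayMetadata("ARRAY"); // ""
    MetadataParser.parseMapMetadata("MAP"); // ", " (two empty halves for callers to detect)
    MetadataParser.parseArrayMetadata("ARRAY<INT>"); // "INT"; parameterized parsing unchanged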
@@ -4,6 +4,7 @@

import com.databricks.jdbc.exception.DatabricksParsingException;
import java.sql.Date;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Timestamp;
import org.junit.jupiter.api.BeforeEach;
@@ -370,6 +371,111 @@ void testBinaryAsBase64InArray() throws DatabricksParsingException {
}
}

/**
* Regression for ES-1526082: when the server returns a bare type name such as "ARRAY" without the
* element-type parameter, the driver used to crash with StringIndexOutOfBoundsException while
* trying to parse "ARRAY<...>". It must now fall back to dynamic JSON inference and return the
* expected values without throwing.
*/
@Test
void testParseJsonStringToDbArray_bareArrayMetadataStrings() throws DatabricksParsingException {
String json = "[\"alpha\",\"beta\",\"gamma\"]";

DatabricksArray dbArray = parser.parseJsonStringToDbArray(json, "ARRAY");
assertNotNull(dbArray);

try {
Object[] elements = (Object[]) dbArray.getArray();
assertEquals(3, elements.length);
assertEquals("alpha", elements[0]);
assertEquals("beta", elements[1]);
assertEquals("gamma", elements[2]);
} catch (Exception e) {
fail("Should not throw on bare ARRAY: " + e.getMessage());
}
}

@Test
void testParseJsonStringToDbArray_bareArrayMetadataNumbers()
throws DatabricksParsingException, SQLException {
String json = "[1,2,3]";

DatabricksArray dbArray = parser.parseJsonStringToDbArray(json, "ARRAY");
assertNotNull(dbArray);

Object[] elements = (Object[]) dbArray.getArray();
assertEquals(3, elements.length);
// Dynamic inference preserves numeric types rather than stringifying.
assertInstanceOf(Number.class, elements[0]);
assertEquals(1, ((Number) elements[0]).intValue());
assertEquals(2, ((Number) elements[1]).intValue());
assertEquals(3, ((Number) elements[2]).intValue());
}

@Test
void testParseJsonStringToDbArray_bareArrayMetadataNested()
throws DatabricksParsingException, SQLException {
// Nested arrays must recurse through dynamic inference.
String json = "[[1,2],[3,4]]";

DatabricksArray dbArray = parser.parseJsonStringToDbArray(json, "ARRAY");
assertNotNull(dbArray);

Object[] elements = (Object[]) dbArray.getArray();
assertEquals(2, elements.length);
assertInstanceOf(DatabricksArray.class, elements[0]);
Object[] inner = (Object[]) ((DatabricksArray) elements[0]).getArray();
assertEquals(2, inner.length);
assertEquals(1, ((Number) inner[0]).intValue());
assertEquals(2, ((Number) inner[1]).intValue());
}

@Test
void testParseJsonStringToDbStruct_bareStructMetadata() throws DatabricksParsingException {
// Bare STRUCT metadata: the parser should infer field names from the JSON body.
String json = "{\"a\":1,\"b\":\"two\",\"c\":true}";

DatabricksStruct dbStruct = parser.parseJsonStringToDbStruct(json, "STRUCT");
assertNotNull(dbStruct);

try {
Object[] attrs = dbStruct.getAttributes();
assertEquals(3, attrs.length);
assertEquals(1, ((Number) attrs[0]).intValue());
assertEquals("two", attrs[1]);
assertEquals(Boolean.TRUE, attrs[2]);
} catch (Exception e) {
fail("Should not throw on bare STRUCT: " + e.getMessage());
}
}

@Test
void testParseJsonStringToDbMap_bareMapMetadata() throws DatabricksParsingException {
String json = "{\"k1\":100,\"k2\":200}";

DatabricksMap<String, Object> dbMap = parser.parseJsonStringToDbMap(json, "MAP");
assertNotNull(dbMap);
assertEquals(2, dbMap.size());
// Values retain their native JSON-inferred numeric type rather than being stringified.
assertEquals(100, ((Number) dbMap.get("k1")).intValue());
assertEquals(200, ((Number) dbMap.get("k2")).intValue());
}

/**
* Regression: formatMapString is used on the complex-types-disabled path. For bare "MAP" metadata
* it used to return the raw JSON (because parseMapMetadata threw and was swallowed by the outer
* catch). After relaxing parseMapMetadata, we must keep the STRING/STRING quoting default so
* string keys still get quoted correctly.
*/
@Test
void testFormatMapString_bareMapMetadataKeepsStringQuoting() {
String jsonString = "[{\"key\":\"a\",\"value\":\"b\"},{\"key\":\"c\",\"value\":\"d\"}]";
String expected = "{\"a\":\"b\",\"c\":\"d\"}";

String result = parser.formatMapString(jsonString, "MAP");
assertEquals(expected, result);
}

@Test
void testFormatComplexTypeString_withMapType() {
String jsonString = "[{\"key\":1,\"value\":2},{\"key\":3,\"value\":4}]";
34 changes: 34 additions & 0 deletions src/test/java/com/databricks/jdbc/api/impl/MetadataParserTest.java
@@ -389,6 +389,40 @@ public void testParseStructMetadata_MixedComplexTypesWithDecimal() {
"Parsed struct metadata with mixed complex types and DECIMAL fields should handle all combinations correctly.");
}

/**
* Regression test for ES-1526082: the server can return a bare type name (e.g. "ARRAY") instead
* of the parameterized form ("ARRAY<STRING>") when both the arrow schema and the embedded arrow
* metadata are absent. The parser must not throw — it should return an empty string to signal
* "unknown element type" so the caller can fall back to dynamic JSON inference.
*/
@Test
@DisplayName("parseArrayMetadata with bare ARRAY returns empty string")
public void testParseArrayMetadata_BareArray() {
assertEquals("", MetadataParser.parseArrayMetadata("ARRAY"));
assertEquals("", MetadataParser.parseArrayMetadata(""));
assertEquals("", MetadataParser.parseArrayMetadata(null));
}

/** Regression test for ES-1526082: bare "MAP" must return a splittable "keyType, valueType". */
@Test
@DisplayName("parseMapMetadata with bare MAP returns empty key/value placeholder")
public void testParseMapMetadata_BareMap() {
String result = MetadataParser.parseMapMetadata("MAP");
String[] kv = result.split(",", 2);
assertEquals(2, kv.length, "Result must split into two parts for keyType/valueType.");
assertEquals("", kv[0].trim());
assertEquals("", kv[1].trim());
}

/** Regression test for ES-1526082: bare "STRUCT" must return an empty field-type map. */
@Test
@DisplayName("parseStructMetadata with bare STRUCT returns empty map")
public void testParseStructMetadata_BareStruct() {
assertTrue(MetadataParser.parseStructMetadata("STRUCT").isEmpty());
assertTrue(MetadataParser.parseStructMetadata("").isEmpty());
assertTrue(MetadataParser.parseStructMetadata(null).isEmpty());
}

/** Test parsing of STRUCT with other parenthesized types to ensure fix applies broadly. */
@Test
@DisplayName("parseStructMetadata with various parenthesized types")