diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_defaults_in_desc_cols.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_defaults_in_desc_cols.q
new file mode 100644
index 000000000000..291315d8d3ff
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_defaults_in_desc_cols.q
@@ -0,0 +1,46 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+-- Mask a random snapshot id
+--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/
+-- Mask added file size
+--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask total file size
+--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask current-snapshot-timestamp-ms
+--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/
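+-- The DEFAULT clauses below rely on Iceberg format-version 3, which adds
+-- per-field initial-default/write-default metadata to the table schema.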
+
+CREATE TABLE ice_parq (
+  id INT)
+STORED BY ICEBERG stored as parquet
+TBLPROPERTIES ('format-version'='3');
+
+INSERT INTO ice_parq (id) VALUES (1);
+
+ALTER TABLE ice_parq ADD COLUMNS (point STRUCT<x:INT, y:INT> DEFAULT 'x:100,y:99',
+  name STRING DEFAULT 'unknown',
+  age INT DEFAULT 25);
+
+INSERT INTO ice_parq (id) VALUES (2);
+
+SELECT * FROM ice_parq ORDER BY id;
+
+DESCRIBE FORMATTED ice_parq;
+
+CREATE TABLE ice_orc (
+  id INT)
+STORED BY ICEBERG stored as orc
+TBLPROPERTIES ('format-version'='3');
+
+INSERT INTO ice_orc (id) VALUES (1);
+
+ALTER TABLE ice_orc ADD COLUMNS (point STRUCT<x:INT, y:INT> DEFAULT 'x:100,y:99',
+  name STRING DEFAULT 'unknown',
+  age INT DEFAULT 25);
+
+INSERT INTO ice_orc (id) VALUES (2);
+
+SELECT * FROM ice_orc ORDER BY id;
+
+DESCRIBE FORMATTED ice_orc;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_defaults_in_desc_cols.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_defaults_in_desc_cols.q.out
new file mode 100644
index 000000000000..88f46ab0107a
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_defaults_in_desc_cols.q.out
@@ -0,0 +1,238 @@
+PREHOOK: query: CREATE TABLE ice_parq (
+  id INT)
+STORED BY ICEBERG stored as parquet
+TBLPROPERTIES ('format-version'='3')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_parq
+POSTHOOK: query: CREATE TABLE ice_parq (
+  id INT)
+STORED BY ICEBERG stored as parquet
+TBLPROPERTIES ('format-version'='3')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_parq
+PREHOOK: query: INSERT INTO ice_parq (id) VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_parq
+POSTHOOK: query: INSERT INTO ice_parq (id) VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_parq
+PREHOOK: query: ALTER TABLE ice_parq ADD COLUMNS (point STRUCT<x:INT, y:INT> DEFAULT 'x:100,y:99',
+  name STRING DEFAULT 'unknown',
+  age INT DEFAULT 25)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@ice_parq
+PREHOOK: Output: default@ice_parq
+POSTHOOK: query: ALTER TABLE ice_parq ADD COLUMNS (point STRUCT<x:INT, y:INT> DEFAULT 'x:100,y:99',
+  name STRING DEFAULT 'unknown',
+  age INT DEFAULT 25)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@ice_parq
+POSTHOOK: Output: default@ice_parq
+PREHOOK: query: INSERT INTO ice_parq (id) VALUES (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_parq
+POSTHOOK: query: INSERT INTO ice_parq (id) VALUES (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_parq
+PREHOOK: query: SELECT * FROM ice_parq ORDER BY id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parq
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT * FROM ice_parq ORDER BY id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parq
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	NULL	unknown	25
+2	{"x":100,"y":99}	unknown	25
+PREHOOK: query: DESCRIBE FORMATTED ice_parq
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@ice_parq
+POSTHOOK: query: DESCRIBE FORMATTED ice_parq
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@ice_parq
+# col_name	data_type	comment
+id	int
+point	struct<x:int,y:int>
+name	string
+age	int
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	EXTERNAL_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	EXTERNAL	TRUE
+	bucketing_version	2
+	current-schema	{\"type\":\"struct\",\"schema-id\":1,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"point\",\"required\":false,\"type\":{\"type\":\"struct\",\"fields\":[{\"id\":3,\"name\":\"x\",\"required\":false,\"type\":\"int\",\"write-default\":100},{\"id\":4,\"name\":\"y\",\"required\":false,\"type\":\"int\",\"write-default\":99}]}},{\"id\":5,\"name\":\"name\",\"required\":false,\"type\":\"string\",\"initial-default\":\"unknown\",\"write-default\":\"unknown\"},{\"id\":6,\"name\":\"age\",\"required\":false,\"type\":\"int\",\"initial-default\":25,\"write-default\":25}]}
+	current-snapshot-id	#Masked#
+	current-snapshot-summary	{\"added-data-files\":\"1\",\"added-records\":\"1\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"2\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"2\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"Apache Iceberg 1.10.1 (commit ccb8bc435062171e64bc8b7e5f56e6aed9c5b934)\"}
+	current-snapshot-timestamp-ms	#Masked#
+	format-version	3
+	iceberg.orc.files.only	false
+	metadata_location	hdfs://### HDFS PATH ###
+	numFiles	2
+	numRows	2
+	parquet.compression	zstd
+	previous_metadata_location	hdfs://### HDFS PATH ###
+	rawDataSize	0
+	serialization.format	1
+	snapshot-count	2
+	storage_handler	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type	ICEBERG
+	totalSize	#Masked#
+#### A masked pattern was here ####
+	uuid	#Masked#
+	write.delete.mode	merge-on-read
+	write.format.default	parquet
+	write.merge.mode	merge-on-read
+	write.metadata.delete-after-commit.enabled	true
+	write.update.mode	merge-on-read
+
+# Storage Information
+SerDe Library:	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:	No
+Sort Columns:	[]
+
+# Constraints
+
+# Default Constraints
+Table:	default.ice_parq
+Constraint Name:	#### A masked pattern was here ####
+Column Name:age	Initial Default Value:25	Write Default Value:25
+
+Constraint Name:	#### A masked pattern was here ####
+Column Name:point	Initial Default Value:	Write Default Value:'x:100,y:99'
+
+Constraint Name:	#### A masked pattern was here ####
+Column Name:name	Initial Default Value:'unknown'	Write Default Value:'unknown'
+
+PREHOOK: query: CREATE TABLE ice_orc (
+  id INT)
+STORED BY ICEBERG stored as orc
+TBLPROPERTIES ('format-version'='3')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: CREATE TABLE ice_orc (
+  id INT)
+STORED BY ICEBERG stored as orc
+TBLPROPERTIES ('format-version'='3')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: INSERT INTO ice_orc (id) VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: INSERT INTO ice_orc (id) VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: ALTER TABLE ice_orc ADD COLUMNS (point STRUCT<x:INT, y:INT> DEFAULT 'x:100,y:99',
+  name STRING DEFAULT 'unknown',
+  age INT DEFAULT 25)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: ALTER TABLE ice_orc ADD COLUMNS (point STRUCT<x:INT, y:INT> DEFAULT 'x:100,y:99',
+  name STRING DEFAULT 'unknown',
+  age INT DEFAULT 25)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: INSERT INTO ice_orc (id) VALUES (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: INSERT INTO ice_orc (id) VALUES (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: SELECT * FROM ice_orc ORDER BY id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT * FROM ice_orc ORDER BY id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	NULL	NULL	NULL
+2	{"x":100,"y":99}	unknown	25
+PREHOOK: query: DESCRIBE FORMATTED ice_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: DESCRIBE FORMATTED ice_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@ice_orc
+# col_name	data_type	comment
+id	int
+point	struct<x:int,y:int>
+name	string
+age	int
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	EXTERNAL_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	EXTERNAL	TRUE
+	bucketing_version	2
+	current-schema	{\"type\":\"struct\",\"schema-id\":1,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"point\",\"required\":false,\"type\":{\"type\":\"struct\",\"fields\":[{\"id\":3,\"name\":\"x\",\"required\":false,\"type\":\"int\",\"write-default\":100},{\"id\":4,\"name\":\"y\",\"required\":false,\"type\":\"int\",\"write-default\":99}]}},{\"id\":5,\"name\":\"name\",\"required\":false,\"type\":\"string\",\"write-default\":\"unknown\"},{\"id\":6,\"name\":\"age\",\"required\":false,\"type\":\"int\",\"write-default\":25}]}
+	current-snapshot-id	#Masked#
+	current-snapshot-summary	{\"added-data-files\":\"1\",\"added-records\":\"1\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"2\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"2\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"Apache Iceberg 1.10.1 (commit ccb8bc435062171e64bc8b7e5f56e6aed9c5b934)\"}
+	current-snapshot-timestamp-ms	#Masked#
+	format-version	3
+	iceberg.orc.files.only	true
+	metadata_location	hdfs://### HDFS PATH ###
+	numFiles	2
+	numRows	2
+	parquet.compression	zstd
+	previous_metadata_location	hdfs://### HDFS PATH ###
+	rawDataSize	0
+	serialization.format	1
+	snapshot-count	2
+	storage_handler	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type	ICEBERG
+	totalSize	#Masked#
+#### A masked pattern was here ####
+	uuid	#Masked#
+	write.delete.mode	merge-on-read
+	write.format.default	orc
+	write.merge.mode	merge-on-read
+	write.metadata.delete-after-commit.enabled	true
+	write.update.mode	merge-on-read
+
+# Storage Information
+SerDe Library:	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:	No
+Sort Columns:	[]
+
+# Constraints
+
+# Default Constraints
+Table:	default.ice_orc
+Constraint Name:	#### A masked pattern was here ####
+Column Name:point	Initial Default Value:	Write Default Value:'x:100,y:99'
+
+Constraint Name:	#### A masked pattern was here ####
+Column Name:age	Initial Default Value:	Write Default Value:25
+
+Constraint Name:	#### A masked pattern was here ####
+Column Name:name	Initial Default Value:	Write Default Value:'unknown'
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java
index b1dd9738572a..fe0ce74406e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java
@@ -18,8 +18,11 @@
 
 package org.apache.hadoop.hive.ql.ddl.table.info.desc.formatter;
 
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.text.StringEscapeUtils;
 import org.apache.hadoop.hive.common.MaterializationSnapshot;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -73,6 +76,7 @@
 import java.util.stream.Collectors;
 
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
+import static org.apache.hadoop.hive.ql.ddl.DDLUtils.isIcebergTable;
 import static org.apache.hadoop.hive.ql.ddl.ShowUtils.ALIGNMENT;
 import static org.apache.hadoop.hive.ql.ddl.ShowUtils.DEFAULT_STRINGBUILDER_SIZE;
 import static org.apache.hadoop.hive.ql.ddl.ShowUtils.FIELD_DELIM;
@@ -83,6 +87,8 @@
  * Formats DESC TABLE results to text format.
 */
 class TextDescTableFormatter extends DescTableFormatter {
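+  // Shared Jackson mapper for parsing the Iceberg "current-schema" table property; safe to reuse across calls.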
+  public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
   @Override
   public void describeTable(HiveConf conf, DataOutputStream out, String columnPath, String tableName, Table table,
       Partition partition, List<FieldSchema> columns, boolean isFormatted, boolean isExtended, boolean isOutputPadded,
@@ -454,7 +460,7 @@ private void displayAllParameters(Map<String, String> params, StringBuilder tabl
     }
   }
 
-  private String getConstraintsInformation(Table table) {
+  private String getConstraintsInformation(Table table) throws IOException {
     StringBuilder constraintsInfo = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
 
     constraintsInfo.append(LINE_DELIM).append("# Constraints").append(LINE_DELIM);
@@ -476,7 +482,7 @@ private String getConstraintsInformation(Table table) {
     }
     if (DefaultConstraint.isNotEmpty(table.getDefaultConstraint())) {
       constraintsInfo.append(LINE_DELIM).append("# Default Constraints").append(LINE_DELIM);
-      getDefaultConstraintsInformation(constraintsInfo, table.getDefaultConstraint());
+      getDefaultConstraintsInformation(constraintsInfo, table);
     }
     if (CheckConstraint.isNotEmpty(table.getCheckConstraint())) {
       constraintsInfo.append(LINE_DELIM).append("# Check Constraints").append(LINE_DELIM);
@@ -558,12 +564,25 @@ private void getNotNullConstraintsInformation(StringBuilder constraintsInfo, Not
     }
   }
 
-  private void getDefaultConstraintsInformation(StringBuilder constraintsInfo, DefaultConstraint constraint) {
+  private void getDefaultConstraintsInformation(StringBuilder constraintsInfo, Table table) throws IOException {
+    DefaultConstraint constraint = table.getDefaultConstraint();
     formatOutput("Table:", constraint.getDatabaseName() + "." + constraint.getTableName(), constraintsInfo);
     Map<String, List<DefaultConstraintCol>> defaultConstraints = constraint.getDefaultConstraints();
     if (MapUtils.isNotEmpty(defaultConstraints)) {
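+      // Iceberg keeps column defaults in the "current-schema" table property,
+      // so parse its "fields" array once and resolve each column against it.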
+      boolean isIceberg = isIcebergTable(table);
+      JsonNode fieldsNode = null;
+      if (isIceberg && table.getParameters().get("current-schema") != null) {
+        fieldsNode = OBJECT_MAPPER.readTree(table.getParameters().get("current-schema")).get("fields");
+      }
+
       for (Map.Entry<String, List<DefaultConstraintCol>> entry : defaultConstraints.entrySet()) {
-        getDefaultConstraintRelInformation(constraintsInfo, entry.getKey(), entry.getValue());
+        if (isIceberg) {
+          getIcebergDefaultConstraintRelInformation(constraintsInfo, entry.getKey(), entry.getValue(), fieldsNode);
+        } else {
+          getDefaultConstraintRelInformation(constraintsInfo, entry.getKey(), entry.getValue());
+        }
       }
     }
   }
@@ -582,6 +601,80 @@ private void getDefaultConstraintRelInformation(StringBuilder constraintsInfo, S
     constraintsInfo.append(LINE_DELIM);
   }
 
+  private void getIcebergDefaultConstraintRelInformation(StringBuilder constraintsInfo, String constraintName,
+      List<DefaultConstraintCol> columns, JsonNode fieldsNode) {
+    formatOutput("Constraint Name:", constraintName, constraintsInfo);
+    if (CollectionUtils.isNotEmpty(columns)) {
+      for (DefaultConstraintCol column : columns) {
+        String[] fields = new String[3];
+        fields[0] = "Column Name:" + column.colName;
+        fields[1] = "Initial Default Value:" + getColumnDefaults(fieldsNode, column.colName, "initial-default");
+        fields[2] = "Write Default Value:" + getColumnDefaults(fieldsNode, column.colName, "write-default");
+        formatOutput(fields, constraintsInfo);
+      }
+    }
+    constraintsInfo.append(LINE_DELIM);
+  }
+
+  private String getColumnDefaults(JsonNode node, String colName, String defaultType) {
+    if (node == null || colName == null) {
+      return StringUtils.EMPTY;
+    }
+
+    JsonNode targetNode = node;
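+    // When given the schema's "fields" array, pick the element whose "name"
+    // matches the constraint column; recursive calls pass a single field node.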
+    if (node.isArray()) {
+      targetNode = null;
+      for (JsonNode field : node) {
+        if (field.has("name") && colName.equalsIgnoreCase(field.get("name").asText())) {
+          targetNode = field;
+          break;
+        }
+      }
+    }
+
+    if (targetNode == null) {
+      return StringUtils.EMPTY;
+    } else if (targetNode.has(defaultType)) {
+      JsonNode defaultNode = targetNode.get(defaultType);
+      if (defaultNode.isTextual()) {
+        return quoteString(defaultNode.asText());
+      }
+      return defaultNode.asText();
+    }
+
+    // In case of struct, extract defaults recursively from its nested fields
+    JsonNode typeNode = targetNode.get("type");
+    if (typeNode != null && typeNode.isObject() && typeNode.has("type") &&
+        "struct".equalsIgnoreCase(typeNode.get("type").asText())) {
+      JsonNode structFields = typeNode.get("fields");
+      if (structFields != null && structFields.isArray()) {
+        List<String> fieldDefaults = new ArrayList<>();
+        boolean hasDefaults = false;
+        for (JsonNode childField : structFields) {
+          String childName = childField.has("name") ? childField.get("name").asText() : "";
+          String childDefault = getColumnDefaults(childField, childName, defaultType);
+          if (childDefault != null && !childDefault.isEmpty()) {
+            hasDefaults = true;
+            fieldDefaults.add(childName + ":" + childDefault);
+          } else {
+            fieldDefaults.add(childName + ":");
+          }
+        }
+
+        if (hasDefaults) {
+          return quoteString(String.join(",", fieldDefaults));
+        }
+      }
+    }
+    return StringUtils.EMPTY;
+  }
+
+  private static String quoteString(String input) {
+    return "'" + input + "'";
+  }
+
   private void getCheckConstraintsInformation(StringBuilder constraintsInfo, CheckConstraint constraint) {
     formatOutput("Table:", constraint.getDatabaseName() + "." + constraint.getTableName(), constraintsInfo);
     Map<String, List<CheckConstraintCol>> checkConstraints = constraint.getCheckConstraints();