diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 65586278e2ec..dbb5565ff239 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1578,9 +1578,11 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis csList.add(cs); } if (csList.size() == engfd.getChildren().size()) { - Optional res = se.estimate(csList); + Optional res = se.estimate(csList, parentStats); if (res.isPresent()) { ColStatistics newStats = res.get(); + // NDV cannot exceed numRows + newStats.setCountDistint(Math.min(newStats.getCountDistint(), numRows)); colType = colType.toLowerCase(); newStats.setColumnType(colType); newStats.setColumnName(colName); @@ -2109,7 +2111,10 @@ private static List extractNDVGroupingColumns(List colStats for (ColStatistics cs : colStats) { if (cs != null) { long ndv = cs.getCountDistint(); - if (cs.getNumNulls() > 0) { + // NDV needs to be adjusted if a column has a known NDV along with NULL values + // or if a column happens to be "const NULL" + if ((ndv > 0 && cs.getNumNulls() > 0) || + (ndv == 0 && !cs.isEstimated() && cs.getNumNulls() == parentStats.getNumRows())) { ndv = StatsUtils.safeAdd(ndv, 1); } ndvValues.add(ndv); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java index 4de2867de7c0..48bb90820439 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java @@ -21,16 +21,26 @@ import java.util.Optional; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.StatsUtils; /** * Combines {@link ColStatistics} objects to provide the most pessimistic estimate. 
*/ public class PessimisticStatCombiner { + private final long numRows; private boolean inited; + private boolean hasUnknownNDV; private ColStatistics result; + public PessimisticStatCombiner(long numRows) { + this.numRows = numRows; + } + public void add(ColStatistics stat) { + // NDV==0 means unknown, unless it's a NULL constant (numNulls == numRows) + hasUnknownNDV = hasUnknownNDV || (stat.getCountDistint() == 0 && stat.getNumNulls() != numRows); + if (!inited) { inited = true; result = stat.clone(); @@ -41,8 +51,10 @@ public void add(ColStatistics stat) { if (stat.getAvgColLen() > result.getAvgColLen()) { result.setAvgColLen(stat.getAvgColLen()); } - if (stat.getCountDistint() > result.getCountDistint()) { - result.setCountDistint(stat.getCountDistint()); + if (hasUnknownNDV) { + result.setCountDistint(0); + } else { + result.setCountDistint(StatsUtils.safeAdd(result.getCountDistint(), stat.getCountDistint())); } if (stat.getNumNulls() < 0 || result.getNumNulls() < 0) { result.setNumNulls(-1); @@ -63,8 +75,8 @@ public void add(ColStatistics stat) { result.setFilterColumn(); } } + public Optional getResult() { return Optional.of(result); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java index 94aaa32ecfcb..80846fa24d30 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java @@ -22,6 +22,7 @@ import java.util.Optional; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.Statistics; /** * Enables statistics related computation on UDFs @@ -39,5 +40,19 @@ public interface StatEstimator { * @param argStats the statistics for every argument of the UDF * @return {@link ColStatistics} estimate for the actual UDF. 
*/ - public Optional estimate(List argStats); + default Optional estimate(List argStats) { + throw new UnsupportedOperationException("This estimator requires parentStats"); + } + + /** + * Computes the output statistics with access to parent statistics. + * Override this method when the estimator needs the parent operator's statistics (for example, its row count) for an accurate estimate. + * + * @param argStats the statistics for every argument of the UDF + * @param parentStats statistics from the parent operator + * @return {@link ColStatistics} estimate for the actual UDF. + */ + default Optional estimate(List argStats, Statistics parentStats) { + return estimate(argStats); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index bbca9242ecaa..1799669bda57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; @@ -89,8 +90,8 @@ public StatEstimator getStatEstimator() { static class CoalesceStatEstimator implements StatEstimator { @Override - public Optional estimate(List argStats) { - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + public Optional estimate(List argStats, Statistics parentStats) { + PessimisticStatCombiner combiner = new PessimisticStatCombiner(parentStats.getNumRows()); for (int i = 0; i < argStats.size(); i++) { combiner.add(argStats.get(i)); } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index eaa352317267..74bd2459debf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; @@ -187,8 +188,8 @@ public StatEstimator getStatEstimator() { static class IfStatEstimator implements StatEstimator { @Override - public Optional estimate(List argStats) { - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + public Optional estimate(List argStats, Statistics parentStats) { + PessimisticStatCombiner combiner = new PessimisticStatCombiner(parentStats.getNumRows()); combiner.add(argStats.get(1)); combiner.add(argStats.get(2)); return combiner.getResult(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java index e6d3580692d3..5dab62ab959e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import 
org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; @@ -143,8 +144,8 @@ public StatEstimator getStatEstimator() { static class WhenStatEstimator implements StatEstimator { @Override - public Optional estimate(List argStats) { - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + public Optional estimate(List argStats, Statistics parentStats) { + PessimisticStatCombiner combiner = new PessimisticStatCombiner(parentStats.getNumRows()); for (int i = 1; i < argStats.size(); i += 2) { combiner.add(argStats.get(i)); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index 3f76c554d446..48ee3b99cc35 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -25,6 +25,7 @@ import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Set; @@ -39,7 +40,14 @@ import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde.serdeConstants; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -499,4 +507,223 @@ void 
testScaleColStatisticsPreservesUnknownNumFalses() { assertEquals(-1, colStats.get(0).getNumFalses(), "Unknown numFalses (-1) should be preserved after scaling"); } + // Tests for computeNDVGroupingColumns / extractNDVGroupingColumns + + @Test + void testComputeNDVGroupingColumnsSourceColumnWithNulls() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs = new ColStatistics("col1", "string"); + cs.setCountDistint(100); + cs.setNumNulls(50); + cs.setIsEstimated(false); // source column + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs), parentStats, false); + assertEquals(101, ndv, "Source column with nulls should get +1 for NULL: 100 + 1 = 101"); + } + + @Test + void testComputeNDVGroupingColumnsSourceColumnNoNulls() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs = new ColStatistics("col1", "string"); + cs.setCountDistint(100); + cs.setNumNulls(0); + cs.setIsEstimated(false); + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs), parentStats, false); + assertEquals(100, ndv, "Source column without nulls should not get +1"); + } + + @Test + void testComputeNDVGroupingColumnsEstimatedExpression() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs = new ColStatistics("case_expr", "string"); + cs.setCountDistint(3); + cs.setNumNulls(500); + cs.setIsEstimated(true); // computed expression (e.g., CASE) + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs), parentStats, false); + assertEquals(4, ndv, "NDV with nulls: 3 + 1 = 4"); + } + + @Test + void testComputeNDVGroupingColumnsAllNullColumn() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs = 
new ColStatistics("col1", "string"); + cs.setCountDistint(0); + cs.setNumNulls(1000); // all rows are NULL + cs.setIsEstimated(false); + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs), parentStats, false); + assertEquals(1, ndv, "All-NULL column: NDV=0 but numNulls==numRows, so NDV becomes 1"); + } + + @Test + void testComputeNDVGroupingColumnsAllNullEstimatedColumn() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs = new ColStatistics("case_expr", "string"); + cs.setCountDistint(0); + cs.setNumNulls(1000); // all rows are NULL + cs.setIsEstimated(true); // from expression like CASE + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs), parentStats, false); + assertEquals(0, ndv, "Estimated all-NULL column: NDV stays 0 (unknown from combiner)"); + } + + @Test + void testComputeNDVGroupingColumnsUnknownNdv() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs = new ColStatistics("col1", "string"); + cs.setCountDistint(0); // unknown NDV + cs.setNumNulls(50); + cs.setIsEstimated(false); + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs), parentStats, false); + assertEquals(0, ndv, "Unknown NDV (0) should NOT get +1 to avoid false precision"); + } + + @Test + void testComputeNDVGroupingColumnsMultipleColumns() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics cs1 = new ColStatistics("col1", "string"); + cs1.setCountDistint(10); + cs1.setNumNulls(50); + cs1.setIsEstimated(false); + + ColStatistics cs2 = new ColStatistics("col2", "int"); + cs2.setCountDistint(5); + cs2.setNumNulls(0); + cs2.setIsEstimated(false); + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(cs1, cs2), parentStats, false); + // col1: 10 + 1 = 11 (has 
nulls), col2: 5 (no nulls) + // Product: 11 * 5 = 55 + assertEquals(55, ndv, "Product of NDVs: (10+1) * 5 = 55"); + } + + @Test + void testComputeNDVGroupingColumnsMixedEstimatedAndSource() { + Statistics parentStats = new Statistics(1000, 8000, 0, 0); + parentStats.setColumnStatsState(Statistics.State.COMPLETE); + + ColStatistics sourceCol = new ColStatistics("col1", "string"); + sourceCol.setCountDistint(10); + sourceCol.setNumNulls(50); + sourceCol.setIsEstimated(false); // source: gets +1 + + ColStatistics caseExpr = new ColStatistics("case_expr", "string"); + caseExpr.setCountDistint(3); + caseExpr.setNumNulls(200); + caseExpr.setIsEstimated(true); // estimated: also gets +1 (known NDV > 0 with NULLs) + + long ndv = StatsUtils.computeNDVGroupingColumns(Arrays.asList(sourceCol, caseExpr), parentStats, false); + // sourceCol: 10 + 1 = 11, caseExpr: 3 + 1 = 4 + // Product: 11 * 4 = 44 + assertEquals(44, ndv, "Mixed columns: (10+1) * (3+1) = 44"); + } + + @Test + void testGetColStatisticsFromExpressionNdvCappedAtNumRows() { + Statistics parentStats = new Statistics(100, 800, 0, 0); + + ColStatistics col1Stats = new ColStatistics("col1", "string"); + col1Stats.setCountDistint(80); + col1Stats.setNumNulls(0); + col1Stats.setAvgColLen(10); + + ColStatistics col2Stats = new ColStatistics("col2", "string"); + col2Stats.setCountDistint(80); + col2Stats.setNumNulls(0); + col2Stats.setAvgColLen(10); + + parentStats.setColumnStats(Arrays.asList(col1Stats, col2Stats)); + + GenericUDFIf udfIf = new GenericUDFIf(); + ExprNodeConstantDesc condExpr = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, true); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "t", false); + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col2", "t", false); + + ExprNodeGenericFuncDesc ifExpr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.stringTypeInfo, udfIf, "if", + Arrays.asList(condExpr, col1Expr, col2Expr)); + + ColStatistics result = 
StatsUtils.getColStatisticsFromExpression(new HiveConf(), parentStats, ifExpr); + + assertNotNull(result); + assertEquals(100, result.getCountDistint(), "NDV should be capped at numRows (100), not 160"); + } + + @Test + void testGetColStatisticsFromExpressionWhenNdvCapped() { + Statistics parentStats = new Statistics(100, 800, 0, 0); + + ColStatistics col1Stats = new ColStatistics("col1", "string"); + col1Stats.setCountDistint(60); + col1Stats.setNumNulls(0); + col1Stats.setAvgColLen(10); + + ColStatistics col2Stats = new ColStatistics("col2", "string"); + col2Stats.setCountDistint(70); + col2Stats.setNumNulls(0); + col2Stats.setAvgColLen(10); + + parentStats.setColumnStats(Arrays.asList(col1Stats, col2Stats)); + + GenericUDFWhen udfWhen = new GenericUDFWhen(); + ExprNodeConstantDesc condExpr = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, true); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "t", false); + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col2", "t", false); + + ExprNodeGenericFuncDesc whenExpr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.stringTypeInfo, udfWhen, "when", + Arrays.asList(condExpr, col1Expr, col2Expr)); + + ColStatistics result = StatsUtils.getColStatisticsFromExpression(new HiveConf(), parentStats, whenExpr); + + assertNotNull(result); + assertEquals(100, result.getCountDistint(), "NDV should be capped at numRows (100), not 130"); + } + + @Test + void testGetColStatisticsFromExpressionCoalesceNdvCapped() { + Statistics parentStats = new Statistics(100, 800, 0, 0); + + ColStatistics col1Stats = new ColStatistics("col1", "string"); + col1Stats.setCountDistint(50); + col1Stats.setNumNulls(20); + col1Stats.setAvgColLen(10); + + ColStatistics col2Stats = new ColStatistics("col2", "string"); + col2Stats.setCountDistint(80); + col2Stats.setNumNulls(10); + col2Stats.setAvgColLen(10); + + parentStats.setColumnStats(Arrays.asList(col1Stats, 
col2Stats)); + + GenericUDFCoalesce udfCoalesce = new GenericUDFCoalesce(); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "t", false); + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col2", "t", false); + + ExprNodeGenericFuncDesc coalesceExpr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.stringTypeInfo, udfCoalesce, "coalesce", + Arrays.asList(col1Expr, col2Expr)); + + ColStatistics result = StatsUtils.getColStatisticsFromExpression(new HiveConf(), parentStats, coalesceExpr); + + assertNotNull(result); + assertEquals(100, result.getCountDistint(), "NDV should be capped at numRows (100), not 130"); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java index 98bc589e40d3..281e7b82c27a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java @@ -25,12 +25,51 @@ class TestPessimisticStatCombiner { + @Test + void testNdvSumWhenBothKnown() { + ColStatistics stat1 = createStat("col1", "int", 50, 0, 4.0); + ColStatistics stat2 = createStat("col2", "int", 30, 0, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics result = combiner.getResult().get(); + assertEquals(80, result.getCountDistint(), "NDV should be summed: 50 + 30 = 80"); + } + + @Test + void testNdvUnknownPropagatedFromFirst() { + ColStatistics stat1 = createStat("col1", "int", 0, 0, 4.0); + ColStatistics stat2 = createStat("col2", "int", 100, 0, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics result = combiner.getResult().get(); + assertEquals(0, 
result.getCountDistint(), "Unknown NDV (0) should propagate"); + } + + @Test + void testNdvUnknownPropagatedFromSecond() { + ColStatistics stat1 = createStat("col1", "int", 100, 0, 4.0); + ColStatistics stat2 = createStat("col2", "int", 0, 0, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics result = combiner.getResult().get(); + assertEquals(0, result.getCountDistint(), "Unknown NDV (0) should propagate"); + } + @Test void testCombinePropagatesUnknownNumNullsFromFirst() { ColStatistics stat1 = createStat("col1", "int", 50, -1, 4.0); // unknown numNulls ColStatistics stat2 = createStat("col2", "int", 30, 100, 4.0); - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -43,7 +82,7 @@ void testCombinePropagatesUnknownNumNullsFromSecond() { ColStatistics stat1 = createStat("col1", "int", 50, 100, 4.0); ColStatistics stat2 = createStat("col2", "int", 30, -1, 4.0); // unknown numNulls - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -61,7 +100,7 @@ void testCombinePropagatesUnknownNumTruesFromFirst() { stat2.setNumTrues(100); stat2.setNumFalses(150); - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -79,7 +118,7 @@ void testCombinePropagatesUnknownNumTruesFromSecond() { stat2.setNumTrues(-1); // unknown stat2.setNumFalses(150); - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -97,7 +136,7 @@ void testCombinePropagatesUnknownNumFalsesFromFirst() { 
stat2.setNumTrues(50); stat2.setNumFalses(150); - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -115,7 +154,7 @@ void testCombinePropagatesUnknownNumFalsesFromSecond() { stat2.setNumTrues(50); stat2.setNumFalses(-1); // unknown - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -128,7 +167,7 @@ void testCombineBothUnknownNumNulls() { ColStatistics stat1 = createStat("col1", "int", 50, -1, 4.0); ColStatistics stat2 = createStat("col2", "int", 30, -1, 4.0); - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -146,7 +185,7 @@ void testCombineBothUnknownNumTruesAndNumFalses() { stat2.setNumTrues(-1); stat2.setNumFalses(-1); - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + PessimisticStatCombiner combiner = new PessimisticStatCombiner(1000); combiner.add(stat1); combiner.add(stat2); @@ -155,6 +194,63 @@ void testCombineBothUnknownNumTruesAndNumFalses() { assertEquals(-1, combined.getNumFalses(), "Both unknown should result in unknown (-1)"); } + @Test + void testNullConstantDoesNotContributeToNdv() { + long numRows = 100; + ColStatistics nullConstant = createStat("null", "int", 0, numRows, 0.0); + ColStatistics regularStat = createStat("col", "int", 50, 10, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(numRows); + combiner.add(nullConstant); + combiner.add(regularStat); + + ColStatistics result = combiner.getResult().get(); + assertEquals(50, result.getCountDistint(), "NULL constant should not contribute to NDV"); + } + + @Test + void testNullConstantAsSecondDoesNotContributeToNdv() { + long numRows = 100; + ColStatistics 
regularStat = createStat("col", "int", 50, 10, 4.0); + ColStatistics nullConstant = createStat("null", "int", 0, numRows, 0.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(numRows); + combiner.add(regularStat); + combiner.add(nullConstant); + + ColStatistics result = combiner.getResult().get(); + assertEquals(50, result.getCountDistint(), "NULL constant should not contribute to NDV"); + } + + @Test + void testMultipleNullConstantsResultInZeroNdv() { + long numRows = 100; + ColStatistics nullConstant1 = createStat("null1", "int", 0, numRows, 0.0); + ColStatistics nullConstant2 = createStat("null2", "int", 0, numRows, 0.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(numRows); + combiner.add(nullConstant1); + combiner.add(nullConstant2); + + ColStatistics result = combiner.getResult().get(); + assertEquals(0, result.getCountDistint(), "Multiple NULL constants should result in NDV=0"); + assertEquals(numRows, result.getNumNulls(), "numNulls should be numRows"); + } + + @Test + void testUnknownNdvNotConfusedWithNullConstant() { + long numRows = 100; + ColStatistics unknownNdv = createStat("col", "int", 0, 10, 4.0); + ColStatistics regularStat = createStat("col2", "int", 50, 5, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(numRows); + combiner.add(unknownNdv); + combiner.add(regularStat); + + ColStatistics result = combiner.getResult().get(); + assertEquals(0, result.getCountDistint(), "Unknown NDV should propagate as 0"); + } + private ColStatistics createStat(String name, String type, long ndv, long numNulls, double avgColLen) { ColStatistics stat = new ColStatistics(name, type); stat.setCountDistint(ndv); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestStatEstimator.java new file mode 100644 index 000000000000..7fd715f4a98d --- /dev/null +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestStatEstimator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.stats.estimator; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.junit.jupiter.api.Test; + +class TestStatEstimator { + + @Test + void testDefaultEstimateThrowsUnsupportedOperation() { + StatEstimator estimator = new StatEstimator() {}; + List argStats = Arrays.asList(new ColStatistics("col", "int")); + + assertThrows(UnsupportedOperationException.class, () -> estimator.estimate(argStats), + "Default estimate(argStats) should throw UnsupportedOperationException"); + } + + @Test + void testDefaultEstimateWithParentStatsCallsEstimate() { + StatEstimator estimator = new StatEstimator() {}; + List argStats = Arrays.asList(new ColStatistics("col", "int")); + Statistics parentStats = new Statistics(100, 800, 0, 0); + + assertThrows(UnsupportedOperationException.class, () -> estimator.estimate(argStats, parentStats), + "Default estimate(argStats, 
parentStats) should delegate to estimate(argStats) which throws"); + } +} diff --git a/ql/src/test/queries/clientpositive/ndv_case_const.q b/ql/src/test/queries/clientpositive/ndv_case_const.q new file mode 100644 index 000000000000..7132e163aa13 --- /dev/null +++ b/ql/src/test/queries/clientpositive/ndv_case_const.q @@ -0,0 +1,40 @@ +-- Tests for CASE expression NDV estimation in Group By Operator. +-- Verifies that "Statistics: Num rows" reflects accurate NDV computation +-- when CASE branches contain constants, NULLs, and column references. +CREATE TABLE t (cond INT, c2 STRING, c100 STRING); +ALTER TABLE t UPDATE STATISTICS SET('numRows'='10000','rawDataSize'='1000000'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN c2 SET('numDVs'='2','numNulls'='0','avgColLen'='5','maxColLen'='10'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN c100 SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10'); + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE NULL END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL WHEN cond=3 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN 'A' WHEN cond=3 THEN NULL ELSE 'B' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL WHEN cond=3 THEN c100 ELSE 'A' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c2 WHEN cond=2 THEN 
c100 ELSE 'A' END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c2 WHEN cond=2 THEN c100 ELSE c2 END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END x FROM t) sub GROUP BY x; + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END x FROM t) sub GROUP BY x; + +-- Test NDV cap: sum of branch NDVs (100+100+100+1=301) exceeds numRows (200) +CREATE TABLE t_small (cond INT, c100a STRING, c100b STRING, c100c STRING); +ALTER TABLE t_small UPDATE STATISTICS SET('numRows'='200','rawDataSize'='20000'); +ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0'); +ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100a SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10'); +ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100b SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10'); +ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100c SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10'); + +EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c100a WHEN cond=2 THEN c100b WHEN cond=3 THEN c100c ELSE 'A' END x FROM t_small) sub GROUP BY x; diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out index 995733564a08..17db16415c01 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out @@ -492,13 +492,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: vectorized, llap @@ -508,14 +508,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out index dbcf49b202e7..4f4a0b3df537 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: 
bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: 
bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out index ad7051398156..6e45676ba107 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out @@ -96,7 +96,7 @@ STAGE 
PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 
Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), 
UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out index 148303926d66..4e5651cccc53 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out index 2c9c9015c173..85f1ea93c068 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out @@ -180,13 +180,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -198,14 +198,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: 
binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out index 5b1e537b938a..ab9805c19485 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out @@ -156,13 +156,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 
(type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -174,14 +174,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ndv_case_const.q.out b/ql/src/test/results/clientpositive/llap/ndv_case_const.q.out new file mode 100644 index 000000000000..a25b1e35b17d --- /dev/null +++ 
b/ql/src/test/results/clientpositive/llap/ndv_case_const.q.out @@ -0,0 +1,868 @@ +PREHOOK: query: CREATE TABLE t (cond INT, c2 STRING, c100 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: CREATE TABLE t (cond INT, c2 STRING, c100 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS SET('numRows'='10000','rawDataSize'='1000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS SET('numRows'='10000','rawDataSize'='1000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c2 SET('numDVs'='2','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c2 SET('numDVs'='2','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c100 SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c100 
SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') ELSE ('C') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column 
stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') WHEN ((cond = 3)) THEN ('A') ELSE ('B') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 340 Basic stats: 
COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE NULL END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE NULL END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') ELSE (null) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data 
size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN (null) WHEN ((cond = 2)) THEN ('A') ELSE ('B') END (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL WHEN cond=3 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL WHEN cond=3 THEN 'A' ELSE 'B' END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked 
pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond) IN (1, 2)) THEN (null) WHEN ((cond = 3)) THEN ('A') ELSE ('B') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN 'A' WHEN cond=3 THEN NULL ELSE 'B' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN 'A' WHEN cond=3 THEN NULL ELSE 'B' END x FROM t) sub GROUP BY x 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN (null) WHEN ((cond = 2)) THEN ('A') WHEN ((cond = 3)) THEN (null) ELSE ('B') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 
THEN NULL WHEN cond=2 THEN NULL WHEN cond=3 THEN c100 ELSE 'A' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL WHEN cond=3 THEN c100 ELSE 'A' END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond) IN (1, 2)) THEN (null) WHEN ((cond = 3)) THEN (c100) ELSE ('A') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.9898 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 102 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 102 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 102 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 102 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c2 WHEN cond=2 THEN c100 ELSE 'A' END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c2 WHEN cond=2 THEN c100 ELSE 'A' END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN (c2) WHEN ((cond = 2)) THEN (c100) ELSE ('A') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.9897 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 103 Data size: 9167 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 9167 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 103 Data size: 9167 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 103 Data size: 9167 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c2 WHEN cond=2 THEN c100 ELSE c2 END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c2 WHEN cond=2 THEN c100 ELSE c2 END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN (c2) WHEN ((cond = 2)) THEN (c100) ELSE (c2) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.9896 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 104 Data size: 9256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 104 Data size: 9256 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 104 Data size: 9256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 104 Data size: 9256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') WHEN ((cond = 3)) THEN ('C') ELSE (c2) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END x FROM t) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END x FROM t) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') ELSE (c100) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.9898 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 102 Data size: 9078 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 102 Data size: 9078 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 102 Data size: 9078 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 102 Data size: 9078 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE t_small (cond INT, c100a STRING, c100b STRING, c100c STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_small +POSTHOOK: query: CREATE TABLE t_small (cond INT, c100a STRING, c100b STRING, c100c STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_small +PREHOOK: query: ALTER TABLE t_small UPDATE STATISTICS SET('numRows'='200','rawDataSize'='20000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t_small 
+PREHOOK: Output: default@t_small +POSTHOOK: query: ALTER TABLE t_small UPDATE STATISTICS SET('numRows'='200','rawDataSize'='20000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t_small +POSTHOOK: Output: default@t_small +PREHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t_small +PREHOOK: Output: default@t_small +POSTHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t_small +POSTHOOK: Output: default@t_small +PREHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100a SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t_small +PREHOOK: Output: default@t_small +POSTHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100a SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t_small +POSTHOOK: Output: default@t_small +PREHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100b SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t_small +PREHOOK: Output: default@t_small +POSTHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100b SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t_small +POSTHOOK: Output: default@t_small +PREHOOK: query: ALTER TABLE t_small UPDATE STATISTICS FOR COLUMN c100c SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t_small +PREHOOK: Output: default@t_small +POSTHOOK: query: ALTER TABLE t_small UPDATE 
STATISTICS FOR COLUMN c100c SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t_small +POSTHOOK: Output: default@t_small +PREHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c100a WHEN cond=2 THEN c100b WHEN cond=3 THEN c100c ELSE 'A' END x FROM t_small) sub GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t_small +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT x FROM (SELECT CASE WHEN cond=1 THEN c100a WHEN cond=2 THEN c100b WHEN cond=3 THEN c100c ELSE 'A' END x FROM t_small) sub GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_small +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t_small + Statistics: Num rows: 200 Data size: 54200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN (c100a) WHEN ((cond = 2)) THEN (c100b) WHEN ((cond = 3)) THEN (c100c) ELSE ('A') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 200 Data size: 54200 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 200 Data size: 17800 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 200 Data size: 17800 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By 
Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 200 Data size: 17800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 200 Data size: 17800 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out index 7125704c33d2..2a223991fe36 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out @@ -130,7 +130,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1386 Data size: 194258 Basic stats: COMPLETE Column stats: COMPLETE @@ -487,7 +487,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1386 Data size: 194258 Basic stats: COMPLETE Column stats: COMPLETE 
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out index 5acc12c3b71d..12c88df0aebf 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out @@ -120,7 +120,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 758 Data size: 130530 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out index d7d3f4919183..3653c9466248 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out @@ -116,7 +116,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE @@ -154,16 +154,16 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: 
timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 1208432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / if((_col9 = 1L), null, (_col9 - 1))), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / if((_col13 = 1L), null, (_col13 - 1))), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / if((_col16 = 1L), null, (_col16 - 1))) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz sort order: +++++++ - Statistics: Num rows: 6144 Data 
size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: llap @@ -175,10 +175,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
a/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out index eeab9c89af72..a457b27af643 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out @@ -93,7 +93,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out index eeab9c89af72..a457b27af643 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out @@ -93,7 +93,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/scratch_col_issue.q.out b/ql/src/test/results/clientpositive/llap/scratch_col_issue.q.out index 5418ef860de3..1e23944fcaf7 100644 --- a/ql/src/test/results/clientpositive/llap/scratch_col_issue.q.out +++ b/ql/src/test/results/clientpositive/llap/scratch_col_issue.q.out @@ -189,7 +189,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: if((_col1) IN ('CertificateOfDeposit', 'RecurringDeposit', 'TermDeposit'), COALESCE(from_unixtime(to_unix_timestamp(CAST( _col2 AS DATE)), 'MM-dd-yyyy'),' '), '') (type: string) outputColumnNames: _col0 @@ -198,13 +198,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [14] selectExpressions: IfExprCondExprColumn(col 9:boolean, col 13:string, col 5:string)(children: StringColumnInList(col 1, values CertificateOfDeposit, RecurringDeposit, TermDeposit) -> 9:boolean, VectorCoalesce(columns [5, 12])(children: VectorUDFAdaptor(from_unixtime(to_unix_timestamp(CAST( _col2 AS DATE)), 'MM-dd-yyyy'))(children: VectorUDFUnixTimeStampDate(col 10)(children: CastStringToDate(col 2:string) -> 10:date) -> 11:bigint) -> 5:string, ConstantVectorExpression(val ) -> 12:string) -> 13:string, ConstantVectorExpression(val ) -> 5:string) -> 14:string - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out index d1911fdb7f8b..0e96fffb2c09 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -131,7 +131,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: 
boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1386 Data size: 194258 Basic stats: COMPLETE Column stats: COMPLETE @@ -511,7 +511,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1386 Data size: 194258 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 25bfeb19bfcf..62feb4c66cff 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -121,7 +121,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 758 Data size: 130530 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 6732aba7edd2..8c20b9f5fca7 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -117,7 +117,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE @@ -163,16 +163,16 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 1208432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / if((_col9 = 1L), null, (_col9 - 1))), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / if((_col13 = 1L), null, (_col13 - 1))), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / if((_col16 = 1L), null, (_col16 - 1))) (type: double), 
(UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz sort order: +++++++ - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: llap @@ -184,10 +184,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), 
VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6104 Data size: 2575728 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 7e8cb81144fc..7de5092fc76d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -94,7 +94,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 7e8cb81144fc..7de5092fc76d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -94,7 +94,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: 
string), _col1 (type: double), _col2 (type: timestamp) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index da82903d7963..8f7d63935f59 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -2949,10 +2949,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col0 (type: timestamp), _col1 (type: string) - minReductionHashAggr: 0.5133463 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1622368 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) null sort order: zz @@ -2962,7 +2962,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1622368 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: 
double), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: tinyint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: double), _col18 (type: bigint), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2997,7 +2997,7 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5979 Data size: 1578826 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp), _col1 (type: string), power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) (type: double), (UDFToDouble(_col5) / _col6) (type: double), _col7 (type: bigint), _col8 (type: tinyint), ((_col9 - ((_col10 * _col10) / _col11)) / if((_col11 = 1L), null, (_col11 - 1))) (type: double), ((_col12 - ((_col13 * _col13) / _col14)) / _col14) (type: double), (UDFToDouble(_col15) / _col16) (type: double), ((_col12 - ((_col13 * _col13) / _col14)) / if((_col14 = 1L), null, (_col14 - 1))) (type: double), (_col17 / _col18) (type: double), _col19 (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), power(((_col20 - ((_col21 * _col21) / _col22)) / _col22), 0.5) (type: double), _col15 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -3006,12 +3006,12 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 27, 29, 7, 8, 36, 40, 42, 49, 
50, 19, 54, 59, 15] selectExpressions: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 24:double)(children: DoubleColDivideLongColumn(col 23:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 23:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double, DoubleColDivideLongColumn(col 28:double, col 6:bigint)(children: CastLongToDouble(col 5:bigint) -> 28:double) -> 29:double, DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double, DoubleColDivideLongColumn(col 39:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 37:double) -> 38:double) -> 39:double) -> 40:double, DoubleColDivideLongColumn(col 41:double, col 16:bigint)(children: CastLongToDouble(col 15:bigint) -> 41:double) -> 42:double, DoubleColDivideLongColumn(col 45:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 43:double) -> 44:double) -> 45:double, IfExprNullCondExpr(col 46:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 14:bigint, val 1) -> 46:boolean, LongColSubtractLongScalar(col 14:bigint, val 1) -> 47:bigint) -> 
48:bigint) -> 49:double, DoubleColDivideLongColumn(col 17:double, col 18:bigint) -> 50:double, DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 52:double)(children: DoubleColDivideLongColumn(col 51:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 51:double) -> 52:double) -> 53:double) -> 54:double, FuncPowerDoubleToDouble(col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 20:double, col 56:double)(children: DoubleColDivideLongColumn(col 55:double, col 22:bigint)(children: DoubleColMultiplyDoubleColumn(col 21:double, col 21:double) -> 55:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double - Statistics: Num rows: 5980 Data size: 1196404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5979 Data size: 1196170 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++++++++++++++++++++++++++ keys: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175D) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28D - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28D - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175D)) (type: double), _col6 (type: double), (_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), _col2 (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175D / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (_col2 * 10.175D) (type: double), _col10 (type: double), (((_col6 + 
(((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175D) (type: double), (10.175D % (10.175D / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28D - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), _col4 (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28D) (type: double) null sort order: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz - Statistics: Num rows: 5980 Data size: 1196404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5979 Data size: 1196170 Basic stats: COMPLETE Column stats: COMPLETE top n: 50 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -3025,7 +3025,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 27, 23, 24, 29, 25, 26, 7, 35, 31, 8, 30, 32, 36, 28, 27, 38, 40, 37, 42, 49, 41, 39, 50, 43, 45, 48, 19, 54, 44, 52, 145, 59, 15, 53, 7, 7, 55] selectExpressions: DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 23:double, DoubleColUnaryMinus(col 27:double) -> 24:double, DoubleColUnaryMinus(col 27:double) -> 25:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 26:double, LongColUnaryMinus(col 7:bigint) -> 35:bigint, DoubleColMultiplyDoubleColumn(col 28:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 30:double) -> 31:double, DoubleColMultiplyDoubleColumn(col 32:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 
27:double) -> 30:double) -> 32:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 28:double) -> 30:double, DoubleColUnaryMinus(col 28:double)(children: DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 28:double) -> 32:double, DoubleColAddDoubleColumn(col 36:double, col 37:double)(children: DoubleColMultiplyDoubleColumn(col 38:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 37:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 37:double) -> 38:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 28:double) -> 37:double) -> 28:double, DoubleColDivideDoubleColumn(col 37:double, col 27:double)(children: CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 37:double) -> 38:double, DoubleScalarDivideDoubleColumn(val 10.175, col 29:double) -> 37:double, DoubleColSubtractDoubleColumn(col 39:double, col 43:double)(children: DoubleColAddDoubleColumn(col 36:double, col 41:double)(children: DoubleColMultiplyDoubleColumn(col 43:double, col 39:double)(children: DoubleColMultiplyDoubleColumn(col 39:double, col 41:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 39:double, DoubleColUnaryMinus(col 27:double) -> 41:double) -> 43:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 39:double) -> 41:double) -> 39:double, DoubleColMultiplyDoubleColumn(col 44:double, col 41:double)(children: DoubleColMultiplyDoubleColumn(col 41:double, col 43:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 41:double, DoubleColUnaryMinus(col 27:double) -> 43:double) -> 44:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 41:double) -> 43:double) -> 41:double, DoubleColMultiplyDoubleScalar(col 
27:double, val 10.175) -> 39:double, DoubleColMultiplyDoubleScalar(col 44:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 43:double, col 45:double)(children: DoubleColAddDoubleColumn(col 36:double, col 44:double)(children: DoubleColMultiplyDoubleColumn(col 45:double, col 43:double)(children: DoubleColMultiplyDoubleColumn(col 43:double, col 44:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 43:double, DoubleColUnaryMinus(col 27:double) -> 44:double) -> 45:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 43:double) -> 44:double) -> 43:double, DoubleColMultiplyDoubleColumn(col 51:double, col 44:double)(children: DoubleColMultiplyDoubleColumn(col 44:double, col 45:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 44:double, DoubleColUnaryMinus(col 27:double) -> 45:double) -> 51:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 44:double) -> 45:double) -> 44:double) -> 43:double, DoubleScalarModuloDoubleColumn(val 10.175, col 44:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 29:double) -> 44:double) -> 45:double, LongColUnaryMinus(col 8:tinyint) -> 48:tinyint, DoubleColUnaryMinus(col 52:double)(children: DoubleColMultiplyDoubleColumn(col 44:double, col 51:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 44:double, DoubleColUnaryMinus(col 27:double) -> 51:double) -> 52:double) -> 44:double, DoubleColModuloDoubleColumn(col 51:double, col 50:double)(children: DoubleColUnaryMinus(col 27:double) -> 51:double) -> 52:double, DecimalScalarDivideDecimalColumn(val -26.28, col 127:decimal(3,0))(children: CastLongToDecimal(col 71:tinyint)(children: LongColUnaryMinus(col 8:tinyint) -> 71:tinyint) -> 127:decimal(3,0)) -> 145:decimal(8,6), DoubleColDivideDoubleColumn(col 51:double, col 40:double)(children: DoubleColAddDoubleColumn(col 36:double, col 
53:double)(children: DoubleColMultiplyDoubleColumn(col 55:double, col 51:double)(children: DoubleColMultiplyDoubleColumn(col 51:double, col 53:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 51:double, DoubleColUnaryMinus(col 27:double) -> 53:double) -> 55:double, CastLongToDouble(col 71:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 71:bigint) -> 51:double) -> 53:double) -> 51:double) -> 53:double, DoubleColModuloDoubleScalar(col 51:double, val -26.28)(children: DoubleColAddDoubleColumn(col 36:double, col 55:double)(children: DoubleColMultiplyDoubleColumn(col 56:double, col 51:double)(children: DoubleColMultiplyDoubleColumn(col 51:double, col 55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 51:double, DoubleColUnaryMinus(col 27:double) -> 55:double) -> 56:double, CastLongToDouble(col 71:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 71:bigint) -> 51:double) -> 55:double) -> 51:double) -> 55:double - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5979 Data size: 2738988 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: 
double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) null sort order: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz @@ -3034,7 +3034,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5979 Data size: 2738988 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3051,7 +3051,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 2, 17, 18, 19, 20, 21, 22, 3, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 8, 8, 38] - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5979 Data size: 2738988 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out b/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out index af0c461861f3..0e1519cf20a9 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out @@ -1207,13 +1207,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 183480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: timestamp) 
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 183480 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1223,10 +1223,10 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 91760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 91760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat