diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 47ee4eea108c72..2841b31f12c887 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -463,6 +463,7 @@ REAL: 'REAL'; REBALANCE: 'REBALANCE'; RECENT: 'RECENT'; RECOVER: 'RECOVER'; +RECURSIVE: 'RECURSIVE'; RECYCLE: 'RECYCLE'; REFRESH: 'REFRESH'; REFERENCES: 'REFERENCES'; diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index eb11ce49cd1bfd..a244e7fd7a6ee8 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1272,7 +1272,7 @@ querySpecification ; cte - : WITH aliasQuery (COMMA aliasQuery)* + : WITH RECURSIVE? aliasQuery (COMMA aliasQuery)* ; aliasQuery @@ -2256,6 +2256,7 @@ nonReserved | RANDOM | RECENT | RECOVER + | RECURSIVE | RECYCLE | REFRESH | REPEATABLE diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java index 254edea5fbbc89..d793c4f70c52d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java @@ -20,6 +20,7 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.RecursiveCteTempTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.IdGenerator; import org.apache.doris.thrift.TDescriptorTable; @@ -100,6 +101,10 @@ public TDescriptorTable toThrift() { } for (TableIf tbl : referencedTbls.values()) { + if (tbl instanceof RecursiveCteTempTable) { + // skip recursive cte temp table + continue; + } result.addToTableDescriptors(tbl.toThrift()); } thriftDescTable = result; 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/RecursiveCteTempTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/RecursiveCteTempTable.java new file mode 100644 index 00000000000000..54c819d45dd5dd --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/RecursiveCteTempTable.java @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.catalog; + +import org.apache.doris.common.SystemIdGenerator; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** Table whose type is {@code TableType.RECURSIVE_CTE_TEMP_TABLE}; its id is taken from {@code SystemIdGenerator.getNextId()}. NOTE(review): presumably a planner-side stand-in for the working table of a recursive CTE - confirm against the binder that creates it. */ public class RecursiveCteTempTable extends Table { + /** Creates the table with a freshly generated system id, the given table name and the given full schema. */ public RecursiveCteTempTable(String tableName, List fullSchema) { + super(SystemIdGenerator.getNextId(), tableName, TableType.RECURSIVE_CTE_TEMP_TABLE, fullSchema); + } + + /** Returns a single-element list that holds only this table's {@code name}. */ @Override + public List getFullQualifiers() { + return ImmutableList.of(name); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 00146b5514a317..74fa52298281c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -320,13 +320,13 @@ default PrimaryKeyConstraint tryGetPrimaryKeyForForeignKeyUnsafe( default void addForeignConstraint(String name, ImmutableList columns, TableIf referencedTable, ImmutableList referencedColumns, boolean replay) { Map constraintMap = getConstraintsMapUnsafe(); - ForeignKeyConstraint foreignKeyConstraint = - new ForeignKeyConstraint(name, columns, referencedTable, referencedColumns); + ForeignKeyConstraint foreignKeyConstraint = new ForeignKeyConstraint(name, columns, referencedTable, + referencedColumns); checkConstraintNotExistenceUnsafe(name, foreignKeyConstraint, constraintMap); PrimaryKeyConstraint requirePrimaryKeyName = new PrimaryKeyConstraint(name, foreignKeyConstraint.getReferencedColumnNames()); - PrimaryKeyConstraint primaryKeyConstraint = - tryGetPrimaryKeyForForeignKeyUnsafe(requirePrimaryKeyName, referencedTable); + PrimaryKeyConstraint primaryKeyConstraint = tryGetPrimaryKeyForForeignKeyUnsafe(requirePrimaryKeyName, + referencedTable); primaryKeyConstraint.addForeignTable(this); constraintMap.put(name, foreignKeyConstraint); if (!replay) { @@ -446,10 +446,13 @@ default boolean needReadLockWhenPlan() { */ enum TableType { MYSQL, ODBC, OLAP,
SCHEMA, INLINE_VIEW, VIEW, BROKER, ELASTICSEARCH, HIVE, - @Deprecated ICEBERG, @Deprecated HUDI, JDBC, + @Deprecated + ICEBERG, @Deprecated + HUDI, JDBC, TABLE_VALUED_FUNCTION, HMS_EXTERNAL_TABLE, ES_EXTERNAL_TABLE, MATERIALIZED_VIEW, JDBC_EXTERNAL_TABLE, ICEBERG_EXTERNAL_TABLE, TEST_EXTERNAL_TABLE, PAIMON_EXTERNAL_TABLE, MAX_COMPUTE_EXTERNAL_TABLE, - HUDI_EXTERNAL_TABLE, TRINO_CONNECTOR_EXTERNAL_TABLE, LAKESOUl_EXTERNAL_TABLE, DICTIONARY, DORIS_EXTERNAL_TABLE; + HUDI_EXTERNAL_TABLE, TRINO_CONNECTOR_EXTERNAL_TABLE, LAKESOUl_EXTERNAL_TABLE, DICTIONARY, DORIS_EXTERNAL_TABLE, + RECURSIVE_CTE_TEMP_TABLE; public String toEngineName() { switch (this) { @@ -492,6 +495,8 @@ public String toEngineName() { return "dictionary"; case DORIS_EXTERNAL_TABLE: return "External_Doris"; + case RECURSIVE_CTE_TEMP_TABLE: + return "RecursiveCteTempTable"; default: return null; } @@ -531,6 +536,7 @@ public String toMysqlType() { case MATERIALIZED_VIEW: case TRINO_CONNECTOR_EXTERNAL_TABLE: case DORIS_EXTERNAL_TABLE: + case RECURSIVE_CTE_TEMP_TABLE: return "BASE TABLE"; default: return null; @@ -664,4 +670,3 @@ default Optional getSysTableFunctionRef( return Optional.empty(); } } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java index 6980765049f37b..1517d6f2a7a52c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java @@ -133,6 +133,8 @@ public class CascadesContext implements ScheduleContext { private final boolean isEnableExprTrace; private int groupExpressionCount = 0; + private Optional currentRecursiveCteName; + private List recursiveCteOutputs; /** * Constructor of OptimizerContext. 
@@ -142,7 +144,8 @@ public class CascadesContext implements ScheduleContext { */ private CascadesContext(Optional parent, Optional currentTree, StatementContext statementContext, Plan plan, Memo memo, - CTEContext cteContext, PhysicalProperties requireProperties, boolean isLeadingDisableJoinReorder) { + CTEContext cteContext, PhysicalProperties requireProperties, boolean isLeadingDisableJoinReorder, + Optional currentRecursiveCteName, List recursiveCteOutputs) { this.parent = Objects.requireNonNull(parent, "parent should not null"); this.currentTree = Objects.requireNonNull(currentTree, "currentTree should not null"); this.statementContext = Objects.requireNonNull(statementContext, "statementContext should not null"); @@ -167,6 +170,8 @@ private CascadesContext(Optional parent, Optional curren this.isEnableExprTrace = false; } this.isLeadingDisableJoinReorder = isLeadingDisableJoinReorder; + this.currentRecursiveCteName = currentRecursiveCteName; + this.recursiveCteOutputs = recursiveCteOutputs; } /** init a temporary context to rewrite expression */ @@ -181,7 +186,7 @@ public static CascadesContext initTempContext() { } return newContext(Optional.empty(), Optional.empty(), statementContext, DUMMY_PLAN, - new CTEContext(), PhysicalProperties.ANY, false); + new CTEContext(), PhysicalProperties.ANY, false, Optional.empty(), ImmutableList.of()); } /** @@ -190,24 +195,25 @@ public static CascadesContext initTempContext() { public static CascadesContext initContext(StatementContext statementContext, Plan initPlan, PhysicalProperties requireProperties) { return newContext(Optional.empty(), Optional.empty(), statementContext, - initPlan, new CTEContext(), requireProperties, false); + initPlan, new CTEContext(), requireProperties, false, Optional.empty(), ImmutableList.of()); } /** * use for analyze cte. 
we must pass CteContext from outer since we need to get right scope of cte */ public static CascadesContext newContextWithCteContext(CascadesContext cascadesContext, - Plan initPlan, CTEContext cteContext) { + Plan initPlan, CTEContext cteContext, Optional currentRecursiveCteName, + List recursiveCteOutputs) { return newContext(Optional.of(cascadesContext), Optional.empty(), cascadesContext.getStatementContext(), initPlan, cteContext, PhysicalProperties.ANY, - cascadesContext.isLeadingDisableJoinReorder - ); + cascadesContext.isLeadingDisableJoinReorder, currentRecursiveCteName, recursiveCteOutputs); } public static CascadesContext newCurrentTreeContext(CascadesContext context) { return CascadesContext.newContext(context.getParent(), context.getCurrentTree(), context.getStatementContext(), context.getRewritePlan(), context.getCteContext(), - context.getCurrentJobContext().getRequiredProperties(), context.isLeadingDisableJoinReorder); + context.getCurrentJobContext().getRequiredProperties(), context.isLeadingDisableJoinReorder, + Optional.empty(), ImmutableList.of()); } /** @@ -216,14 +222,17 @@ public static CascadesContext newCurrentTreeContext(CascadesContext context) { public static CascadesContext newSubtreeContext(Optional subtree, CascadesContext context, Plan plan, PhysicalProperties requireProperties) { return CascadesContext.newContext(Optional.of(context), subtree, context.getStatementContext(), - plan, context.getCteContext(), requireProperties, context.isLeadingDisableJoinReorder); + plan, context.getCteContext(), requireProperties, context.isLeadingDisableJoinReorder, Optional.empty(), + ImmutableList.of()); } private static CascadesContext newContext(Optional parent, Optional subtree, StatementContext statementContext, Plan initPlan, CTEContext cteContext, - PhysicalProperties requireProperties, boolean isLeadingDisableJoinReorder) { + PhysicalProperties requireProperties, boolean isLeadingDisableJoinReorder, + Optional currentRecursiveCteName, List 
recursiveCteOutputs) { return new CascadesContext(parent, subtree, statementContext, initPlan, null, - cteContext, requireProperties, isLeadingDisableJoinReorder); + cteContext, requireProperties, isLeadingDisableJoinReorder, currentRecursiveCteName, + recursiveCteOutputs); } public CascadesContext getRoot() { @@ -250,6 +259,18 @@ public synchronized boolean isTimeout() { return isTimeout; } + public Optional getCurrentRecursiveCteName() { + return currentRecursiveCteName; + } + + public List getRecursiveCteOutputs() { + return recursiveCteOutputs; + } + + public boolean isAnalyzingRecursiveCteAnchorChild() { + return currentRecursiveCteName.isPresent() && recursiveCteOutputs.isEmpty(); + } + /** * Init memo with plan */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 028007eab6fcba..d3ffe2491b5bcf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -165,6 +165,9 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalQuickSort; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteRecursiveChild; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; import org.apache.doris.nereids.trees.plans.physical.PhysicalResultSink; @@ -218,6 +221,8 @@ import org.apache.doris.planner.PartitionSortNode; import org.apache.doris.planner.PlanFragment; import 
org.apache.doris.planner.PlanNode; +import org.apache.doris.planner.RecursiveCteNode; +import org.apache.doris.planner.RecursiveCteScanNode; import org.apache.doris.planner.RepeatNode; import org.apache.doris.planner.ResultFileSink; import org.apache.doris.planner.ResultSink; @@ -1170,6 +1175,28 @@ public PlanFragment visitPhysicalSchemaScan(PhysicalSchemaScan schemaScan, PlanT return planFragment; } + @Override + public PlanFragment visitPhysicalRecursiveCteScan(PhysicalRecursiveCteScan recursiveCteScan, + PlanTranslatorContext context) { + TableIf table = recursiveCteScan.getTable(); + List slots = ImmutableList.copyOf(recursiveCteScan.getOutput()); + TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context); + + RecursiveCteScanNode scanNode = new RecursiveCteScanNode(table != null ? table.getName() : "", + context.nextPlanNodeId(), tupleDescriptor, context.getScanContext()); + scanNode.setNereidsId(recursiveCteScan.getId()); + context.getNereidsIdToPlanNodeIdMap().put(recursiveCteScan.getId(), scanNode.getId()); + Utils.execWithUncheckedException(scanNode::initScanRangeLocations); + + translateRuntimeFilter(recursiveCteScan, scanNode, context); + + context.addScanNode(scanNode, recursiveCteScan); + PlanFragment planFragment = createPlanFragment(scanNode, DataPartition.RANDOM, recursiveCteScan); + context.addPlanFragment(planFragment); + updateLegacyPlanIdToPhysicalPlan(planFragment.getPlanRoot(), recursiveCteScan); + return planFragment; + } + private List translateToExprs(List expressions, PlanTranslatorContext context) { List exprs = Lists.newArrayListWithCapacity(expressions.size()); for (Expression expression : expressions) { @@ -2378,8 +2405,10 @@ public PlanFragment visitPhysicalProject(PhysicalProject project if (inputPlanNode instanceof OlapScanNode) { ((OlapScanNode) inputPlanNode).updateRequiredSlots(context, requiredByProjectSlotIdSet); } - updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet, - 
requiredByProjectSlotIdSet, context); + if (!(inputPlanNode instanceof RecursiveCteScanNode)) { + updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet, + requiredByProjectSlotIdSet, context); + } } else { if (project.child() instanceof PhysicalDeferMaterializeTopN) { inputFragment.setOutputExprs(allProjectionExprs); @@ -2392,6 +2421,78 @@ public PlanFragment visitPhysicalProject(PhysicalProject project return inputFragment; } + @Override + public PlanFragment visitPhysicalRecursiveCte(PhysicalRecursiveCte recursiveCte, PlanTranslatorContext context) { + List childrenFragments = new ArrayList<>(); + for (Plan plan : recursiveCte.children()) { + childrenFragments.add(plan.accept(this, context)); + } + + TupleDescriptor setTuple = generateTupleDesc(recursiveCte.getOutput(), null, context); + List outputSlotDescs = new ArrayList<>(setTuple.getSlots()); + + RecursiveCteNode recursiveCteNode = new RecursiveCteNode(context.nextPlanNodeId(), setTuple.getId(), + recursiveCte.getCteName(), recursiveCte.isUnionAll()); + List> distributeExprLists = getDistributeExprs(recursiveCte.children().toArray(new Plan[0])); + recursiveCteNode.setChildrenDistributeExprLists(distributeExprLists); + recursiveCteNode.setNereidsId(recursiveCte.getId()); + List> resultExpressionLists = Lists.newArrayList(); + context.getNereidsIdToPlanNodeIdMap().put(recursiveCte.getId(), recursiveCteNode.getId()); + for (List regularChildrenOutput : recursiveCte.getRegularChildrenOutputs()) { + resultExpressionLists.add(new ArrayList<>(regularChildrenOutput)); + } + + for (PlanFragment childFragment : childrenFragments) { + recursiveCteNode.addChild(childFragment.getPlanRoot()); + } + + List> materializedResultExprLists = Lists.newArrayList(); + for (int i = 0; i < resultExpressionLists.size(); ++i) { + List resultExpressionList = resultExpressionLists.get(i); + List exprList = Lists.newArrayList(); + Preconditions.checkState(resultExpressionList.size() == outputSlotDescs.size()); + for 
(int j = 0; j < resultExpressionList.size(); ++j) { + exprList.add(ExpressionTranslator.translate(resultExpressionList.get(j), context)); + // TODO: reconsider this, we may change nullable info in previous nereids rules not here. + outputSlotDescs.get(j) + .setIsNullable(outputSlotDescs.get(j).getIsNullable() || exprList.get(j).isNullable()); + } + materializedResultExprLists.add(exprList); + } + recursiveCteNode.setMaterializedResultExprLists(materializedResultExprLists); + Preconditions.checkState(recursiveCteNode.getMaterializedResultExprLists().size() + == recursiveCteNode.getChildren().size()); + + PlanFragment recursiveCteFragment; + if (childrenFragments.isEmpty()) { + recursiveCteFragment = createPlanFragment(recursiveCteNode, + DataPartition.UNPARTITIONED, recursiveCte); + context.addPlanFragment(recursiveCteFragment); + } else { + int childrenSize = childrenFragments.size(); + recursiveCteFragment = childrenFragments.get(childrenSize - 1); + for (int i = childrenSize - 2; i >= 0; i--) { + context.mergePlanFragment(childrenFragments.get(i), recursiveCteFragment); + for (PlanFragment child : childrenFragments.get(i).getChildren()) { + recursiveCteFragment.addChild(child); + } + } + setPlanRoot(recursiveCteFragment, recursiveCteNode, recursiveCte); + } + + recursiveCteFragment.updateDataPartition(DataPartition.UNPARTITIONED); + recursiveCteFragment.setOutputPartition(DataPartition.UNPARTITIONED); + + return recursiveCteFragment; + } + + @Override + public PlanFragment visitPhysicalRecursiveCteRecursiveChild( + PhysicalRecursiveCteRecursiveChild recursiveChild, + PlanTranslatorContext context) { + return recursiveChild.child().accept(this, context); + } + /** * Returns a new fragment with a UnionNode as its root. 
The data partition of the * returned fragment and how the data of the child fragments is consumed depends on the diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index f493f8aa97ae7c..71ea167ce0870d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -1163,6 +1163,8 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor { private final Map selectHintMap; + private boolean isInRecursiveCteContext = false; + public LogicalPlanBuilder(Map selectHintMap) { this.selectHintMap = selectHintMap; } @@ -2376,7 +2378,16 @@ private LogicalPlan withCte(LogicalPlan plan, CteContext ctx) { if (ctx == null) { return plan; } - return new LogicalCTE<>((List) visit(ctx.aliasQuery(), LogicalSubQueryAlias.class), plan); + /* Alias queries can contain nested WITH clauses: save the enclosing flag and restore it in finally, + * so a nested clause or a parse error cannot leave a stale value for the alias queries visited later. */ + boolean wasInRecursiveCteContext = isInRecursiveCteContext; + isInRecursiveCteContext = ctx.RECURSIVE() != null; + try { + return new LogicalCTE<>(isInRecursiveCteContext, + (List) visit(ctx.aliasQuery(), LogicalSubQueryAlias.class), plan); + } finally { + isInRecursiveCteContext = wasInRecursiveCteContext; + } } /** @@ -2570,7 +2581,7 @@ public LogicalPlan visitQuery(QueryContext ctx) { public LogicalPlan visitSetOperation(SetOperationContext ctx) { return ParserUtils.withOrigin(ctx, () -> { - if (ctx.UNION() != null) { + if (ctx.UNION() != null && !isInRecursiveCteContext) { Qualifier qualifier = getQualifier(ctx); List contexts = Lists.newArrayList(ctx.right); QueryTermContext current = ctx.left; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/pattern/MemoPatterns.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/pattern/MemoPatterns.java index fa9d191f5e2af3..ec53deafaadbd3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/pattern/MemoPatterns.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/pattern/MemoPatterns.java @@
-28,6 +28,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect; import org.apache.doris.nereids.trees.plans.logical.LogicalLeaf; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; import org.apache.doris.nereids.trees.plans.logical.LogicalUnary; @@ -205,6 +206,29 @@ default PatternDescriptor logicalUnion() { defaultPromise()); } + /** + * create a LogicalRecursiveCte pattern. + */ + default PatternDescriptor + logicalRecursiveCte( + PatternDescriptor... children) { + return new PatternDescriptor( + new TypePattern(LogicalRecursiveCte.class, + Arrays.stream(children) + .map(PatternDescriptor::getPattern) + .toArray(Pattern[]::new)), + defaultPromise()); + } + + /** + * create a logicalRecursiveCte group. + */ + default PatternDescriptor logicalRecursiveCte() { + return new PatternDescriptor( + new TypePattern(LogicalRecursiveCte.class, multiGroup().pattern), + defaultPromise()); + } + /** * create a logicalExcept pattern. 
*/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPruner.java index 51a9003b78fbac..d7e4e5b67cb34b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPruner.java @@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalIntersect; import org.apache.doris.nereids.trees.plans.physical.PhysicalLimit; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN; @@ -76,6 +77,14 @@ public Plan visit(Plan plan, CascadesContext context) { return plan; } + @Override + public PhysicalRecursiveCte visitPhysicalRecursiveCte(PhysicalRecursiveCte recursiveCte, CascadesContext context) { + for (Plan child : recursiveCte.children()) { + child.accept(this, context); + } + return recursiveCte; + } + @Override public PhysicalSetOperation visitPhysicalSetOperation(PhysicalSetOperation setOperation, CascadesContext context) { for (Plan child : setOperation.children()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/pre/PullUpSubqueryAliasToCTE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/pre/PullUpSubqueryAliasToCTE.java index 18ac63dc60084b..6af9bc5e0917f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/pre/PullUpSubqueryAliasToCTE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/pre/PullUpSubqueryAliasToCTE.java @@ -42,25 +42,25 @@ public class PullUpSubqueryAliasToCTE extends PlanPreprocessor { 
@Override public Plan visitUnboundResultSink(UnboundResultSink unboundResultSink, - StatementContext context) { + StatementContext context) { return createCteForRootNode(unboundResultSink, context); } @Override public Plan visitUnboundTableSink(UnboundTableSink unboundTableSink, - StatementContext context) { + StatementContext context) { return createCteForRootNode(unboundTableSink, context); } @Override public Plan visitLogicalFileSink(LogicalFileSink logicalFileSink, - StatementContext context) { + StatementContext context) { return createCteForRootNode(logicalFileSink, context); } @Override public Plan visitLogicalSubQueryAlias(LogicalSubQueryAlias alias, - StatementContext context) { + StatementContext context) { if (findLeadingHintIgnoreSortAndLimit(alias.child())) { aliasQueries.add((LogicalSubQueryAlias) alias); List tableName = new ArrayList<>(); @@ -83,7 +83,8 @@ public Plan visitLogicalCTE(LogicalCTE logicalCTE, StatementCont subQueryAliasesOfCte.addAll(logicalCTE.getAliasQueries()); subQueryAliasesOfCte.addAll(aliasQueries); aliasQueries = new ArrayList<>(); - return new LogicalCTE<>(subQueryAliasesOfCte, (LogicalPlan) newLogicalCTE.child()); + return new LogicalCTE<>(newLogicalCTE.isRecursiveCte(), subQueryAliasesOfCte, + (LogicalPlan) newLogicalCTE.child()); } return cte; } @@ -97,10 +98,11 @@ private Plan createCteForRootNode(Plan plan, StatementContext context) { subQueryAliases.addAll(logicalCTE.getAliasQueries()); subQueryAliases.addAll(aliasQueries); return topPlan.withChildren( - new LogicalCTE<>(subQueryAliases, (LogicalPlan) topPlan.child(0))); + new LogicalCTE<>(logicalCTE.isRecursiveCte(), subQueryAliases, + (LogicalPlan) topPlan.child(0))); } return topPlan.withChildren( - new LogicalCTE<>(aliasQueries, (LogicalPlan) topPlan.child(0))); + new LogicalCTE<>(false, aliasQueries, (LogicalPlan) topPlan.child(0))); } return topPlan; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java index 2a1a54435a60c5..4c2cbfaf0bc74a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java @@ -52,6 +52,9 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalOneRowRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteRecursiveChild; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; import org.apache.doris.nereids.trees.plans.physical.PhysicalSink; @@ -147,6 +150,11 @@ public PhysicalProperties visitPhysicalFileScan(PhysicalFileScan fileScan, PlanC return PhysicalProperties.STORAGE_ANY; } + @Override + public PhysicalProperties visitPhysicalRecursiveCteScan(PhysicalRecursiveCteScan cteScan, PlanContext context) { + return PhysicalProperties.ANY; + } + /** * TODO return ANY after refactor coordinator * return STORAGE_ANY not ANY, in order to generate distribute on jdbc scan. 
@@ -484,6 +492,18 @@ public PhysicalProperties visitPhysicalSetOperation(PhysicalSetOperation setOper return PhysicalProperties.createHash(request, firstType); } + @Override + public PhysicalProperties visitPhysicalRecursiveCte(PhysicalRecursiveCte recursiveCte, PlanContext context) { + return PhysicalProperties.GATHER; + } + + @Override + public PhysicalProperties visitPhysicalRecursiveCteRecursiveChild( + PhysicalRecursiveCteRecursiveChild recursiveChild, + PlanContext context) { + return PhysicalProperties.MUST_SHUFFLE; + } + @Override public PhysicalProperties visitPhysicalUnion(PhysicalUnion union, PlanContext context) { if (union.getConstantExprsList().isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java index 7610d75a22d5fa..255ae29157d6d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java @@ -57,6 +57,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapTableSink; import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; import org.apache.doris.nereids.trees.plans.physical.PhysicalResultSink; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; import org.apache.doris.nereids.trees.plans.physical.PhysicalTVFTableSink; @@ -370,6 +371,16 @@ public Void visitPhysicalUnion(PhysicalUnion union, PlanContext context) { return null; } + @Override + public Void visitPhysicalRecursiveCte(PhysicalRecursiveCte recursiveCte, PlanContext context) { + List requestGather = Lists.newArrayListWithCapacity(context.arity()); + for (int i = context.arity(); i > 0; --i) { + 
requestGather.add(PhysicalProperties.GATHER); + } + addRequestPropertyToChildren(requestGather); + return null; + } + @Override public Void visitAbstractPhysicalSort(AbstractPhysicalSort sort, PlanContext context) { if (!sort.getSortPhase().isLocal()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java index d1c9166179cab6..5b7e23c60f60ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java @@ -92,6 +92,9 @@ import org.apache.doris.nereids.rules.implementation.LogicalOneRowRelationToPhysicalOneRowRelation; import org.apache.doris.nereids.rules.implementation.LogicalPartitionTopNToPhysicalPartitionTopN; import org.apache.doris.nereids.rules.implementation.LogicalProjectToPhysicalProject; +import org.apache.doris.nereids.rules.implementation.LogicalRecursiveCteRecursiveChildToPhysicalRecursiveCteRecursiveChild; +import org.apache.doris.nereids.rules.implementation.LogicalRecursiveCteScanToPhysicalRecursiveCteScan; +import org.apache.doris.nereids.rules.implementation.LogicalRecursiveCteToPhysicalRecursiveCte; import org.apache.doris.nereids.rules.implementation.LogicalRepeatToPhysicalRepeat; import org.apache.doris.nereids.rules.implementation.LogicalResultSinkToPhysicalResultSink; import org.apache.doris.nereids.rules.implementation.LogicalSchemaScanToPhysicalSchemaScan; @@ -209,6 +212,7 @@ public class RuleSet { .add(new LogicalJdbcScanToPhysicalJdbcScan()) .add(new LogicalOdbcScanToPhysicalOdbcScan()) .add(new LogicalEsScanToPhysicalEsScan()) + .add(new LogicalRecursiveCteScanToPhysicalRecursiveCteScan()) .add(new LogicalProjectToPhysicalProject()) .add(new LogicalLimitToPhysicalLimit()) .add(new LogicalWindowToPhysicalWindow()) @@ -224,6 +228,8 @@ public class RuleSet { .add(SplitAggWithoutDistinct.INSTANCE) .add(SplitAggMultiPhase.INSTANCE) 
.add(SplitAggMultiPhaseWithoutGbyKey.INSTANCE) + .add(new LogicalRecursiveCteToPhysicalRecursiveCte()) + .add(new LogicalRecursiveCteRecursiveChildToPhysicalRecursiveCteRecursiveChild()) .add(new LogicalUnionToPhysicalUnion()) .add(new LogicalExceptToPhysicalExcept()) .add(new LogicalIntersectToPhysicalIntersect()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 6d7f8f25b23571..139b02a23fac87 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -555,6 +555,7 @@ public enum RuleType { LOGICAL_JDBC_SCAN_TO_PHYSICAL_JDBC_SCAN_RULE(RuleTypeClass.IMPLEMENTATION), LOGICAL_ODBC_SCAN_TO_PHYSICAL_ODBC_SCAN_RULE(RuleTypeClass.IMPLEMENTATION), LOGICAL_ES_SCAN_TO_PHYSICAL_ES_SCAN_RULE(RuleTypeClass.IMPLEMENTATION), + LOGICAL_RECURSIVE_CTE_SCAN_TO_PHYSICAL_RECUSIVE_CTE_SCAN_RULE(RuleTypeClass.IMPLEMENTATION), LOGICAL_BLACKHOLE_SINK_TO_PHYSICAL_BLACKHOLE_SINK_RULE(RuleTypeClass.IMPLEMENTATION), LOGICAL_OLAP_TABLE_SINK_TO_PHYSICAL_OLAP_TABLE_SINK_RULE(RuleTypeClass.IMPLEMENTATION), LOGICAL_HIVE_TABLE_SINK_TO_PHYSICAL_HIVE_TABLE_SINK_RULE(RuleTypeClass.IMPLEMENTATION), @@ -580,6 +581,8 @@ public enum RuleType { COUNT_ON_INDEX_WITHOUT_PROJECT(RuleTypeClass.IMPLEMENTATION), TWO_PHASE_AGGREGATE_WITH_DISTINCT(RuleTypeClass.IMPLEMENTATION), LOGICAL_UNION_TO_PHYSICAL_UNION(RuleTypeClass.IMPLEMENTATION), + LOGICAL_RECURSIVE_CTE_TO_PHYSICAL_RECURSIVE_CTE(RuleTypeClass.IMPLEMENTATION), + LOGICAL_RECURSIVE_CTE_RECURSIVE_CHILD_TO_PHYSICAL_RECURSIVE_CTE_RECURSIVE_CHILD(RuleTypeClass.IMPLEMENTATION), LOGICAL_EXCEPT_TO_PHYSICAL_EXCEPT(RuleTypeClass.IMPLEMENTATION), LOGICAL_INTERSECT_TO_PHYSICAL_INTERSECT(RuleTypeClass.IMPLEMENTATION), LOGICAL_GENERATE_TO_PHYSICAL_GENERATE(RuleTypeClass.IMPLEMENTATION), diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java index c49b3f01c51039..0c65c8af560001 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java @@ -23,22 +23,36 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.CTEId; +import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Nullable; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.algebra.SetOperation; import org.apache.doris.nereids.trees.plans.logical.LogicalCTE; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteRecursiveChild; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; +import org.apache.doris.nereids.trees.plans.logical.ProjectProcessor; +import org.apache.doris.nereids.types.DataType; +import 
com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -70,7 +84,7 @@ public Rule build() { // step 1. analyzed all cte plan Pair>> result = analyzeCte(logicalCTE, ctx.cascadesContext); CascadesContext outerCascadesCtx = CascadesContext.newContextWithCteContext( - ctx.cascadesContext, logicalCTE.child(), result.first); + ctx.cascadesContext, logicalCTE.child(), result.first, Optional.empty(), ImmutableList.of()); outerCascadesCtx.withPlanProcess(ctx.cascadesContext.showPlanProcess(), () -> { outerCascadesCtx.newAnalyzer().analyze(); }); @@ -95,27 +109,146 @@ private Pair>> analyzeCte( List> cteProducerPlans = new ArrayList<>(); for (LogicalSubQueryAlias aliasQuery : aliasQueries) { // we should use a chain to ensure visible of cte - LogicalPlan parsedCtePlan = (LogicalPlan) aliasQuery.child(); - CascadesContext innerCascadesCtx = CascadesContext.newContextWithCteContext( - cascadesContext, parsedCtePlan, outerCteCtx); - innerCascadesCtx.withPlanProcess(cascadesContext.showPlanProcess(), () -> { - innerCascadesCtx.newAnalyzer().analyze(); - }); - cascadesContext.addPlanProcesses(innerCascadesCtx.getPlanProcesses()); - LogicalPlan analyzedCtePlan = (LogicalPlan) innerCascadesCtx.getRewritePlan(); - checkColumnAlias(aliasQuery, analyzedCtePlan.getOutput()); - CTEId cteId = StatementScopeIdGenerator.newCTEId(); - LogicalSubQueryAlias logicalSubQueryAlias = - aliasQuery.withChildren(ImmutableList.of(analyzedCtePlan)); - outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, outerCteCtx); - outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias); - LogicalCTEProducer cteProducer = new LogicalCTEProducer<>(cteId, logicalSubQueryAlias); - cascadesContext.getStatementContext().setCteProducer(cteId, cteProducer); - 
cteProducerPlans.add(cteProducer); + if (aliasQuery.isRecursiveCte() && logicalCTE.isRecursiveCte()) { + Pair> result = analyzeRecursiveCte(aliasQuery, outerCteCtx, + cascadesContext); + outerCteCtx = result.first; + cteProducerPlans.add(result.second); + } else { + LogicalPlan parsedCtePlan = (LogicalPlan) aliasQuery.child(); + CascadesContext innerCascadesCtx = CascadesContext.newContextWithCteContext( + cascadesContext, parsedCtePlan, outerCteCtx, Optional.empty(), ImmutableList.of()); + innerCascadesCtx.withPlanProcess(cascadesContext.showPlanProcess(), () -> { + innerCascadesCtx.newAnalyzer().analyze(); + }); + cascadesContext.addPlanProcesses(innerCascadesCtx.getPlanProcesses()); + LogicalPlan analyzedCtePlan = (LogicalPlan) innerCascadesCtx.getRewritePlan(); + checkColumnAlias(aliasQuery, analyzedCtePlan.getOutput()); + CTEId cteId = StatementScopeIdGenerator.newCTEId(); + LogicalSubQueryAlias logicalSubQueryAlias = aliasQuery + .withChildren(ImmutableList.of(analyzedCtePlan)); + outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, outerCteCtx); + outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias); + cteProducerPlans.add(new LogicalCTEProducer<>(cteId, logicalSubQueryAlias)); + } } return Pair.of(outerCteCtx, cteProducerPlans); } + private Pair> analyzeRecursiveCte(LogicalSubQueryAlias aliasQuery, + CTEContext outerCteCtx, CascadesContext cascadesContext) { + Preconditions.checkArgument(aliasQuery.isRecursiveCte(), "alias query must be recursive cte"); + LogicalPlan parsedCtePlan = (LogicalPlan) aliasQuery.child(); + if (!(parsedCtePlan instanceof LogicalUnion) || parsedCtePlan.children().size() != 2) { + throw new AnalysisException(String.format("recursive cte must be union, don't support %s", + parsedCtePlan.getClass().getSimpleName())); + } + // analyze anchor child, its output list will be recursive cte temp table's schema + LogicalPlan anchorChild = (LogicalPlan) parsedCtePlan.child(0); + CascadesContext innerAnchorCascadesCtx = 
CascadesContext.newContextWithCteContext( + cascadesContext, anchorChild, outerCteCtx, Optional.of(aliasQuery.getAlias()), ImmutableList.of()); + innerAnchorCascadesCtx.withPlanProcess(cascadesContext.showPlanProcess(), () -> { + innerAnchorCascadesCtx.newAnalyzer().analyze(); + }); + cascadesContext.addPlanProcesses(innerAnchorCascadesCtx.getPlanProcesses()); + LogicalPlan analyzedAnchorChild = (LogicalPlan) innerAnchorCascadesCtx.getRewritePlan(); + Set recursiveCteScans = analyzedAnchorChild + .collect(LogicalRecursiveCteScan.class::isInstance); + for (LogicalRecursiveCteScan cteScan : recursiveCteScans) { + if (cteScan.getTable().getName().equalsIgnoreCase(aliasQuery.getAlias())) { + throw new AnalysisException( + String.format("recursive reference to query %s must not appear within its non-recursive term", + aliasQuery.getAlias())); + } + } + checkColumnAlias(aliasQuery, analyzedAnchorChild.getOutput()); + // make all output nullable + analyzedAnchorChild = forceOutputNullable(analyzedAnchorChild, + aliasQuery.getColumnAliases().orElse(ImmutableList.of())); + // analyze recursive child + LogicalPlan recursiveChild = (LogicalPlan) parsedCtePlan.child(1); + CascadesContext innerRecursiveCascadesCtx = CascadesContext.newContextWithCteContext( + cascadesContext, recursiveChild, outerCteCtx, Optional.of(aliasQuery.getAlias()), + analyzedAnchorChild.getOutput()); + innerRecursiveCascadesCtx.withPlanProcess(cascadesContext.showPlanProcess(), () -> { + innerRecursiveCascadesCtx.newAnalyzer().analyze(); + }); + cascadesContext.addPlanProcesses(innerRecursiveCascadesCtx.getPlanProcesses()); + LogicalPlan analyzedRecursiveChild = (LogicalPlan) innerRecursiveCascadesCtx.getRewritePlan(); + List recursiveCteScanList = analyzedRecursiveChild + .collectToList(LogicalRecursiveCteScan.class::isInstance); + if (recursiveCteScanList.size() > 1) { + throw new AnalysisException(String.format("recursive reference to query %s must not appear more than once", + 
aliasQuery.getAlias())); + } + List anchorChildOutputs = analyzedAnchorChild.getOutput(); + List anchorChildOutputTypes = new ArrayList<>(anchorChildOutputs.size()); + for (Slot slot : anchorChildOutputs) { + anchorChildOutputTypes.add(slot.getDataType()); + } + List recursiveChildOutputs = analyzedRecursiveChild.getOutput(); + for (int i = 0; i < recursiveChildOutputs.size(); ++i) { + if (!recursiveChildOutputs.get(i).getDataType().equals(anchorChildOutputTypes.get(i))) { + throw new AnalysisException(String.format("%s recursive child's %d column's datatype in select list %s " + + "is different from anchor child's output datatype %s, please add cast manually " + + "to get expect datatype", aliasQuery.getAlias(), i + 1, + recursiveChildOutputs.get(i).getDataType(), anchorChildOutputTypes.get(i))); + } + } + analyzedRecursiveChild = new LogicalRecursiveCteRecursiveChild<>(aliasQuery.getAlias(), + forceOutputNullable(analyzedRecursiveChild, ImmutableList.of())); + + // create LogicalRecursiveCte + LogicalUnion logicalUnion = (LogicalUnion) parsedCtePlan; + LogicalRecursiveCte analyzedCtePlan = new LogicalRecursiveCte(aliasQuery.getAlias(), + logicalUnion.getQualifier() == SetOperation.Qualifier.ALL, + ImmutableList.of(analyzedAnchorChild, analyzedRecursiveChild)); + List> childrenProjections = analyzedCtePlan.collectChildrenProjections(); + int childrenProjectionSize = childrenProjections.size(); + ImmutableList.Builder> childrenOutputs = ImmutableList + .builderWithExpectedSize(childrenProjectionSize); + ImmutableList.Builder newChildren = ImmutableList.builderWithExpectedSize(childrenProjectionSize); + for (int i = 0; i < childrenProjectionSize; i++) { + Plan newChild; + Plan child = analyzedCtePlan.child(i); + if (childrenProjections.get(i).stream().allMatch(SlotReference.class::isInstance)) { + newChild = child; + } else { + List parentProject = childrenProjections.get(i); + newChild = ProjectProcessor.tryProcessProject(parentProject, child) + .orElseGet(() -> 
new LogicalProject<>(parentProject, child)); + } + newChildren.add(newChild); + childrenOutputs.add((List) (List) newChild.getOutput()); + } + analyzedCtePlan = analyzedCtePlan.withChildrenAndTheirOutputs(newChildren.build(), childrenOutputs.build()); + List newOutputs = analyzedCtePlan.buildNewOutputs(); + analyzedCtePlan = analyzedCtePlan.withNewOutputs(newOutputs); + + CTEId cteId = StatementScopeIdGenerator.newCTEId(); + LogicalSubQueryAlias logicalSubQueryAlias = aliasQuery.withChildren(ImmutableList.of(analyzedCtePlan)); + outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, outerCteCtx); + outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias); + LogicalCTEProducer cteProducer = new LogicalCTEProducer<>(cteId, logicalSubQueryAlias); + return Pair.of(outerCteCtx, cteProducer); + } + + private LogicalPlan forceOutputNullable(LogicalPlan logicalPlan, List aliasNames) { + List oldOutputs = logicalPlan.getOutput(); + int size = oldOutputs.size(); + List newOutputs = new ArrayList<>(oldOutputs.size()); + if (!aliasNames.isEmpty()) { + for (int i = 0; i < size; ++i) { + newOutputs.add(new Alias(new Nullable(oldOutputs.get(i)), aliasNames.get(i))); + } + } else { + for (Slot slot : oldOutputs) { + newOutputs.add(new Alias(new Nullable(slot), slot.getName())); + } + } + return new LogicalProject<>(newOutputs, logicalPlan); + } + /** * check columnAliases' size and name */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 53a9e24ad50af7..fc9883a9711fa4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.RecursiveCteTempTable; 
import org.apache.doris.catalog.SchemaTable; import org.apache.doris.catalog.SchemaTable.SchemaColumn; import org.apache.doris.catalog.TableIf; @@ -91,6 +92,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalOdbcScan; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSchemaScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation; @@ -172,16 +174,32 @@ private LogicalPlan bindWithCurrentDb(CascadesContext cascadesContext, UnboundRe return consumer; } } - List tableQualifier = RelationUtil.getQualifierName( - cascadesContext.getConnectContext(), unboundRelation.getNameParts()); - TableIf table = cascadesContext.getStatementContext().getAndCacheTable(tableQualifier, TableFrom.QUERY, - Optional.of(unboundRelation)); + LogicalPlan scan; + if (tableName.equalsIgnoreCase(cascadesContext.getCurrentRecursiveCteName().orElse(""))) { + if (cascadesContext.isAnalyzingRecursiveCteAnchorChild()) { + throw new AnalysisException( + String.format("recursive reference to query %s must not appear within its non-recursive term", + cascadesContext.getCurrentRecursiveCteName().get())); + } + ImmutableList.Builder schema = new ImmutableList.Builder<>(); + for (Slot slot : cascadesContext.getRecursiveCteOutputs()) { + schema.add(new Column(slot.getName(), slot.getDataType().toCatalogDataType(), slot.nullable())); + } + RecursiveCteTempTable cteTempTable = new RecursiveCteTempTable(tableName, schema.build()); + scan = new LogicalRecursiveCteScan(cascadesContext.getStatementContext().getNextRelationId(), + cteTempTable, unboundRelation.getNameParts()); + } else { + List tableQualifier = RelationUtil.getQualifierName( + cascadesContext.getConnectContext(), 
unboundRelation.getNameParts()); + TableIf table = cascadesContext.getStatementContext().getAndCacheTable(tableQualifier, TableFrom.QUERY, + Optional.of(unboundRelation)); - LogicalPlan scan = getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); - if (cascadesContext.isLeadingJoin()) { - LeadingHint leading = (LeadingHint) cascadesContext.getHintMap().get("Leading"); - leading.putRelationIdAndTableName(Pair.of(unboundRelation.getRelationId(), tableName)); - leading.getRelationIdToScanMap().put(unboundRelation.getRelationId(), scan); + scan = getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); + if (cascadesContext.isLeadingJoin()) { + LeadingHint leading = (LeadingHint) cascadesContext.getHintMap().get("Leading"); + leading.putRelationIdAndTableName(Pair.of(unboundRelation.getRelationId(), tableName)); + leading.getRelationIdToScanMap().put(unboundRelation.getRelationId(), scan); + } } return scan; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java index 0e15e4dbec027c..52418a85f5f9c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java @@ -108,8 +108,10 @@ private CTEContext collectFromCte( for (LogicalSubQueryAlias aliasQuery : aliasQueries) { // we should use a chain to ensure visible of cte LogicalPlan parsedCtePlan = (LogicalPlan) aliasQuery.child(); + // it looks like the current CTE's name needs to be added to CascadesContext so that self-references can be detected CascadesContext innerCascadesCtx = CascadesContext.newContextWithCteContext( - cascadesContext, parsedCtePlan, outerCteCtx); + cascadesContext, parsedCtePlan, outerCteCtx, aliasQuery.isRecursiveCte() + ? 
Optional.of(aliasQuery.getAlias()) : Optional.empty(), ImmutableList.of()); innerCascadesCtx.newTableCollector(true).collect(); LogicalPlan analyzedCtePlan = (LogicalPlan) innerCascadesCtx.getRewritePlan(); // cteId is not used in CollectTable stage @@ -128,7 +130,8 @@ private Plan collectFromAny(MatchingContext ctx) { if (e instanceof SubqueryExpr) { SubqueryExpr subqueryExpr = (SubqueryExpr) e; CascadesContext subqueryContext = CascadesContext.newContextWithCteContext( - ctx.cascadesContext, subqueryExpr.getQueryPlan(), ctx.cteContext); + ctx.cascadesContext, subqueryExpr.getQueryPlan(), ctx.cteContext, Optional.empty(), + ImmutableList.of()); subqueryContext.keepOrShowPlanProcess(ctx.cascadesContext.showPlanProcess(), () -> subqueryContext.newTableCollector(true).collect()); ctx.cascadesContext.addPlanProcesses(subqueryContext.getPlanProcesses()); @@ -184,6 +187,10 @@ private void collectFromUnboundRelation(CascadesContext cascadesContext, List nameParts, TableFrom tableFrom, Optional unboundRelation) { if (nameParts.size() == 1) { String tableName = nameParts.get(0); + if (cascadesContext.getCurrentRecursiveCteName().isPresent() + && tableName.equalsIgnoreCase(cascadesContext.getCurrentRecursiveCteName().get())) { + return; + } // check if it is a CTE's name CTEContext cteContext = cascadesContext.getCteContext().findCTEContext(tableName).orElse(null); if (cteContext != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java index 69d79934992c54..05a35dc6ed8460 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java @@ -52,6 +52,7 @@ import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.Set; /** @@ -227,7 +228,8 @@ private 
AnalyzedResult analyzeSubquery(SubqueryExpr expr) { throw new IllegalStateException("Missing CascadesContext"); } CascadesContext subqueryContext = CascadesContext.newContextWithCteContext( - cascadesContext, expr.getQueryPlan(), cascadesContext.getCteContext()); + cascadesContext, expr.getQueryPlan(), cascadesContext.getCteContext(), Optional.empty(), + ImmutableList.of()); // don't use `getScope()` because we only need `getScope().getOuterScope()` and `getScope().getSlots()` // otherwise unexpected errors may occur Scope subqueryScope = new Scope(getScope().getOuterScope(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteRecursiveChildToPhysicalRecursiveCteRecursiveChild.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteRecursiveChildToPhysicalRecursiveCteRecursiveChild.java new file mode 100644 index 00000000000000..689e550fa3a113 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteRecursiveChildToPhysicalRecursiveCteRecursiveChild.java @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.implementation; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteRecursiveChild; + +/** + * Implementation rule that convert logical recursive cte's recursive child to physical recursive child. + */ +public class LogicalRecursiveCteRecursiveChildToPhysicalRecursiveCteRecursiveChild + extends OneImplementationRuleFactory { + @Override + public Rule build() { + return logicalRecursiveCteRecursiveChild().then(recursiveCte -> new PhysicalRecursiveCteRecursiveChild( + recursiveCte.getCteName(), + recursiveCte.getLogicalProperties(), + recursiveCte.child())) + .toRule(RuleType.LOGICAL_RECURSIVE_CTE_RECURSIVE_CHILD_TO_PHYSICAL_RECURSIVE_CTE_RECURSIVE_CHILD); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteScanToPhysicalRecursiveCteScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteScanToPhysicalRecursiveCteScan.java new file mode 100644 index 00000000000000..8714c280bb9064 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteScanToPhysicalRecursiveCteScan.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.implementation; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteScan; + +import java.util.Optional; + +/** + * Implementation rule that convert logical Recursive Cte Scan to physical Recursive Cte Scan. + */ +public class LogicalRecursiveCteScanToPhysicalRecursiveCteScan extends OneImplementationRuleFactory { + @Override + public Rule build() { + return logicalRecursiveCteScan().then(recursiveCteScan -> + new PhysicalRecursiveCteScan( + recursiveCteScan.getRelationId(), + recursiveCteScan.getTable(), + recursiveCteScan.getQualifier(), + Optional.empty(), + recursiveCteScan.getLogicalProperties(), + recursiveCteScan.getOperativeSlots()) + ).toRule(RuleType.LOGICAL_RECURSIVE_CTE_SCAN_TO_PHYSICAL_RECUSIVE_CTE_SCAN_RULE); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteToPhysicalRecursiveCte.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteToPhysicalRecursiveCte.java new file mode 100644 index 00000000000000..12ac89df19c644 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalRecursiveCteToPhysicalRecursiveCte.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.implementation; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; + +/** + * Implementation rule that convert logical Recursive Cte to Physical Recursive Cte. 
+ */ +public class LogicalRecursiveCteToPhysicalRecursiveCte extends OneImplementationRuleFactory { + @Override + public Rule build() { + return logicalRecursiveCte().then(recursiveCte -> + new PhysicalRecursiveCte(recursiveCte.getCteName(), + recursiveCte.isUnionAll(), + recursiveCte.getOutputs(), + recursiveCte.getRegularChildrenOutputs(), + recursiveCte.getLogicalProperties(), + recursiveCte.children()) + ).toRule(RuleType.LOGICAL_RECURSIVE_CTE_TO_PHYSICAL_RECURSIVE_CTE); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java index c8016a493c678d..24d70ca3bef76e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java @@ -39,6 +39,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; @@ -288,6 +289,53 @@ public Plan visitLogicalRepeat(LogicalRepeat repeat, Map replaceMap) { + recursiveCte = (LogicalRecursiveCte) super.visit(recursiveCte, replaceMap); + ImmutableList.Builder> newChildrenOutputs = ImmutableList.builder(); + List inputNullable = null; + if (!recursiveCte.children().isEmpty()) { + inputNullable = Lists.newArrayListWithCapacity(recursiveCte.getOutputs().size()); + for (int i = 0; i < recursiveCte.getOutputs().size(); i++) { + inputNullable.add(false); + } + for (int i = 0; i < recursiveCte.arity(); i++) { + List childOutput = recursiveCte.child(i).getOutput(); + 
List setChildOutput = recursiveCte.getRegularChildOutput(i); + ImmutableList.Builder newChildOutputs = ImmutableList.builder(); + for (int j = 0; j < setChildOutput.size(); j++) { + for (Slot slot : childOutput) { + if (slot.getExprId().equals(setChildOutput.get(j).getExprId())) { + inputNullable.set(j, slot.nullable() || inputNullable.get(j)); + newChildOutputs.add((SlotReference) slot); + break; + } + } + } + newChildrenOutputs.add(newChildOutputs.build()); + } + } + if (inputNullable == null) { + // this is a fail-safe + // means there is no children and having no getConstantExprsList + // no way to update the nullable flag, so just do nothing + return recursiveCte; + } + List outputs = recursiveCte.getOutputs(); + List newOutputs = Lists.newArrayListWithCapacity(outputs.size()); + for (int i = 0; i < inputNullable.size(); i++) { + NamedExpression ne = outputs.get(i); + Slot slot = ne instanceof Alias ? (Slot) ((Alias) ne).child() : (Slot) ne; + slot = slot.withNullable(inputNullable.get(i)); + NamedExpression newOutput = ne instanceof Alias ? 
(NamedExpression) ne.withChildren(slot) : slot; + newOutputs.add(newOutput); + replaceMap.put(newOutput.getExprId(), newOutput.toSlot()); + } + return recursiveCte.withNewOutputs(newOutputs) + .withChildrenAndTheirOutputs(recursiveCte.children(), newChildrenOutputs.build()) + .recomputeLogicalProperties(); + } + @Override public Plan visitLogicalSetOperation(LogicalSetOperation setOperation, Map replaceMap) { setOperation = (LogicalSetOperation) super.visit(setOperation, replaceMap); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInline.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInline.java index 22ec72c99c5559..9bbcb2e1e8d131 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInline.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CTEInline.java @@ -31,15 +31,19 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteRecursiveChild; import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * pull up LogicalCteAnchor to the top of plan to avoid CteAnchor break other rewrite rules pattern @@ -48,9 +52,13 @@ * and put all of them to the top of plan depends on dependency tree of them. 
*/ public class CTEInline extends DefaultPlanRewriter> implements CustomRewriter { + // all cte used by recursive cte's recursive child should be inline + private Set mustInlineCteConsumers = new HashSet<>(); @Override public Plan rewriteRoot(Plan plan, JobContext jobContext) { + collectMustInlineCteConsumers(plan, false, mustInlineCteConsumers); + Plan root = plan.accept(this, null); // collect cte id to consumer root.foreach(p -> { @@ -78,17 +86,24 @@ public Plan visitLogicalCTEAnchor(LogicalCTEAnchor connectContext.getSessionVariable().inlineCTEReferencedThreshold) { - // not inline - Plan right = cteAnchor.right().accept(this, null); - return cteAnchor.withChildren(cteAnchor.left(), right); - } else { + if (!Sets.intersection(mustInlineCteConsumers, Sets.newHashSet(consumers)).isEmpty()) { // should inline Plan root = cteAnchor.right().accept(this, (LogicalCTEProducer) cteAnchor.left()); // process child return root.accept(this, null); + } else { + ConnectContext connectContext = ConnectContext.get(); + if (connectContext.getSessionVariable().enableCTEMaterialize + && consumers.size() > connectContext.getSessionVariable().inlineCTEReferencedThreshold) { + // not inline + Plan right = cteAnchor.right().accept(this, null); + return cteAnchor.withChildren(cteAnchor.left(), right); + } else { + // should inline + Plan root = cteAnchor.right().accept(this, (LogicalCTEProducer) cteAnchor.left()); + // process child + return root.accept(this, null); + } } } } @@ -113,4 +128,19 @@ public Plan visitLogicalCTEConsumer(LogicalCTEConsumer cteConsumer, LogicalCTEPr } return cteConsumer; } + + private void collectMustInlineCteConsumers(Plan planNode, boolean needCollect, + Set cteConsumers) { + if (planNode instanceof LogicalCTEConsumer) { + if (needCollect) { + cteConsumers.add((LogicalCTEConsumer) planNode); + } + } else if (planNode instanceof LogicalRecursiveCteRecursiveChild) { + collectMustInlineCteConsumers(planNode.child(0), true, cteConsumers); + } else { + for 
(Plan child : planNode.children()) { + collectMustInlineCteConsumers(child, needCollect, cteConsumers); + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java index 4597e3d270714a..7b4a8d9480f139 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java @@ -44,6 +44,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; @@ -211,6 +212,12 @@ public Plan visitLogicalProject(LogicalProject project, PruneCon return pruneChildren(plan, new RoaringBitmap()); } + @Override + public Plan visitLogicalRecursiveCte(LogicalRecursiveCte recursiveCte, PruneContext context) { + // keep LogicalRecursiveCte's output unchanged + return skipPruneThis(recursiveCte); + } + // union can not prune children by the common logic, we must override visit method to write special code. @Override public Plan visitLogicalUnion(LogicalUnion union, PruneContext context) { @@ -388,6 +395,54 @@ public

P pruneOutput(P plan, List originOutput } } + private LogicalRecursiveCte pruneRecursiveCteOutput(LogicalRecursiveCte recursiveCte, PruneContext context) { + List originOutput = recursiveCte.getOutputs(); + if (originOutput.isEmpty()) { + return recursiveCte; + } + List prunedOutputs = Lists.newArrayList(); + List> regularChildrenOutputs = recursiveCte.getRegularChildrenOutputs(); + List children = recursiveCte.children(); + List extractColumnIndex = Lists.newArrayList(); + for (int i = 0; i < originOutput.size(); i++) { + NamedExpression output = originOutput.get(i); + if (context.requiredSlotsIds.contains(output.getExprId().asInt())) { + prunedOutputs.add(output); + extractColumnIndex.add(i); + } + } + + if (prunedOutputs.isEmpty()) { + // process prune all columns + NamedExpression originSlot = originOutput.get(0); + prunedOutputs = ImmutableList.of(new SlotReference(originSlot.getExprId(), originSlot.getName(), + TinyIntType.INSTANCE, false, originSlot.getQualifier())); + regularChildrenOutputs = Lists.newArrayListWithCapacity(regularChildrenOutputs.size()); + children = Lists.newArrayListWithCapacity(children.size()); + for (int i = 0; i < recursiveCte.children().size(); i++) { + Plan child = recursiveCte.child(i); + List newProjectOutput = ImmutableList.of(new Alias(new TinyIntLiteral((byte) 1))); + LogicalProject project; + if (child instanceof LogicalProject) { + LogicalProject childProject = (LogicalProject) child; + List mergeProjections = PlanUtils.mergeProjections( + childProject.getProjects(), newProjectOutput); + project = new LogicalProject<>(mergeProjections, childProject.child()); + } else { + project = new LogicalProject<>(newProjectOutput, child); + } + regularChildrenOutputs.add((List) project.getOutput()); + children.add(project); + } + } + + if (prunedOutputs.equals(originOutput) && !context.requiredSlotsIds.isEmpty()) { + return recursiveCte; + } else { + return recursiveCte.withNewOutputsAndChildren(prunedOutputs, children, 
regularChildrenOutputs); + } + } + private LogicalUnion pruneUnionOutput(LogicalUnion union, PruneContext context) { List originOutput = union.getOutputs(); if (originOutput.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index fc95e3a4078e7a..7f112c4b4f8d0e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -60,6 +60,7 @@ import org.apache.doris.nereids.trees.plans.algebra.OlapScan; import org.apache.doris.nereids.trees.plans.algebra.PartitionTopN; import org.apache.doris.nereids.trees.plans.algebra.Project; +import org.apache.doris.nereids.trees.plans.algebra.RecursiveCte; import org.apache.doris.nereids.trees.plans.algebra.Relation; import org.apache.doris.nereids.trees.plans.algebra.Repeat; import org.apache.doris.nereids.trees.plans.algebra.SetOperation; @@ -90,6 +91,9 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteRecursiveChild; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSchemaScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; @@ -123,6 +127,9 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalQuickSort; +import 
org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteRecursiveChild; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; import org.apache.doris.nereids.trees.plans.physical.PhysicalSchemaScan; @@ -876,6 +883,12 @@ public Statistics visitLogicalEsScan(LogicalEsScan esScan, Void context) { return computeCatalogRelation(esScan); } + @Override + public Statistics visitLogicalRecursiveCteScan(LogicalRecursiveCteScan recursiveCteScan, Void context) { + recursiveCteScan.getExpressions(); + return computeCatalogRelation(recursiveCteScan); + } + @Override public Statistics visitLogicalProject(LogicalProject project, Void context) { return computeProject(project, groupExpression.childStatistics(0)); @@ -919,6 +932,20 @@ public Statistics visitLogicalAssertNumRows( return computeAssertNumRows(assertNumRows.getAssertNumRowsElement(), groupExpression.childStatistics(0)); } + @Override + public Statistics visitLogicalRecursiveCte( + LogicalRecursiveCte recursiveCte, Void context) { + return computeRecursiveCte(recursiveCte, + groupExpression.children() + .stream().map(Group::getStatistics).collect(Collectors.toList())); + } + + @Override + public Statistics visitLogicalRecursiveCteRecursiveChild(LogicalRecursiveCteRecursiveChild recursiveChild, + Void context) { + return groupExpression.childStatistics(0); + } + @Override public Statistics visitLogicalUnion( LogicalUnion union, Void context) { @@ -1004,6 +1031,11 @@ public Statistics visitPhysicalSchemaScan(PhysicalSchemaScan schemaScan, Void co return computeCatalogRelation(schemaScan); } + @Override + public Statistics visitPhysicalRecursiveCteScan(PhysicalRecursiveCteScan recursiveCteScan, Void context) { + return computeCatalogRelation(recursiveCteScan); + } + @Override 
public Statistics visitPhysicalFileScan(PhysicalFileScan fileScan, Void context) { return computeCatalogRelation(fileScan); @@ -1090,6 +1122,18 @@ public Statistics visitPhysicalAssertNumRows(PhysicalAssertNumRows childStats) { + // TODO: refactor this for one row relation + List head; + Statistics headStats; + List> childOutputs = Lists.newArrayList(recursiveCte.getRegularChildrenOutputs()); + + head = childOutputs.get(0); + headStats = new StatisticsBuilder(childStats.get(0)).build(); + + StatisticsBuilder statisticsBuilder = new StatisticsBuilder(); + List unionOutput = recursiveCte.getOutputs(); + double unionRowCount = childStats.stream().mapToDouble(Statistics::getRowCount).sum(); + statisticsBuilder.setRowCount(unionRowCount); + + for (int i = 0; i < head.size(); i++) { + Slot headSlot = head.get(i); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder( + headStats.findColumnStatistics(headSlot)); + for (int j = 1; j < childOutputs.size(); j++) { + Slot slot = childOutputs.get(j).get(i); + ColumnStatistic rightStatistic = childStats.get(j).findColumnStatistics(slot); + double rightRowCount = childStats.get(j).getRowCount(); + colStatsBuilder = unionColumn(colStatsBuilder, + headStats.getRowCount(), rightStatistic, rightRowCount, headSlot.getDataType()); + } + + //update hot values + Map unionHotValues = new HashMap<>(); + for (int j = 0; j < childOutputs.size(); j++) { + Slot slot = childOutputs.get(j).get(i); + ColumnStatistic slotStats = childStats.get(j).findColumnStatistics(slot); + if (slotStats.getHotValues() != null) { + for (Map.Entry entry : slotStats.getHotValues().entrySet()) { + Float value = unionHotValues.get(entry.getKey()); + if (value == null) { + unionHotValues.put(entry.getKey(), + (float) (entry.getValue() * childStats.get(j).getRowCount())); + } else { + unionHotValues.put(entry.getKey(), + (float) (value + entry.getValue() * childStats.get(j).getRowCount())); + } + } + } + } + + Map resultHotValues = new 
LinkedHashMap<>(); + for (Literal hot : unionHotValues.keySet()) { + float ratio = (float) (unionHotValues.get(hot) / unionRowCount); + if (ratio * colStatsBuilder.getNdv() >= SessionVariable.getSkewValueThreshold() + || ratio >= SessionVariable.getHotValueThreshold()) { + resultHotValues.put(hot, ratio); + } + } + if (!resultHotValues.isEmpty()) { + colStatsBuilder.setHotValues(resultHotValues); + } + statisticsBuilder.putColumnStatistics(unionOutput.get(i), colStatsBuilder.build()); + } + + return statisticsBuilder.setWidthInJoinCluster(1).build(); + } + /** * computeUnion */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java index b4e13c7d4b9e48..a29ba438a49b01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java @@ -53,6 +53,8 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteRecursiveChild; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; @@ -371,6 +373,30 @@ public Plan visitLogicalUnion(LogicalUnion union, DeepCopierContext context) { constantExprsList, union.hasPushedFilter(), children); } + @Override + public Plan visitLogicalRecursiveCte(LogicalRecursiveCte recursiveCte, DeepCopierContext context) { + List children = recursiveCte.children().stream() + .map(c -> c.accept(this, context)) + 
.collect(ImmutableList.toImmutableList()); + List outputs = recursiveCte.getOutputs().stream() + .map(o -> (NamedExpression) ExpressionDeepCopier.INSTANCE.deepCopy(o, context)) + .collect(ImmutableList.toImmutableList()); + List> childrenOutputs = recursiveCte.getRegularChildrenOutputs().stream() + .map(childOutputs -> childOutputs.stream() + .map(o -> (SlotReference) ExpressionDeepCopier.INSTANCE.deepCopy(o, context)) + .collect(ImmutableList.toImmutableList())) + .collect(ImmutableList.toImmutableList()); + return new LogicalRecursiveCte(recursiveCte.getCteName(), recursiveCte.isUnionAll(), outputs, + childrenOutputs, children); + } + + @Override + public Plan visitLogicalRecursiveCteRecursiveChild(LogicalRecursiveCteRecursiveChild recursiveChild, + DeepCopierContext context) { + Plan child = recursiveChild.child().accept(this, context); + return new LogicalRecursiveCteRecursiveChild<>(recursiveChild.getCteName(), child); + } + @Override public Plan visitLogicalExcept(LogicalExcept except, DeepCopierContext context) { List children = except.children().stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 949283816a2536..23b35776b12605 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -89,6 +89,9 @@ public enum PlanType { LOGICAL_PARTITION_TOP_N, LOGICAL_PROJECT, LOGICAL_QUALIFY, + LOGICAL_RECURSIVE_CTE, + LOGICAL_RECURSIVE_CTE_RECURSIVE_CHILD, + LOGICAL_RECURSIVE_CTE_SCAN, LOGICAL_REPEAT, LOGICAL_SELECT_HINT, LOGICAL_SUBQUERY_ALIAS, @@ -113,6 +116,7 @@ public enum PlanType { PHYSICAL_OLAP_SCAN, PHYSICAL_SCHEMA_SCAN, PHYSICAL_TVF_RELATION, + PHYSICAL_RECURSIVE_CTE_SCAN, // physical sinks PHYSICAL_FILE_SINK, @@ -133,6 +137,8 @@ public enum PlanType { PHYSICAL_ASSERT_NUM_ROWS, PHYSICAL_CTE_PRODUCER, 
PHYSICAL_CTE_ANCHOR, + PHYSICAL_RECURSIVE_CTE, + PHYSICAL_RECURSIVE_CTE_RECURSIVE_CHILD, PHYSICAL_DISTRIBUTE, PHYSICAL_EXCEPT, PHYSICAL_FILTER, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/RecursiveCte.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/RecursiveCte.java new file mode 100644 index 00000000000000..d7fd8f985a1d93 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/RecursiveCte.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.algebra; + +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; + +import java.util.List; + +/** + * Common interface for logical/physical recursive cte. 
+ */ +public interface RecursiveCte { + boolean isUnionAll(); + + List getRegularChildOutput(int i); + + List getOutputs(); + + List> getRegularChildrenOutputs(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateMvByPartitionCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateMvByPartitionCommand.java index 3ec8aa36f3b363..31f11a903f1b21 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateMvByPartitionCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateMvByPartitionCommand.java @@ -269,7 +269,8 @@ public Plan visitLogicalCTE(LogicalCTE cte, PredicateAddContext ); rewrittenSubQueryAlias.add(subQueryAlias.withChildren(subQueryAliasChildren)); } - return super.visitLogicalCTE(new LogicalCTE<>(rewrittenSubQueryAlias, cte.child()), predicates); + return super.visitLogicalCTE(new LogicalCTE<>(cte.isRecursiveCte(), + rewrittenSubQueryAlias, cte.child()), predicates); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedJobBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedJobBuilder.java index bc20d3efa17590..1229bc59455c72 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedJobBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedJobBuilder.java @@ -29,6 +29,7 @@ import org.apache.doris.planner.PlanFragment; import org.apache.doris.planner.PlanFragmentId; import org.apache.doris.planner.PlanNodeId; +import org.apache.doris.planner.RecursiveCteScanNode; import org.apache.doris.planner.ScanNode; import org.apache.doris.planner.SchemaScanNode; import org.apache.doris.thrift.TExplainLevel; @@ -126,6 +127,10 @@ private UnassignedJob buildLeafOrScanJob( unassignedJob = 
buildScanMetadataJob( statementContext, planFragment, (SchemaScanNode) scanNode, scanWorkerSelector ); + } else if (scanNode instanceof RecursiveCteScanNode) { + unassignedJob = buildScanRecursiveCteJob( + statementContext, planFragment, (RecursiveCteScanNode) scanNode, inputJobs, scanWorkerSelector + ); } else { // only scan external tables or cloud tables or table valued functions // e,g. select * from numbers('number'='100') @@ -196,6 +201,14 @@ private UnassignedJob buildScanMetadataJob( return new UnassignedScanMetadataJob(statementContext, fragment, schemaScanNode, scanWorkerSelector); } + private UnassignedJob buildScanRecursiveCteJob( + StatementContext statementContext, PlanFragment fragment, + RecursiveCteScanNode recursiveCteScanNode, + ListMultimap inputJobs, ScanWorkerSelector scanWorkerSelector) { + return new UnassignedRecursiveCteScanJob(statementContext, fragment, recursiveCteScanNode, + inputJobs, scanWorkerSelector); + } + private UnassignedJob buildScanRemoteTableJob( StatementContext statementContext, PlanFragment planFragment, List scanNodes, ListMultimap inputJobs, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedRecursiveCteScanJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedRecursiveCteScanJob.java new file mode 100644 index 00000000000000..cfd3ebe7b2b336 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/worker/job/UnassignedRecursiveCteScanJob.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.distribute.worker.job; + +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.trees.plans.distribute.DistributeContext; +import org.apache.doris.nereids.trees.plans.distribute.worker.DistributedPlanWorker; +import org.apache.doris.nereids.trees.plans.distribute.worker.DistributedPlanWorkerManager; +import org.apache.doris.nereids.trees.plans.distribute.worker.ScanWorkerSelector; +import org.apache.doris.planner.ExchangeNode; +import org.apache.doris.planner.PlanFragment; +import org.apache.doris.planner.ScanNode; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ListMultimap; + +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * UnassignedRecursiveCteScanJob + */ +public class UnassignedRecursiveCteScanJob extends AbstractUnassignedScanJob { + private final ScanWorkerSelector scanWorkerSelector; + + public UnassignedRecursiveCteScanJob( + StatementContext statementContext, PlanFragment fragment, ScanNode scanNode, + ListMultimap exchangeToChildJob, ScanWorkerSelector scanWorkerSelector) { + super(statementContext, fragment, ImmutableList.of(scanNode), exchangeToChildJob); + this.scanWorkerSelector = Objects.requireNonNull(scanWorkerSelector, "scanWorkerSelector is not null"); + } + + @Override + protected Map multipleMachinesParallelization( + DistributeContext distributeContext, ListMultimap inputJobs) { + return 
scanWorkerSelector.selectReplicaAndWorkerWithoutBucket( + scanNodes.get(0), statementContext.getConnectContext() + ); + } + + @Override + protected List fillUpAssignedJobs(List assignedJobs, + DistributedPlanWorkerManager workerManager, ListMultimap inputJobs) { + Preconditions.checkArgument(!assignedJobs.isEmpty(), + "assignedJobs is empty for UnassignedRecursiveCteScanJob"); + return assignedJobs; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTE.java index 4f810c3b6def8b..5ceb0e55bca5e2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTE.java @@ -41,21 +41,28 @@ public class LogicalCTE extends LogicalUnary implements PropagateFuncDeps { private final List> aliasQueries; + private final boolean isRecursiveCte; - public LogicalCTE(List> aliasQueries, CHILD_TYPE child) { - this(aliasQueries, Optional.empty(), Optional.empty(), child); + public LogicalCTE(boolean isRecursiveCte, List> aliasQueries, CHILD_TYPE child) { + this(isRecursiveCte, aliasQueries, Optional.empty(), Optional.empty(), child); } - public LogicalCTE(List> aliasQueries, Optional groupExpression, - Optional logicalProperties, CHILD_TYPE child) { + public LogicalCTE(boolean isRecursiveCte, List> aliasQueries, + Optional groupExpression, Optional logicalProperties, + CHILD_TYPE child) { super(PlanType.LOGICAL_CTE, groupExpression, logicalProperties, child); this.aliasQueries = ImmutableList.copyOf(Objects.requireNonNull(aliasQueries, "aliasQueries can not be null")); + this.isRecursiveCte = isRecursiveCte; } public List> getAliasQueries() { return aliasQueries; } + public boolean isRecursiveCte() { + return isRecursiveCte; + } + @Override public List extraPlans() { return aliasQueries; @@ -74,6 +81,7 @@ public List computeOutput() { 
@Override public String toString() { return Utils.toSqlString("LogicalCTE", + "isRecursiveCte", isRecursiveCte, "aliasQueries", aliasQueries ); } @@ -105,18 +113,18 @@ public boolean equals(Object o) { return false; } LogicalCTE that = (LogicalCTE) o; - return aliasQueries.equals(that.aliasQueries); + return aliasQueries.equals(that.aliasQueries) && isRecursiveCte == that.isRecursiveCte; } @Override public int hashCode() { - return Objects.hash(aliasQueries); + return Objects.hash(aliasQueries, isRecursiveCte); } @Override public Plan withChildren(List children) { Preconditions.checkArgument(aliasQueries.size() > 0); - return new LogicalCTE<>(aliasQueries, children.get(0)); + return new LogicalCTE<>(isRecursiveCte, aliasQueries, children.get(0)); } @Override @@ -131,13 +139,14 @@ public List getExpressions() { @Override public LogicalCTE withGroupExpression(Optional groupExpression) { - return new LogicalCTE<>(aliasQueries, groupExpression, Optional.of(getLogicalProperties()), child()); + return new LogicalCTE<>(isRecursiveCte, aliasQueries, groupExpression, + Optional.of(getLogicalProperties()), child()); } @Override public Plan withGroupExprLogicalPropChildren(Optional groupExpression, Optional logicalProperties, List children) { Preconditions.checkArgument(aliasQueries.size() > 0); - return new LogicalCTE<>(aliasQueries, groupExpression, logicalProperties, children.get(0)); + return new LogicalCTE<>(isRecursiveCte, aliasQueries, groupExpression, logicalProperties, children.get(0)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCte.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCte.java new file mode 100644 index 00000000000000..bef00db5f72092 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCte.java @@ -0,0 +1,284 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor 
license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.DataTrait; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.ExprId; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.algebra.RecursiveCte; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.util.TypeCoercionUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.base.Preconditions; +import 
com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +/** + * LogicalRecursiveCte is basically like LogicalUnion + */ +public class LogicalRecursiveCte extends AbstractLogicalPlan implements RecursiveCte, OutputPrunable { + private final String cteName; + private final List outputs; + private final List> regularChildrenOutputs; + private final boolean isUnionAll; + + /** LogicalRecursiveCte */ + public LogicalRecursiveCte(String cteName, boolean isUnionAll, List children) { + this(cteName, isUnionAll, ImmutableList.of(), ImmutableList.of(), children); + } + + /** LogicalRecursiveCte */ + public LogicalRecursiveCte(String cteName, boolean isUnionAll, List outputs, + List> childrenOutputs, List children) { + this(cteName, isUnionAll, outputs, childrenOutputs, Optional.empty(), + Optional.empty(), + children); + } + + /** LogicalRecursiveCte */ + public LogicalRecursiveCte(String cteName, boolean isUnionAll, List outputs, + List> childrenOutputs, + Optional groupExpression, Optional logicalProperties, + List children) { + super(PlanType.LOGICAL_RECURSIVE_CTE, groupExpression, logicalProperties, children); + this.cteName = cteName; + this.isUnionAll = isUnionAll; + this.outputs = ImmutableList.copyOf(outputs); + this.regularChildrenOutputs = ImmutableList.copyOf(childrenOutputs); + } + + @Override + public boolean isUnionAll() { + return isUnionAll; + } + + public String getCteName() { + return cteName; + } + + @Override + public List getRegularChildOutput(int i) { + return regularChildrenOutputs.get(i); + } + + @Override + public List> getRegularChildrenOutputs() { + return regularChildrenOutputs; + } + + public List> collectChildrenProjections() { + return castCommonDataTypeOutputs(); + } + + private List> castCommonDataTypeOutputs() { + int childOutputSize = child(0).getOutput().size(); + ImmutableList.Builder newLeftOutputs = ImmutableList.builderWithExpectedSize( + childOutputSize); + 
ImmutableList.Builder newRightOutputs = ImmutableList.builderWithExpectedSize( + childOutputSize + ); + // Ensure that the output types of the left and right children are consistent and expand upward. + for (int i = 0; i < childOutputSize; ++i) { + Slot left = child(0).getOutput().get(i); + Slot right = child(1).getOutput().get(i); + DataType compatibleType; + try { + compatibleType = LogicalSetOperation.getAssignmentCompatibleType(left.getDataType(), + right.getDataType()); + } catch (Exception e) { + throw new AnalysisException( + "Can not find compatible type for " + left + " and " + right + ", " + e.getMessage()); + } + Expression newLeft = TypeCoercionUtils.castIfNotSameTypeStrict(left, compatibleType); + Expression newRight = TypeCoercionUtils.castIfNotSameTypeStrict(right, compatibleType); + if (newLeft instanceof Cast) { + newLeft = new Alias(newLeft, left.getName()); + } + if (newRight instanceof Cast) { + newRight = new Alias(newRight, right.getName()); + } + newLeftOutputs.add((NamedExpression) newLeft); + newRightOutputs.add((NamedExpression) newRight); + } + + return ImmutableList.of(newLeftOutputs.build(), newRightOutputs.build()); + } + + /** + * Generate new output for Recursive Cte. + */ + public List buildNewOutputs() { + List slots = resetNullableForLeftOutputs(); + ImmutableList.Builder newOutputs = ImmutableList.builderWithExpectedSize(slots.size()); + + for (int i = 0; i < slots.size(); i++) { + Slot slot = slots.get(i); + ExprId exprId = i < outputs.size() ? 
outputs.get(i).getExprId() : StatementScopeIdGenerator.newExprId(); + newOutputs.add( + new SlotReference(exprId, slot.toSql(), slot.getDataType(), slot.nullable(), ImmutableList.of()) + ); + } + return newOutputs.build(); + } + + // If the right child is nullable, need to ensure that the left child is also nullable + private List resetNullableForLeftOutputs() { + int rightChildOutputSize = child(1).getOutput().size(); + ImmutableList.Builder resetNullableForLeftOutputs + = ImmutableList.builderWithExpectedSize(rightChildOutputSize); + for (int i = 0; i < rightChildOutputSize; ++i) { + if (child(1).getOutput().get(i).nullable() && !child(0).getOutput().get(i).nullable()) { + resetNullableForLeftOutputs.add(child(0).getOutput().get(i).withNullable(true)); + } else { + resetNullableForLeftOutputs.add(child(0).getOutput().get(i)); + } + } + return resetNullableForLeftOutputs.build(); + } + + @Override + public String toString() { + return Utils.toSqlStringSkipNull("LogicalRecursiveCte", + "cteName", cteName, + "isUnionAll", isUnionAll, + "outputs", outputs, + "regularChildrenOutputs", regularChildrenOutputs, + "stats", statistics); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LogicalRecursiveCte that = (LogicalRecursiveCte) o; + return cteName.equals(that.cteName) && isUnionAll == that.isUnionAll && Objects.equals(outputs, that.outputs) + && Objects.equals(regularChildrenOutputs, that.regularChildrenOutputs); + } + + @Override + public int hashCode() { + return Objects.hash(cteName, isUnionAll, outputs, regularChildrenOutputs); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitLogicalRecursiveCte(this, context); + } + + @Override + public List getExpressions() { + return regularChildrenOutputs.stream().flatMap(List::stream).collect(ImmutableList.toImmutableList()); + } + + @Override + public List 
computeOutput() { + return outputs.stream() + .map(NamedExpression::toSlot) + .collect(ImmutableList.toImmutableList()); + } + + @Override + public LogicalRecursiveCte withChildren(List children) { + return new LogicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, children); + } + + public LogicalRecursiveCte withChildrenAndTheirOutputs(List children, + List> childrenOutputs) { + Preconditions.checkArgument(children.size() == childrenOutputs.size(), + "children size %s is not equals with children outputs size %s", + children.size(), childrenOutputs.size()); + return new LogicalRecursiveCte(cteName, isUnionAll, outputs, childrenOutputs, children); + } + + @Override + public LogicalRecursiveCte withGroupExpression(Optional groupExpression) { + return new LogicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, + groupExpression, Optional.of(getLogicalProperties()), children); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + return new LogicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, + groupExpression, logicalProperties, children); + } + + public LogicalRecursiveCte withNewOutputs(List newOutputs) { + return new LogicalRecursiveCte(cteName, isUnionAll, newOutputs, regularChildrenOutputs, + Optional.empty(), Optional.empty(), children); + } + + public LogicalRecursiveCte withNewOutputsAndChildren(List newOutputs, + List children, + List> childrenOutputs) { + return new LogicalRecursiveCte(cteName, isUnionAll, newOutputs, childrenOutputs, + Optional.empty(), Optional.empty(), children); + } + + @Override + public List getOutputs() { + return outputs; + } + + @Override + public LogicalRecursiveCte pruneOutputs(List prunedOutputs) { + return withNewOutputs(prunedOutputs); + } + + @Override + public void computeUnique(DataTrait.Builder builder) { + } + + @Override + public void computeUniform(DataTrait.Builder builder) { + 
} + + @Override + public boolean hasUnboundExpression() { + return outputs.isEmpty(); + } + + @Override + public void computeEqualSet(DataTrait.Builder builder) { + } + + @Override + public void computeFd(DataTrait.Builder builder) { + // don't generate + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCteRecursiveChild.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCteRecursiveChild.java new file mode 100644 index 00000000000000..d2766a86f95acf --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCteRecursiveChild.java @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.DataTrait; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Optional; + +/** + * LogicalRecursiveCteRecursiveChild is sentinel plan for must_shuffle + */ +public class LogicalRecursiveCteRecursiveChild extends LogicalUnary { + private final String cteName; + + public LogicalRecursiveCteRecursiveChild(String cteName, CHILD_TYPE child) { + this(cteName, Optional.empty(), Optional.empty(), child); + } + + public LogicalRecursiveCteRecursiveChild(String cteName, Optional groupExpression, + Optional logicalProperties, CHILD_TYPE child) { + this(cteName, groupExpression, logicalProperties, ImmutableList.of(child)); + } + + public LogicalRecursiveCteRecursiveChild(String cteName, Optional groupExpression, + Optional logicalProperties, List child) { + super(PlanType.LOGICAL_RECURSIVE_CTE_RECURSIVE_CHILD, groupExpression, logicalProperties, child); + this.cteName = cteName; + } + + public String getCteName() { + return cteName; + } + + @Override + public Plan withChildren(List children) { + return new LogicalRecursiveCteRecursiveChild<>(cteName, Optional.empty(), Optional.empty(), children); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitLogicalRecursiveCteRecursiveChild(this, context); + } + + @Override + public List getExpressions() { + return ImmutableList.of(); + } + + @Override + public Plan withGroupExpression(Optional 
groupExpression) { + return new LogicalRecursiveCteRecursiveChild<>(cteName, groupExpression, + Optional.of(getLogicalProperties()), children); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + return new LogicalRecursiveCteRecursiveChild<>(cteName, groupExpression, logicalProperties, children); + } + + @Override + public String toString() { + return Utils.toSqlStringSkipNull("LogicalRecursiveCteRecursiveChild", + "cteName", cteName); + } + + @Override + public void computeUnique(DataTrait.Builder builder) { + + } + + @Override + public void computeUniform(DataTrait.Builder builder) { + + } + + @Override + public void computeEqualSet(DataTrait.Builder builder) { + + } + + @Override + public void computeFd(DataTrait.Builder builder) { + + } + + @Override + public List computeOutput() { + return child().getOutput(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCteScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCteScan.java new file mode 100644 index 00000000000000..5d1ed487847a80 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRecursiveCteScan.java @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.catalog.TableIf; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.RelationId; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.Utils; + +import java.util.List; +import java.util.Optional; + +/** + * LogicalRecursiveCteScan. 
+ */ +public class LogicalRecursiveCteScan extends LogicalCatalogRelation { + public LogicalRecursiveCteScan(RelationId relationId, TableIf table, List qualifier) { + this(relationId, table, qualifier, Optional.empty(), Optional.empty()); + } + + private LogicalRecursiveCteScan(RelationId relationId, TableIf table, List qualifier, + Optional groupExpression, Optional logicalProperties) { + super(relationId, PlanType.LOGICAL_RECURSIVE_CTE_SCAN, table, qualifier, groupExpression, logicalProperties); + } + + @Override + public String toString() { + return Utils.toSqlString("LogicalRecursiveCteScan", + "cteName", table.getName()); + } + + @Override + public Plan withGroupExpression(Optional groupExpression) { + return new LogicalRecursiveCteScan(relationId, table, qualifier, + groupExpression, Optional.ofNullable(getLogicalProperties())); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + return new LogicalRecursiveCteScan(relationId, table, qualifier, groupExpression, logicalProperties); + } + + @Override + public LogicalCatalogRelation withRelationId(RelationId relationId) { + return new LogicalRecursiveCteScan(relationId, table, qualifier, + groupExpression, Optional.ofNullable(getLogicalProperties())); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitLogicalRecursiveCteScan(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java index 2f49dc1ce85102..8b18d3ce49d95b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.trees.plans.logical; +import 
org.apache.doris.nereids.analyzer.UnboundRelation; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.DataTrait; import org.apache.doris.nereids.properties.LogicalProperties; @@ -27,6 +28,7 @@ import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.RelationId; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.LazyCompute; import org.apache.doris.nereids.util.Utils; import com.google.common.base.Preconditions; @@ -42,6 +44,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.function.Supplier; import java.util.stream.Collectors; /** @@ -55,6 +58,7 @@ public class LogicalSubQueryAlias extends LogicalUnary< protected RelationId relationId; private final List qualifier; private final Optional> columnAliases; + private final Supplier isRecursiveCte; public LogicalSubQueryAlias(String tableAlias, CHILD_TYPE child) { this(ImmutableList.of(tableAlias), Optional.empty(), Optional.empty(), Optional.empty(), child); @@ -78,6 +82,7 @@ public LogicalSubQueryAlias(List qualifier, Optional> colum super(PlanType.LOGICAL_SUBQUERY_ALIAS, groupExpression, logicalProperties, child); this.qualifier = ImmutableList.copyOf(Objects.requireNonNull(qualifier, "qualifier is null")); this.columnAliases = columnAliases; + this.isRecursiveCte = computeIsRecursiveCte(); } @Override @@ -121,6 +126,23 @@ private List computeOutputInternal(boolean asteriskOutput) { return currentOutput.build(); } + private Supplier computeIsRecursiveCte() { + return LazyCompute.of(() -> { + List relationList = collectToList(UnboundRelation.class::isInstance); + for (UnboundRelation relation : relationList) { + List nameParts = relation.getNameParts(); + if (nameParts.size() == 1 && nameParts.get(0).equalsIgnoreCase(getAlias())) { + return true; + } + } + return false; + }); + } + + public boolean isRecursiveCte() { + return 
isRecursiveCte.get(); + } + public String getAlias() { return qualifier.get(qualifier.size() - 1); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCte.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCte.java new file mode 100644 index 00000000000000..44aab38fc304b1 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCte.java @@ -0,0 +1,292 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.plans.physical; + +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.DataTrait; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.algebra.RecursiveCte; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.Utils; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.Statistics; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; + +/** + * PhysicalRecursiveCte is basically like PhysicalUnion + */ +public class PhysicalRecursiveCte extends AbstractPhysicalPlan implements RecursiveCte { + private final String cteName; + private final List outputs; + private final List> regularChildrenOutputs; + private final boolean isUnionAll; + + /** PhysicalRecursiveCte */ + public PhysicalRecursiveCte(String cteName, boolean isUnionAll, + List outputs, + List> childrenOutputs, + LogicalProperties logicalProperties, + List children) { + this(cteName, isUnionAll, outputs, childrenOutputs, Optional.empty(), logicalProperties, children); + } + + /** PhysicalRecursiveCte */ + public PhysicalRecursiveCte(String cteName, boolean isUnionAll, + List outputs, + List> childrenOutputs, + Optional 
groupExpression, + LogicalProperties logicalProperties, + List children) { + this(cteName, isUnionAll, outputs, childrenOutputs, groupExpression, logicalProperties, + PhysicalProperties.ANY, null, children); + } + + /** PhysicalRecursiveCte */ + public PhysicalRecursiveCte(String cteName, boolean isUnionAll, List outputs, + List> childrenOutputs, + Optional groupExpression, LogicalProperties logicalProperties, + PhysicalProperties physicalProperties, Statistics statistics, List children) { + super(PlanType.PHYSICAL_RECURSIVE_CTE, groupExpression, logicalProperties, physicalProperties, + statistics, children.toArray(new Plan[0])); + this.cteName = cteName; + this.isUnionAll = isUnionAll; + this.outputs = ImmutableList.copyOf(outputs); + this.regularChildrenOutputs = ImmutableList.copyOf(childrenOutputs); + } + + @Override + public boolean isUnionAll() { + return isUnionAll; + } + + public String getCteName() { + return cteName; + } + + @Override + public List getRegularChildOutput(int i) { + return regularChildrenOutputs.get(i); + } + + @Override + public List getOutputs() { + return outputs; + } + + @Override + public List computeOutput() { + return outputs.stream() + .map(NamedExpression::toSlot) + .collect(ImmutableList.toImmutableList()); + } + + @Override + public List> getRegularChildrenOutputs() { + return regularChildrenOutputs; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PhysicalRecursiveCte that = (PhysicalRecursiveCte) o; + return cteName.equals(that.cteName) && isUnionAll == that.isUnionAll && Objects.equals(outputs, that.outputs) + && Objects.equals(regularChildrenOutputs, that.regularChildrenOutputs); + } + + @Override + public int hashCode() { + return Objects.hash(cteName, isUnionAll, outputs, regularChildrenOutputs); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return 
visitor.visitPhysicalRecursiveCte(this, context); + } + + @Override + public List getExpressions() { + return regularChildrenOutputs.stream().flatMap(List::stream).collect(ImmutableList.toImmutableList()); + } + + @Override + public String toString() { + return Utils.toSqlString("PhysicalRecursiveCte" + "[" + id.asInt() + "]" + getGroupIdWithPrefix(), + "stats", statistics, + "cteName", cteName, + "isUnionAll", isUnionAll, + "outputs", outputs, + "regularChildrenOutputs", regularChildrenOutputs); + } + + @Override + public String shapeInfo() { + ConnectContext context = ConnectContext.get(); + if (context != null + && context.getSessionVariable().getDetailShapePlanNodesSet().contains(getClass().getSimpleName())) { + StringBuilder builder = new StringBuilder(); + builder.append(getClass().getSimpleName()); + builder.append(")"); + return builder.toString(); + } else { + return super.shapeInfo(); + } + } + + @Override + public PhysicalRecursiveCte withChildren(List children) { + return new PhysicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, groupExpression, + getLogicalProperties(), children); + } + + @Override + public PhysicalRecursiveCte withGroupExpression(Optional groupExpression) { + return new PhysicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, + groupExpression, getLogicalProperties(), children); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + return new PhysicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, + groupExpression, logicalProperties.get(), children); + } + + @Override + public PhysicalRecursiveCte withPhysicalPropertiesAndStats( + PhysicalProperties physicalProperties, Statistics statistics) { + return new PhysicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, + groupExpression, getLogicalProperties(), physicalProperties, statistics, children); + } + + @Override + public 
PhysicalRecursiveCte resetLogicalProperties() { + return new PhysicalRecursiveCte(cteName, isUnionAll, outputs, regularChildrenOutputs, + Optional.empty(), null, physicalProperties, statistics, children); + } + + @Override + public void computeUnique(DataTrait.Builder builder) { + if (!isUnionAll) { + builder.addUniqueSlot(ImmutableSet.copyOf(getOutput())); + } + } + + @Override + public void computeUniform(DataTrait.Builder builder) { + // don't propagate uniform slots + } + + private List mapSlotToIndex(Plan plan, List> equalSlotsList) { + Map slotToIndex = new HashMap<>(); + for (int i = 0; i < plan.getOutput().size(); i++) { + slotToIndex.put(plan.getOutput().get(i), i); + } + List equalSlotIndicesList = new ArrayList<>(); + for (Set equalSlots : equalSlotsList) { + BitSet equalSlotIndices = new BitSet(); + for (Slot slot : equalSlots) { + if (slotToIndex.containsKey(slot)) { + equalSlotIndices.set(slotToIndex.get(slot)); + } + } + if (equalSlotIndices.cardinality() > 1) { + equalSlotIndicesList.add(equalSlotIndices); + } + } + return equalSlotIndicesList; + } + + @Override + public void computeEqualSet(DataTrait.Builder builder) { + if (children.isEmpty()) { + return; + } + + // Get the list of equal slot sets and their corresponding index mappings for the first child + List> childEqualSlotsList = child(0).getLogicalProperties() + .getTrait().calAllEqualSet(); + List childEqualSlotsIndicesList = mapSlotToIndex(child(0), childEqualSlotsList); + List unionEqualSlotIndicesList = new ArrayList<>(childEqualSlotsIndicesList); + + // Traverse all children and find the equal sets that exist in all children + for (int i = 1; i < children.size(); i++) { + Plan child = children.get(i); + + // Get the equal slot sets for the current child + childEqualSlotsList = child.getLogicalProperties().getTrait().calAllEqualSet(); + + // Map slots to indices for the current child + childEqualSlotsIndicesList = mapSlotToIndex(child, childEqualSlotsList); + + // Only keep the equal 
pairs that exist in all children of the union + // This is done by calculating the intersection of all children's equal slot indices + for (BitSet unionEqualSlotIndices : unionEqualSlotIndicesList) { + BitSet intersect = new BitSet(); + for (BitSet childEqualSlotIndices : childEqualSlotsIndicesList) { + if (unionEqualSlotIndices.intersects(childEqualSlotIndices)) { + intersect = childEqualSlotIndices; + break; + } + } + unionEqualSlotIndices.and(intersect); + } + } + + // Build the functional dependencies for the output slots + List outputList = getOutput(); + for (BitSet equalSlotIndices : unionEqualSlotIndicesList) { + if (equalSlotIndices.cardinality() <= 1) { + continue; + } + int first = equalSlotIndices.nextSetBit(0); + int next = equalSlotIndices.nextSetBit(first + 1); + while (next > 0) { + builder.addEqualPair(outputList.get(first), outputList.get(next)); + next = equalSlotIndices.nextSetBit(next + 1); + } + } + } + + @Override + public void computeFd(DataTrait.Builder builder) { + // don't generate + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCteRecursiveChild.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCteRecursiveChild.java new file mode 100644 index 00000000000000..9aef71e7ee9404 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCteRecursiveChild.java @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.plans.physical;

import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.DataTrait;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.Statistics;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.jetbrains.annotations.Nullable;

import java.util.List;
import java.util.Optional;

/**
 * PhysicalRecursiveCteRecursiveChild is a sentinel plan for must_shuffle.
 *
 * <p>It is a unary physical node whose only own state is the CTE name. It exposes no
 * expressions and contributes nothing to the unique / uniform / equal-set / FD data traits.
 */
public class PhysicalRecursiveCteRecursiveChild<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> {
    private final String cteName;

    /** Creates the node without a group expression. */
    public PhysicalRecursiveCteRecursiveChild(String cteName, LogicalProperties logicalProperties, CHILD_TYPE child) {
        this(cteName, Optional.empty(), logicalProperties, child);
    }

    /** Creates the node with {@link PhysicalProperties#ANY} and no statistics. */
    public PhysicalRecursiveCteRecursiveChild(String cteName, Optional<GroupExpression> groupExpression,
            LogicalProperties logicalProperties, CHILD_TYPE child) {
        this(cteName, groupExpression, logicalProperties, PhysicalProperties.ANY, null, child);
    }

    /** Full constructor; all other constructors delegate here. */
    public PhysicalRecursiveCteRecursiveChild(String cteName, Optional<GroupExpression> groupExpression,
            LogicalProperties logicalProperties, @Nullable PhysicalProperties physicalProperties,
            Statistics statistics, CHILD_TYPE child) {
        super(PlanType.PHYSICAL_RECURSIVE_CTE_RECURSIVE_CHILD, groupExpression, logicalProperties,
                physicalProperties, statistics, child);
        this.cteName = cteName;
    }

    @Override
    public String toString() {
        return Utils.toSqlStringSkipNull("PhysicalRecursiveCteRecursiveChild",
                "cteName", cteName);
    }

    @Override
    public Plan withChildren(List<Plan> children) {
        Preconditions.checkArgument(children.size() == 1);
        return new PhysicalRecursiveCteRecursiveChild<>(cteName, groupExpression, getLogicalProperties(),
                children.get(0));
    }

    @Override
    public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
        return visitor.visitPhysicalRecursiveCteRecursiveChild(this, context);
    }

    @Override
    public List<? extends Expression> getExpressions() {
        return ImmutableList.of();
    }

    @Override
    public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
        return new PhysicalRecursiveCteRecursiveChild<>(cteName, groupExpression, getLogicalProperties(), child());
    }

    /**
     * Returns a copy with the given group expression, logical properties and single child.
     *
     * @throws IllegalArgumentException if {@code children} does not contain exactly one plan
     */
    @Override
    public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
            Optional<LogicalProperties> logicalProperties, List<Plan> children) {
        Preconditions.checkArgument(children.size() == 1);
        // Use the child that was passed in; the previous code validated the list and then
        // silently kept the current child().
        return new PhysicalRecursiveCteRecursiveChild<>(cteName, groupExpression, logicalProperties.get(),
                children.get(0));
    }

    @Override
    public void computeUnique(DataTrait.Builder builder) {
        // intentionally contributes no unique slots
    }

    @Override
    public void computeUniform(DataTrait.Builder builder) {
        // intentionally contributes no uniform slots
    }

    @Override
    public void computeEqualSet(DataTrait.Builder builder) {
        // intentionally contributes no equal sets
    }

    @Override
    public void computeFd(DataTrait.Builder builder) {
        // intentionally contributes no functional dependencies
    }

    @Override
    public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties, Statistics statistics) {
        return new PhysicalRecursiveCteRecursiveChild<>(cteName, groupExpression, getLogicalProperties(),
                physicalProperties, statistics, child());
    }
}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCteScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCteScan.java new file mode 100644 index 00000000000000..3450ae0de189b8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRecursiveCteScan.java @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.physical; + +import org.apache.doris.catalog.TableIf; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.RelationId; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.Utils; +import org.apache.doris.statistics.Statistics; + +import java.util.Collection; +import java.util.List; +import java.util.Optional; + +/** + * PhysicalRecursiveCteScan. 
+ */ +public class PhysicalRecursiveCteScan extends PhysicalCatalogRelation { + public PhysicalRecursiveCteScan(RelationId relationId, TableIf table, List qualifier, + Optional groupExpression, LogicalProperties logicalProperties, + Collection operativeSlots) { + this(relationId, table, qualifier, groupExpression, logicalProperties, PhysicalProperties.ANY, null, + operativeSlots); + } + + public PhysicalRecursiveCteScan(RelationId relationId, TableIf table, List qualifier, + Optional groupExpression, LogicalProperties logicalProperties, + PhysicalProperties physicalProperties, Statistics statistics, Collection operativeSlots) { + super(relationId, PlanType.PHYSICAL_RECURSIVE_CTE_SCAN, table, qualifier, groupExpression, logicalProperties, + physicalProperties, statistics, operativeSlots); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitPhysicalRecursiveCteScan(this, context); + } + + @Override + public Plan withGroupExpression(Optional groupExpression) { + return new PhysicalRecursiveCteScan(relationId, table, qualifier, groupExpression, getLogicalProperties(), + physicalProperties, statistics, operativeSlots); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + return new PhysicalRecursiveCteScan(relationId, table, qualifier, groupExpression, getLogicalProperties(), + physicalProperties, statistics, operativeSlots); + } + + @Override + public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties, Statistics statistics) { + return new PhysicalRecursiveCteScan(relationId, table, qualifier, groupExpression, getLogicalProperties(), + physicalProperties, statistics, operativeSlots); + } + + @Override + public String toString() { + return Utils.toSqlString("PhysicalRecursiveCteScan[" + table.getName() + "]" + getGroupIdWithPrefix(), + "stats", statistics, + "qualified", Utils.qualifiedName(qualifier, 
table.getName()), + "operativeCols", getOperativeSlots()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index 2a1c8c4dc59dd6..f2c9ec69bde22c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -45,6 +45,8 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalPreFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteRecursiveChild; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSelectHint; @@ -79,6 +81,8 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalQuickSort; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCte; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteRecursiveChild; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; @@ -212,6 +216,15 @@ public R visitLogicalJoin(LogicalJoin join, C co return visit(join, context); } + public R visitLogicalRecursiveCte(LogicalRecursiveCte recursiveCte, C context) { + return visit(recursiveCte, context); + } + + public R 
visitLogicalRecursiveCteRecursiveChild(LogicalRecursiveCteRecursiveChild recursiveChild, + C context) { + return visit(recursiveChild, context); + } + public R visitLogicalLimit(LogicalLimit limit, C context) { return visit(limit, context); } @@ -381,6 +394,15 @@ public R visitPhysicalUnion(PhysicalUnion union, C context) { return visitPhysicalSetOperation(union, context); } + public R visitPhysicalRecursiveCte(PhysicalRecursiveCte recursiveCte, C context) { + return visit(recursiveCte, context); + } + + public R visitPhysicalRecursiveCteRecursiveChild(PhysicalRecursiveCteRecursiveChild recursiveChild, + C context) { + return visit(recursiveChild, context); + } + public R visitAbstractPhysicalSort(AbstractPhysicalSort sort, C context) { return visit(sort, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/RelationVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/RelationVisitor.java index fef94ff52f97c3..b325849b1b1022 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/RelationVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/RelationVisitor.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalOdbcScan; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalSchemaScan; import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation; @@ -45,6 +46,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalOdbcScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalOneRowRelation; +import 
org.apache.doris.nereids.trees.plans.physical.PhysicalRecursiveCteScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalSchemaScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalTVFRelation; @@ -139,6 +141,10 @@ default R visitLogicalTestScan(LogicalTestScan testScan, C context) { return visitLogicalCatalogRelation(testScan, context); } + default R visitLogicalRecursiveCteScan(LogicalRecursiveCteScan recursiveCteScan, C context) { + return visitLogicalCatalogRelation(recursiveCteScan, context); + } + // ******************************* // physical relations // ******************************* @@ -176,6 +182,10 @@ default R visitPhysicalDeferMaterializeOlapScan( return visitPhysicalCatalogRelation(deferMaterializeOlapScan, context); } + default R visitPhysicalRecursiveCteScan(PhysicalRecursiveCteScan recursiveCteScan, C context) { + return visitPhysicalCatalogRelation(recursiveCteScan, context); + } + default R visitPhysicalOneRowRelation(PhysicalOneRowRelation oneRowRelation, C context) { return visitPhysicalRelation(oneRowRelation, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RecursiveCteNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RecursiveCteNode.java new file mode 100644 index 00000000000000..c1531813d3cea9 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RecursiveCteNode.java @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.planner; + +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.TupleId; +import org.apache.doris.statistics.StatisticalType; +import org.apache.doris.thrift.TExplainLevel; +import org.apache.doris.thrift.TPlanNode; +import org.apache.doris.thrift.TPlanNodeType; +import org.apache.doris.thrift.TRecCTENode; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.Lists; + +import java.util.List; + +public class RecursiveCteNode extends PlanNode { + private String cteName; + private boolean isUnionAll; + private List> materializedResultExprLists = Lists.newArrayList(); + private TRecCTENode tRecCTENode; + + public RecursiveCteNode(PlanNodeId id, TupleId tupleId, String cteName, boolean isUnionAll) { + super(id, tupleId.asList(), "RECURSIVE_CTE", StatisticalType.REC_CTE_NODE); + this.cteName = cteName; + this.isUnionAll = isUnionAll; + } + + public boolean isUnionAll() { + return isUnionAll; + } + + public void setMaterializedResultExprLists(List> exprs) { + this.materializedResultExprLists = exprs; + } + + public List> getMaterializedResultExprLists() { + return materializedResultExprLists; + } + + public void settRecCTENode(TRecCTENode tRecCTENode) { + this.tRecCTENode = tRecCTENode; + } + + @Override + protected void toThrift(TPlanNode msg) { + msg.node_type = TPlanNodeType.REC_CTE_NODE; + msg.rec_cte_node = tRecCTENode; + } + + @Override + public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { + StringBuilder output = new StringBuilder(); + 
output.append(prefix).append("Recursive Cte: ").append(cteName).append("\n"); + output.append(prefix).append("isUnionAll: ").append(isUnionAll).append("\n"); + if (!conjuncts.isEmpty()) { + Expr expr = convertConjunctsToAndCompoundPredicate(conjuncts); + output.append(prefix).append("PREDICATES: ").append(expr.toSql()).append("\n"); + } + return output.toString(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("name", cteName) + .add("id", getId().asInt()) + .add("tid", tupleIds.get(0).asInt()) + .add("isUnionAll", isUnionAll).toString(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RecursiveCteScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RecursiveCteScanNode.java new file mode 100644 index 00000000000000..97aedcb1f9dafd --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RecursiveCteScanNode.java @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.planner; + +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.catalog.Env; +import org.apache.doris.common.UserException; +import org.apache.doris.statistics.StatisticalType; +import org.apache.doris.system.Backend; +import org.apache.doris.thrift.TExplainLevel; +import org.apache.doris.thrift.TNetworkAddress; +import org.apache.doris.thrift.TPlanNode; +import org.apache.doris.thrift.TPlanNodeType; +import org.apache.doris.thrift.TScanRange; +import org.apache.doris.thrift.TScanRangeLocation; +import org.apache.doris.thrift.TScanRangeLocations; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.Lists; + +import java.util.Collections; +import java.util.List; + +// Full scan of recursive cte temp table +public class RecursiveCteScanNode extends ScanNode { + private final String recursiveCteName; + + public RecursiveCteScanNode(String recursiveCteName, PlanNodeId id, TupleDescriptor desc, + ScanContext scanContext) { + super(id, desc, "RECURSIVE_CTE_SCAN", scanContext, StatisticalType.REC_CTE_SCAN_NODE); + this.recursiveCteName = recursiveCteName; + } + + public void initScanRangeLocations() throws UserException { + createScanRangeLocations(); + } + + @Override + protected void createScanRangeLocations() throws UserException { + scanRangeLocations = Lists.newArrayList(); + // randomly select 1 backend + List backendList = Lists.newArrayList(); + for (Backend be : Env.getCurrentSystemInfo().getBackendsByCurrentCluster().values()) { + if (be.isAlive()) { + backendList.add(be); + } + } + if (backendList.isEmpty()) { + throw new UserException("No Alive backends"); + } + Collections.shuffle(backendList); + Backend selectedBackend = backendList.get(0); + + // create scan range locations + TScanRangeLocation location = new TScanRangeLocation(); + location.setBackendId(selectedBackend.getId()); + location.setServer(new 
TNetworkAddress(selectedBackend.getHost(), selectedBackend.getBePort())); + TScanRangeLocations locations = new TScanRangeLocations(); + locations.addToLocations(location); + locations.setScanRange(new TScanRange()); + scanRangeLocations.add(locations); + } + + @Override + public List getScanRangeLocations(long maxScanRangeLength) { + return scanRangeLocations; + } + + @Override + public int getNumInstances() { + return 1; + } + + @Override + public int getScanRangeNum() { + return 1; + } + + @Override + public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { + StringBuilder output = new StringBuilder(); + output.append(prefix).append("Recursive Cte: ").append(recursiveCteName).append("\n"); + if (!conjuncts.isEmpty()) { + Expr expr = convertConjunctsToAndCompoundPredicate(conjuncts); + output.append(prefix).append("PREDICATES: ").append(expr.toSql()).append("\n"); + } + return output.toString(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("recursiveCteName", recursiveCteName) + .add("id", getId().asInt()) + .add("tid", desc.getId().asInt()).toString(); + } + + @Override + protected void toThrift(TPlanNode msg) { + msg.node_type = TPlanNodeType.REC_CTE_SCAN_NODE; + } + + @Override + public boolean isSerialOperator() { + return true; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index cf6e8b443567b8..b683faa3f0f900 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -838,6 +838,8 @@ public class SessionVariable implements Serializable, Writable { public static final String READ_HIVE_JSON_IN_ONE_COLUMN = "read_hive_json_in_one_column"; + public static final String CTE_MAX_RECURSION_DEPTH = "cte_max_recursion_depth"; + /** * Inserting overwrite for auto partition table allows creating 
partition for * datas which cannot find partition to overwrite. @@ -1078,6 +1080,11 @@ public static double getHotValueThreshold() { }, varType = VariableAnnotation.DEPRECATED) public int minScanSchedulerConcurrency = 0; + @VariableMgr.VarAttr(name = CTE_MAX_RECURSION_DEPTH, needForward = true, description = { + "CTE递归的最大深度,默认值100", + "The maximum depth of CTE recursion. Default is 100" }) + public int cteMaxRecursionDepth = 100; + // By default, the number of Limit items after OrderBy is changed from 65535 items // before v1.2.0 (not included), to return all items by default @VariableMgr.VarAttr(name = DEFAULT_ORDER_BY_LIMIT, affectQueryResultInExecution = true) @@ -5263,6 +5270,7 @@ public TQueryOptions toThrift() { tResult.setInvertedIndexCompatibleRead(invertedIndexCompatibleRead); tResult.setEnableInvertedIndexWandQuery(enableInvertedIndexWandQuery); + tResult.setCteMaxRecursionDepth(cteMaxRecursionDepth); tResult.setEnableParallelScan(enableParallelScan); tResult.setEnableLeftSemiDirectReturnOpt(enableLeftSemiDirectReturnOpt); tResult.setEnableAggregateFunctionNullV2(enableAggregateFunctionNullV2); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/ThriftPlansBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/ThriftPlansBuilder.java index 13391013f52679..0738876282f51a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/ThriftPlansBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/runtime/ThriftPlansBuilder.java @@ -17,6 +17,7 @@ package org.apache.doris.qe.runtime; +import org.apache.doris.analysis.Expr; import org.apache.doris.catalog.AIResource; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Resource; @@ -41,6 +42,11 @@ import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.OlapTableSink; import org.apache.doris.planner.PlanFragment; +import org.apache.doris.planner.PlanFragmentId; +import org.apache.doris.planner.PlanNode; +import 
org.apache.doris.planner.RecursiveCteNode; +import org.apache.doris.planner.RecursiveCteScanNode; +import org.apache.doris.planner.RuntimeFilter; import org.apache.doris.planner.ScanNode; import org.apache.doris.planner.SortNode; import org.apache.doris.qe.ConnectContext; @@ -48,6 +54,7 @@ import org.apache.doris.thrift.PaloInternalServiceVersion; import org.apache.doris.thrift.TAIResource; import org.apache.doris.thrift.TDataSinkType; +import org.apache.doris.thrift.TExpr; import org.apache.doris.thrift.TFileScanRangeParams; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPipelineFragmentParams; @@ -56,10 +63,14 @@ import org.apache.doris.thrift.TPlanFragment; import org.apache.doris.thrift.TPlanFragmentDestination; import org.apache.doris.thrift.TQueryOptions; +import org.apache.doris.thrift.TRecCTENode; +import org.apache.doris.thrift.TRecCTEResetInfo; +import org.apache.doris.thrift.TRecCTETarget; import org.apache.doris.thrift.TRuntimeFilterInfo; import org.apache.doris.thrift.TRuntimeFilterParams; import org.apache.doris.thrift.TScanRangeParams; import org.apache.doris.thrift.TTopnFilterDesc; +import org.apache.doris.thrift.TUniqueId; import com.google.common.base.Suppliers; import com.google.common.collect.ArrayListMultimap; @@ -75,10 +86,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; import java.util.function.BiConsumer; import java.util.function.Supplier; @@ -89,6 +103,8 @@ public static Map plansToThr CoordinatorContext coordinatorContext) { List distributedPlans = coordinatorContext.distributedPlans; + Set fragmentToNotifyClose = setParamsForRecursiveCteNode(distributedPlans, + coordinatorContext.runtimeFilters); // Determine whether this query is assigned to a single backend and propagate it to // 
TQueryOptions so that BE can apply more appropriate optimization strategies (e.g. @@ -119,7 +135,7 @@ public static Map plansToThr TPipelineFragmentParams currentFragmentParam = fragmentToThriftIfAbsent( currentFragmentPlan, instanceJob, workerToCurrentFragment, instancesPerWorker, exchangeSenderNum, sharedFileScanRangeParams, - workerProcessInstanceNum, coordinatorContext); + workerProcessInstanceNum, fragmentToNotifyClose, coordinatorContext); TPipelineInstanceParams instanceParam = instanceToThrift( currentFragmentParam, instanceJob, currentInstanceIndex++); @@ -330,6 +346,7 @@ private static TPipelineFragmentParams fragmentToThriftIfAbsent( Map exchangeSenderNum, Map fileScanRangeParamsMap, Multiset workerProcessInstanceNum, + Set fragmentToNotifyClose, CoordinatorContext coordinatorContext) { DistributedPlanWorker worker = assignedJob.getAssignedWorker(); return workerToFragmentParams.computeIfAbsent(worker, w -> { @@ -343,6 +360,9 @@ private static TPipelineFragmentParams fragmentToThriftIfAbsent( params.setDescTbl(coordinatorContext.descriptorTable); params.setQueryId(coordinatorContext.queryId); params.setFragmentId(fragment.getFragmentId().asInt()); + if (fragmentToNotifyClose.contains(params.getFragmentId())) { + params.setNeedNotifyClose(true); + } // Each tParam will set the total number of Fragments that need to be executed on the same BE, // and the BE will determine whether all Fragments have been executed based on this information. @@ -581,6 +601,215 @@ private static void filterInstancesWhichReceiveDataFromRemote( } } + private static Set setParamsForRecursiveCteNode(List distributedPlans, + List runtimeFilters) { + /* + * Populate and attach recursive-CTE related Thrift structures used by + * backends (BE) to coordinate recursive Common Table Expression (CTE) + * execution across fragments and fragment instances. 
+ * + * This method performs the following responsibilities: + * - Traverse the provided `distributedPlans` in bottom-up order (this + * ordering is expected by callers) and collect the set of network + * addresses (host + brpcPort) for every fragment. These addresses are + * used to reset and control recursive CTE child fragments from the + * producer side. + * - Detect `RecursiveCteScanNode` within fragments and build a + * `TRecCTETarget` for each such scan. A `TRecCTETarget` captures the + * network address and a representative fragment instance id and the + * scan node id that the recursive producer should send data to. + * Exactly one `RecursiveCteScanNode` is expected per fragment that + * contains a scan for a recursive CTE; otherwise an + * IllegalStateException is thrown. + * - For every `RecursiveCteNode` (producer/union node), collect its + * child fragments that implement the recursive side. For each child + * fragment, add the corresponding `TRecCTETarget` (if present) to + * the producer's target list and create `TRecCTEResetInfo` entries + * for all instances of that fragment. `TRecCTEResetInfo` entries + * carry the fragment id and addresses to be reset by the producer + * when a new recursion iteration begins. + * - Populate the `TRecCTENode` object attached to the + * `RecursiveCteNode`, including: whether it's `UNION ALL`, result + * expression lists (materialized result expressions converted to + * Thrift `TExpr`), list of targets, fragments-to-reset, runtime + * filter ids that must be reset on the recursive side, and a flag + * indicating whether this recursive CTE node is used by other + * recursive CTEs. + * + * How runtime filters are handled: + * - Build `runtimeFiltersToReset` by scanning provided + * `runtimeFilters`. A filter id is added if the filter has remote + * targets and if the recursive side (right child) contains the + * runtime-filter builder node. 
These filter ids are attached to the + * `TRecCTENode` so BE can reset the corresponding runtime filters + * between recursive iterations. + * + * Important assumptions and invariants: + * - `distributedPlans` must be ordered bottom-up so that child + * fragments (containing `RecursiveCteScanNode`) are visited before + * their producers. The implementation relies on this to pop + * consumed `TRecCTETarget` entries from `fragmentIdToRecCteTargetMap` + * to avoid a parent producer incorrectly picking up grandchild + * scan nodes. + * - Each fragment containing a `RecursiveCteScanNode` must have at + * least one assigned job (instance). If not, an + * IllegalStateException is thrown. + * - At most one `RecursiveCteScanNode` per fragment is supported; if + * more than one is found an IllegalStateException is thrown. + * + * @param distributedPlans ordered list of PipelineDistributedPlan in + * bottom-up traversal order + * @param runtimeFilters list of runtime filters to consider for reset + * @return set of fragment ids (as integers) that need to be notified + * to close for recursive CTE handling + */ + // fragments whose child recursive fragments need to be notified to close + Set fragmentToNotifyClose = new HashSet<>(); + // mapping from fragment id -> TRecCTETarget (the scan node target info) + Map> fragmentIdToRecCteTargetMap = new TreeMap<>(); + // mapping from fragment id -> set of network addresses for all instances + Map> fragmentIdToNetworkAddressMap = new TreeMap<>(); + // distributedPlans is ordered in bottom up way, so does the fragments + for (PipelineDistributedPlan plan : distributedPlans) { + // collect all assigned instance network addresses for this fragment + List fragmentAssignedJobs = plan.getInstanceJobs(); + Set networkAddresses = new TreeSet<>(); + Map addressTUniqueIdMap = new TreeMap<>(); + for (AssignedJob assignedJob : fragmentAssignedJobs) { + DistributedPlanWorker distributedPlanWorker = assignedJob.getAssignedWorker(); + // use brpc 
port + host as the address used by BE for control/reset + TNetworkAddress networkAddress = new TNetworkAddress(distributedPlanWorker.host(), + distributedPlanWorker.brpcPort()); + if (networkAddresses.add(networkAddress)) { + addressTUniqueIdMap.put(networkAddress, assignedJob.instanceId()); + } + } + PlanFragment planFragment = plan.getFragmentJob().getFragment(); + // remember addresses for later when building reset infos + fragmentIdToNetworkAddressMap.put(planFragment.getFragmentId(), networkAddresses); + + // find RecursiveCteScanNode in this fragment (scan side of recursive CTE) + List recursiveCteScanNodes = planFragment.getPlanRoot() + .collectInCurrentFragment(RecursiveCteScanNode.class::isInstance); + if (!recursiveCteScanNodes.isEmpty()) { + // validate there is exactly one scan node per fragment + if (recursiveCteScanNodes.size() != 1) { + throw new IllegalStateException( + String.format("one fragment can only have 1 recursive cte scan node, but there is %d", + recursiveCteScanNodes.size())); + } + // scan fragments must have at least one assigned instance + if (fragmentAssignedJobs.isEmpty()) { + throw new IllegalStateException( + "fragmentAssignedJobs is empty for recursive cte scan node"); + } + // Build a TRecCTETargets + List recCTETargets = new ArrayList<>(addressTUniqueIdMap.size()); + for (Entry entry : addressTUniqueIdMap.entrySet()) { + TRecCTETarget tRecCTETarget = new TRecCTETarget(); + tRecCTETarget.setAddr(entry.getKey()); + tRecCTETarget.setFragmentInstanceId(entry.getValue()); + tRecCTETarget.setNodeId(recursiveCteScanNodes.get(0).getId().asInt()); + recCTETargets.add(tRecCTETarget); + } + // store the target for producers to reference later + fragmentIdToRecCteTargetMap.put(planFragment.getFragmentId(), recCTETargets); + } + + List recursiveCteNodes = planFragment.getPlanRoot() + .collectInCurrentFragment(RecursiveCteNode.class::isInstance); + for (RecursiveCteNode recursiveCteNode : recursiveCteNodes) { + // list of scan targets this 
producer should send recursive rows to + List targets = new ArrayList<>(); + // reset infos for all instances of child fragments (used to reset state) + List fragmentsToReset = new ArrayList<>(); + // The recursive side is under the right child; collect all fragments + List childFragments = new ArrayList<>(); + recursiveCteNode.getChild(1).getChild(0).getFragment().collectAll(PlanFragment.class::isInstance, + childFragments); + for (PlanFragment child : childFragments) { + PlanFragmentId childFragmentId = child.getFragmentId(); + // mark this child fragment id so it will be notified to close + fragmentToNotifyClose.add(childFragmentId.asInt()); + // add target if a matching RecursiveCteScanNode was recorded + List recCTETargets = fragmentIdToRecCteTargetMap.getOrDefault(childFragmentId, null); + if (recCTETargets != null) { + // each producer can only map to one scan node target per child + targets.addAll(recCTETargets); + // remove the entry so ancestor producers won't reuse a grandchild scan node + fragmentIdToRecCteTargetMap.remove(childFragmentId); + } + // get all instance addresses for this child fragment and build reset infos + Set tNetworkAddresses = fragmentIdToNetworkAddressMap.get(childFragmentId); + if (tNetworkAddresses == null) { + throw new IllegalStateException( + String.format("can't find TNetworkAddress for fragment %d", childFragmentId.asInt())); + } + for (TNetworkAddress address : tNetworkAddresses) { + TRecCTEResetInfo tRecCTEResetInfo = new TRecCTEResetInfo(); + tRecCTEResetInfo.setFragmentId(childFragmentId.asInt()); + tRecCTEResetInfo.setAddr(address); + fragmentsToReset.add(tRecCTEResetInfo); + } + } + + // convert materialized result expression lists to Thrift TExpr lists + List> materializedResultExprLists = recursiveCteNode.getMaterializedResultExprLists(); + List> texprLists = new ArrayList<>(materializedResultExprLists.size()); + for (List exprList : materializedResultExprLists) { + texprLists.add(Expr.treesToThrift(exprList)); + } + // the
recursive side's rf need to be reset + // determine which runtime filters on the recursive side must be reset + List runtimeFiltersToReset = new ArrayList<>(runtimeFilters.size()); + for (RuntimeFilter rf : runtimeFilters) { + // only consider filters that have remote targets and whose builder + // node is present in the recursive side (right child) + if (rf.hasRemoteTargets() + && recursiveCteNode.getChild(1).contains(node -> node == rf.getBuilderNode())) { + runtimeFiltersToReset.add(rf.getFilterId().asInt()); + } + } + // find recursiveCte used by other recursive cte + // detect whether this recursive CTE node is referenced by other + // recursive CTEs in the recursive side; needed to correctly + // indicate sharing/usage across recursive nodes + Set recursiveCteNodesInRecursiveSide = new HashSet<>(); + PlanNode rootPlan = distributedPlans.get(distributedPlans.size() - 1) + .getFragmentJob().getFragment().getPlanRoot(); + collectAllRecursiveCteNodesInRecursiveSide(rootPlan, false, recursiveCteNodesInRecursiveSide); + boolean isUsedByOtherRecCte = recursiveCteNodesInRecursiveSide.contains(recursiveCteNode); + + // build the Thrift TRecCTENode and attach it to the RecursiveCteNode + TRecCTENode tRecCTENode = new TRecCTENode(); + tRecCTENode.setIsUnionAll(recursiveCteNode.isUnionAll()); + tRecCTENode.setTargets(targets); + tRecCTENode.setFragmentsToReset(fragmentsToReset); + tRecCTENode.setResultExprLists(texprLists); + tRecCTENode.setRecSideRuntimeFilterIds(runtimeFiltersToReset); + tRecCTENode.setIsUsedByOtherRecCte(isUsedByOtherRecCte); + // attach Thrift node to plan node for BE consumption + recursiveCteNode.settRecCTENode(tRecCTENode); + } + } + return fragmentToNotifyClose; + } + + private static void collectAllRecursiveCteNodesInRecursiveSide(PlanNode planNode, boolean needCollect, + Set recursiveCteNodes) { + if (planNode instanceof RecursiveCteNode) { + if (needCollect) { + recursiveCteNodes.add((RecursiveCteNode) planNode); + } + 
collectAllRecursiveCteNodesInRecursiveSide(planNode.getChild(0), needCollect, recursiveCteNodes); + collectAllRecursiveCteNodesInRecursiveSide(planNode.getChild(1), true, recursiveCteNodes); + } else { + for (PlanNode child : planNode.getChildren()) { + collectAllRecursiveCteNodesInRecursiveSide(child, needCollect, recursiveCteNodes); + } + } + } + private static class PerNodeScanParams { Map> perNodeScanRanges; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java index 0ec7a518078fed..a14cabd6a4a010 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java @@ -42,6 +42,8 @@ public enum StatisticalType { OLAP_SCAN_NODE, PARTITION_TOPN_NODE, REPEAT_NODE, + REC_CTE_NODE, + REC_CTE_SCAN_NODE, SELECT_NODE, SET_OPERATION_NODE, SCHEMA_SCAN_NODE, diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/RuntimeFilterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/RuntimeFilterTest.java index 3d6964d8d7e8b2..f1164ece13e14a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/RuntimeFilterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/RuntimeFilterTest.java @@ -18,13 +18,17 @@ package org.apache.doris.nereids.postprocess; import org.apache.doris.common.Pair; +import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.datasets.ssb.SSBTestBase; import org.apache.doris.nereids.datasets.ssb.SSBUtils; import org.apache.doris.nereids.glue.translator.PhysicalPlanTranslator; import org.apache.doris.nereids.glue.translator.PlanTranslatorContext; import org.apache.doris.nereids.hint.DistributeHint; +import org.apache.doris.nereids.parser.NereidsParser; import 
org.apache.doris.nereids.processor.post.PlanPostProcessors; import org.apache.doris.nereids.processor.post.RuntimeFilterContext; +import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.ExprId; @@ -34,13 +38,17 @@ import org.apache.doris.nereids.trees.plans.DistributeType; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.commands.ExplainCommand; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter; +import org.apache.doris.nereids.util.MemoTestUtils; import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.qe.OriginStatement; import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; @@ -420,4 +428,25 @@ public void testNotGenerateRfOnDanglingSlot() { Assertions.assertEquals(0, ((AbstractPhysicalPlan) plan.child(0).child(1).child(0)) .getAppliedRuntimeFilters().size()); } + + @Test + public void testRuntimeFilterBlockByRecCte() { + String sql = new StringBuilder().append("with recursive xx as (\n").append(" select\n") + .append(" c_custkey as c1\n").append(" from\n").append(" customer\n").append(" union\n") + .append(" select\n").append(" xx.c1 as c1\n").append(" from\n").append(" xx\n").append(")\n") + .append("select\n").append(" xx.c1\n").append(" from\n").append(" xx\n") + .append(" join lineorder on lineorder.lo_custkey = 
xx.c1").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.OPTIMIZED_PLAN); + MemoTestUtils.initMemoAndValidState(planner.getCascadesContext()); + new PhysicalPlanTranslator(new PlanTranslatorContext(planner.getCascadesContext())) + .translatePlan((PhysicalPlan) planner.getOptimizedPlan()); + RuntimeFilterContext context = planner.getCascadesContext().getRuntimeFilterContext(); + List filters = context.getNereidsRuntimeFilter(); + Assertions.assertEquals(0, filters.size()); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java index a91c0dd47126fc..1921ae9025d2ff 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java @@ -33,10 +33,14 @@ import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderApply; import org.apache.doris.nereids.rules.rewrite.UnCorrelatedApplyFilter; import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Nullable; +import org.apache.doris.nereids.trees.plans.commands.ExplainCommand; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.MemoTestUtils; import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.qe.OriginStatement; import org.apache.doris.utframe.TestWithFeService; import 
com.google.common.collect.ImmutableList; @@ -264,6 +268,92 @@ public void testCteNested() { ); } + @Test + public void testRecCteOutputNullable() { + String sql = new StringBuilder() + .append("WITH RECURSIVE test_table AS (\n") + .append(" SELECT 1 UNION ALL\n") + .append(" SELECT 2 FROM test_table\n") + .append(")\n") + .append("SELECT * FROM test_table;") + .toString(); + PlanChecker.from(connectContext) + .analyze(sql) + .matches( + logicalRecursiveCte( + logicalProject( + logicalOneRowRelation( + ) + ).when(project -> project.getProjects().get(0).child(0) instanceof Nullable), + logicalRecursiveCteRecursiveChild( + logicalProject( + logicalProject( + logicalCTEConsumer() + ) + ).when(project -> project.getProjects().get(0).child(0) instanceof Nullable) + ) + ) + ); + } + + @Test + public void testRecCteWithoutRecKeyword() { + String sql = new StringBuilder() + .append("WITH RECURSIVE t1 AS (\n") + .append(" SELECT 1\n") + .append("UNION ALL\n") + .append(" SELECT 2 FROM t1\n") + .append("),\n").append("t2 AS (\n") + .append(" SELECT 3\n") + .append("UNION ALL\n") + .append(" SELECT 4 FROM t1, t2\n") + .append(")\n") + .append("SELECT * FROM t2;") + .toString(); + PlanChecker.from(connectContext) + .analyze(sql) + .matches( + logicalRecursiveCte( + logicalProject( + logicalOneRowRelation( + ) + ), + logicalRecursiveCteRecursiveChild( + logicalProject( + logicalProject( + logicalJoin() + ) + ) + ) + ).when(cte -> cte.getCteName().equals("t2")) + ); + } + + @Test + public void testRecCteMultipleUnion() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select\n") + .append(" 1 as c1,\n").append(" 1 as c2\n").append("),\n").append("t2 as (\n") + .append(" select\n").append(" 2 as c1,\n").append(" 2 as c2\n").append("),\n") + .append("xx as (\n").append(" select\n").append(" c1,\n").append(" c2\n") + .append(" from\n").append(" t1\n").append(" union\n").append(" select\n") + .append(" c1,\n").append(" c2\n").append(" from\n").append(" 
t2\n") + .append(" union\n").append(" select\n").append(" c1,\n").append(" c2\n") + .append(" from\n").append(" xx\n").append(")\n").append("select\n").append(" *\n") + .append("from\n").append(" xx;").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN); + MemoTestUtils.initMemoAndValidState(planner.getCascadesContext()); + PlanChecker.from(planner.getCascadesContext()).matches( + logicalRecursiveCte( + logicalProject( + logicalUnion()), + logicalRecursiveCteRecursiveChild()).when(cte -> cte.getCteName().equals("xx"))); + } + /* ******************************************************************************************** * Test CTE Exceptions @@ -333,4 +423,133 @@ public void testCTEExceptionOfRefterCTENameNotInScope() { () -> PlanChecker.from(connectContext).analyze(sql), "Not throw expected exception."); Assertions.assertTrue(exception.getMessage().contains("Table [cte2] does not exist in database")); } + + @Test + public void testRecCteWithoutRecKeywordException() { + String sql = new StringBuilder() + .append("WITH t1 AS (\n") + .append(" SELECT 1 UNION ALL\n") + .append(" SELECT 2 FROM t1\n") + .append(")\n") + .append("SELECT * FROM t1;") + .toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), "Not throw expected exception."); + 
Assertions.assertTrue(exception.getMessage().contains("Table [t1] does not exist in database")); + } + + @Test + public void testRecCteDatatypeException() { + String sql = new StringBuilder().append("WITH RECURSIVE t1 AS (\n").append(" SELECT 1 AS number\n") + .append("UNION ALL\n").append(" SELECT number + 1 FROM t1 WHERE number < 100\n").append(")\n") + .append("SELECT number FROM t1;").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), + "Not throw expected exception."); + Assertions.assertTrue(exception.getMessage().contains("please add cast manually to get expect datatype")); + } + + @Test + public void testRecCteMultipleUnionException() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select\n") + .append(" 1 as c1,\n").append(" 1 as c2\n").append("),\n").append("t2 as (\n") + .append(" select\n").append(" 2 as c1,\n").append(" 2 as c2\n").append("),\n") + .append("xx as (\n").append(" select\n").append(" c1,\n").append(" c2\n") + .append(" from\n").append(" t1\n").append(" union\n").append(" select\n") + .append(" c1,\n").append(" c2\n").append(" from\n").append(" xx\n") + .append(" union\n").append(" select\n").append(" c1,\n").append(" c2\n") + .append(" from\n").append(" t2\n").append(")\n").append("select\n").append(" *\n") + .append("from\n").append(" xx").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = 
Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), + "Not throw expected exception."); + Assertions.assertTrue(exception.getMessage() + .contains("recursive reference to query xx must not appear within its non-recursive term")); + } + + @Test + public void testRecCteNoUnionException() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select 1 \n") + .append(" intersect\n").append(" select 2 from t1\n").append(")\n").append("select\n") + .append(" *\n").append("from\n").append(" t1").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), + "Not throw expected exception."); + Assertions.assertTrue(exception.getMessage().contains("recursive cte must be union")); + } + + @Test + public void testRecCteAnchorException() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select 1 from t1\n") + .append(" union\n").append(" select 2 from t1\n").append(")\n").append("select\n") + .append(" *\n").append("from\n").append(" t1;").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), + "Not throw expected exception."); + 
Assertions.assertTrue(exception.getMessage() + .contains("recursive reference to query t1 must not appear within its non-recursive term")); + } + + @Test + public void testRecCteMoreThanOnceException() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select 1\n") + .append(" union\n").append(" select 2 from t1 x, t1 y\n").append(")\n").append("select\n") + .append(" *\n").append("from\n").append(" t1").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), + "Not throw expected exception."); + Assertions.assertTrue( + exception.getMessage().contains("recursive reference to query t1 must not appear more than once")); + } + + @Test + public void testRecCteInSubqueryException() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select\n") + .append(" 1 as c1,\n").append(" 1 as c2\n").append("),\n").append("xx as (\n") + .append(" select\n").append(" 2 as c1,\n").append(" 2 as c2\n").append(" from\n") + .append(" t1\n").append(" union\n").append(" select\n").append(" 3 as c1,\n") + .append(" 3 as c2\n").append(" from\n") + .append(" t1 where t1.c1 in (select c1 from xx)\n").append(")\n").append("select\n") + .append(" *\n").append("from\n").append(" xx").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> planner.planWithLock(unboundPlan, 
PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.ANALYZED_PLAN), + "Not throw expected exception."); + Assertions.assertTrue( + exception.getMessage().contains("Table [xx] does not exist in database")); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java new file mode 100644 index 00000000000000..0a0efc8b5db3a6 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/CTEInlineTest.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.parser.NereidsParser; +import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.plans.commands.ExplainCommand; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.MemoTestUtils; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.qe.OriginStatement; +import org.apache.doris.utframe.TestWithFeService; + +import org.junit.jupiter.api.Test; + +public class CTEInlineTest extends TestWithFeService implements MemoPatternMatchSupported { + @Override + protected void runBeforeAll() throws Exception { + createDatabase("test"); + connectContext.setDatabase("test"); + } + + @Test + public void recCteInline() { + String sql = new StringBuilder().append("with recursive t1 as (\n").append(" select\n") + .append(" 1 as c1,\n").append(" 1 as c2\n").append("),\n").append("t2 as (\n") + .append(" select\n").append(" 2 as c1,\n").append(" 2 as c2\n").append("),\n") + .append("t3 as (\n").append(" select\n").append(" 3 as c1,\n").append(" 3 as c2\n") + .append("),\n").append("xx as (\n").append(" select\n").append(" c1,\n") + .append(" c2\n").append(" from\n").append(" t1\n").append(" union\n") + .append(" select\n").append(" t2.c1,\n").append(" t2.c2\n").append(" from\n") + .append(" t2,\n").append(" xx\n").append(" where\n").append(" t2.c1 = xx.c1\n") + .append("),\n").append("yy as (\n").append(" select\n").append(" c1,\n") + .append(" c2\n").append(" from\n").append(" t3\n").append(" union\n") + .append(" select\n").append(" t3.c1,\n").append(" t3.c2\n").append(" from\n") + .append(" t3,\n").append(" yy,\n").append(" xx\n").append(" where\n") + .append(" t3.c1 = yy.c1\n").append(" and t3.c2 = xx.c1\n").append(")\n") 
+ .append("select\n").append(" *\n").append("from\n").append(" yy y1,\n").append(" yy y2;") + .toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.REWRITTEN_PLAN); + MemoTestUtils.initMemoAndValidState(planner.getCascadesContext()); + PlanChecker.from(planner.getCascadesContext()).matches( + logicalRecursiveCte( + any( + ), + logicalRecursiveCteRecursiveChild( + logicalProject( + logicalJoin( + any(), + logicalProject( + logicalFilter( + logicalRecursiveCte().when(cte -> cte.getCteName().equals("xx")) + ) + ) + ) + ) + ) + ).when(cte -> cte.getCteName().equals("yy")) + ); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ColumnPruningTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ColumnPruningTest.java index 12e9a1ad3816dc..353f0c13863ae8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ColumnPruningTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/ColumnPruningTest.java @@ -17,15 +17,23 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.parser.NereidsParser; +import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.commands.ExplainCommand; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import 
org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.TinyIntType; import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.MemoTestUtils; import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.qe.OriginStatement; import org.apache.doris.utframe.TestWithFeService; import com.google.common.collect.ImmutableList; @@ -328,6 +336,28 @@ public void pruneUnionAllWithCount() { ); } + @Test + public void pruneRecCte() { + String sql = new StringBuilder().append("WITH RECURSIVE t1(col1, col2, col3) AS (\n") + .append(" SELECT 1, 1, 1\n").append(" UNION ALL\n").append(" SELECT 2, 2, 2\n") + .append(" FROM student, t1\n").append(" WHERE t1.col1 = student.id\n").append(" )\n") + .append("SELECT col1\n").append("FROM t1\n").append("WHERE col2 = 2;").toString(); + LogicalPlan unboundPlan = new NereidsParser().parseSingle(sql); + StatementContext statementContext = new StatementContext(connectContext, + new OriginStatement(sql, 0)); + NereidsPlanner planner = new NereidsPlanner(statementContext); + planner.planWithLock(unboundPlan, PhysicalProperties.ANY, + ExplainCommand.ExplainLevel.REWRITTEN_PLAN); + MemoTestUtils.initMemoAndValidState(planner.getCascadesContext()); + PlanChecker.from(planner.getCascadesContext()).matches( + logicalProject( + logicalFilter( + logicalRecursiveCte().when(cte -> cte.getOutput().size() == 3) + ) + ).when(project -> project.getOutputs().size() == 1) + ); + } + private List getOutputQualifiedNames(LogicalProject p) { return getOutputQualifiedNames(p.getOutputs()); } diff --git a/regression-test/data/rec_cte_p0/rec_cte/rec_cte.out b/regression-test/data/rec_cte_p0/rec_cte/rec_cte.out new file mode 100644 index 00000000000000..ba843a71ee5282 --- /dev/null +++ b/regression-test/data/rec_cte_p0/rec_cte/rec_cte.out @@ -0,0 +1,953 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +0.5403023058681398 +0.6542897904977791 +0.7013687736227565 +0.7221024250267077 +0.7314040424225098 +0.7356047404363474 +0.7375068905132428 +0.7383692041223232 +0.7387603198742113 +0.7389377567153445 +0.7390182624274122 +0.7390547907469174 +0.7390713652989449 +0.7390788859949921 +0.7390822985224024 +0.7390838469650002 +0.7390845495752126 +0.7390848683867142 +0.7390850130484203 +0.739085078689123 +0.7390851084737987 +0.7390851219886894 +0.7390851281211138 +0.7390851309037207 +0.7390851321663374 +0.7390851327392538 +0.7390851329992164 +0.7390851331171753 +0.7390851331706995 +0.7390851331949863 +0.7390851332060064 +0.7390851332110069 +0.7390851332132758 +0.7390851332143055 +0.7390851332147726 +0.7390851332149846 +0.7390851332150807 +0.7390851332151244 +0.7390851332151441 +0.7390851332151531 +0.7390851332151572 +0.7390851332151591 +0.7390851332151599 +0.7390851332151603 +0.7390851332151605 +0.7390851332151606 +0.7390851332151607 +0.7390851332151608 +0.7390851332151609 +0.7390851332151611 +0.7390851332151617 +0.7390851332151629 +0.7390851332151657 +0.7390851332151718 +0.7390851332151851 +0.7390851332152145 +0.7390851332152792 +0.739085133215422 +0.7390851332157367 +0.7390851332164302 +0.7390851332179587 +0.7390851332213271 +0.7390851332287504 +0.7390851332451103 +0.7390851332811648 +0.7390851333606233 +0.7390851335357372 +0.7390851339216605 +0.7390851347721744 +0.7390851366465718 +0.7390851407774467 +0.7390851498812394 +0.7390851699445544 +0.7390852141609171 +0.7390853116067619 +0.7390855263619245 +0.7390859996481299 +0.7390870426953322 +0.7390893414033927 +0.7390944073790913 +0.7391055719265363 +0.7391301765296711 +0.7391843997714936 +0.7393038923969059 +0.7395672022122561 +0.7401473355678757 +0.7414250866101092 +0.7442373549005569 +0.7504177617637605 +0.7639596829006542 +0.7934803587425656 +0.8575532158463934 +1 + +-- !sql -- +55 + +-- !sql -- +1 3 +1 5 +1 8 +2 4 +2 5 +2 10 +2 19 +3 1 +3 5 +3 8 +3 10 +3 
24 +5 3 +5 4 +5 8 +5 15 +6 3 +6 4 +6 7 +7 4 +8 1 +9 4 + +-- !sql -- +1 3 +1 5 +2 4 +2 5 +2 10 +3 1 +3 5 +3 8 +3 10 +5 3 +5 4 +5 8 +5 10 +6 3 +6 4 +6 5 +7 4 +8 1 +8 8 +9 4 +11 1 +12 3 +29 4 + +-- !sql -- +1 3 +1 5 +2 4 +2 5 +2 10 +3 1 +3 5 +3 8 +3 10 +5 3 +5 4 +5 8 +6 3 +6 4 +7 4 +7 10 +8 1 +8 10 +9 4 +9 5 +9 10 +10 5 +10 8 +10 10 +11 5 +11 8 +11 10 +12 5 +12 8 +12 10 +13 1 +13 5 +13 8 +13 10 +14 1 +14 5 +14 8 +14 10 +15 1 +15 3 +15 5 +15 8 +15 10 +16 1 +16 3 +16 5 +16 8 +16 10 +17 1 +17 3 +17 5 +17 8 +17 10 +18 1 +18 3 +18 5 +18 8 +18 10 +19 1 +19 3 +19 5 +19 8 +19 10 +20 1 +20 3 +20 5 +20 8 +20 10 +21 1 +21 3 +21 5 +21 8 +21 10 +22 1 +22 3 +22 5 +22 8 +22 10 +23 1 +23 3 +23 5 +23 8 +23 10 +24 1 +24 3 +24 5 +24 8 +24 10 +25 1 +25 3 +25 5 +25 8 +25 10 +26 1 +26 3 +26 5 +26 8 +26 10 +27 1 +27 3 +27 5 +27 8 +27 10 +28 1 +28 3 +28 5 +28 8 +28 10 +29 1 +29 3 +29 5 +29 8 +29 10 +30 1 +30 3 +30 5 +30 8 +30 10 +31 1 +31 3 +31 5 +31 8 +31 10 +32 1 +32 3 +32 5 +32 8 +32 10 +33 1 +33 3 +33 5 +33 8 +33 10 +34 1 +34 3 +34 4 +34 5 +34 8 +34 10 +35 1 +35 3 +35 4 +35 5 +35 8 +35 10 +36 1 +36 3 +36 4 +36 5 +36 8 +36 10 +37 1 +37 3 +37 4 +37 5 +37 8 +37 10 +38 1 +38 3 +38 4 +38 5 +38 8 +38 10 +39 1 +39 3 +39 4 +39 5 +39 8 +39 10 +40 1 +40 3 +40 4 +40 5 +40 8 +40 10 +41 1 +41 3 +41 4 +41 5 +41 8 +41 10 +42 1 +42 3 +42 4 +42 5 +42 8 +42 10 +43 1 +43 3 +43 4 +43 5 +43 8 +43 10 +44 1 +44 3 +44 4 +44 5 +44 8 +44 10 +45 1 +45 3 +45 4 +45 5 +45 8 +45 10 +46 1 +46 3 +46 4 +46 5 +46 8 +46 10 +47 1 +47 3 +47 4 +47 5 +47 8 +47 10 +48 1 +48 3 +48 4 +48 5 +48 8 +48 10 +49 1 +49 3 +49 4 +49 5 +49 8 +49 10 +50 1 +50 3 +50 4 +50 5 +50 8 +50 10 +51 1 +51 3 +51 4 +51 5 +51 8 +51 10 +52 1 +52 3 +52 4 +52 5 +52 8 +52 10 +53 1 +53 3 +53 4 +53 5 +53 8 +53 10 +54 1 +54 3 +54 4 +54 5 +54 8 +54 10 +55 1 +55 3 +55 4 +55 5 +55 8 +55 10 +56 1 +56 3 +56 4 +56 5 +56 8 +56 10 +57 1 +57 3 +57 4 +57 5 +57 8 +57 10 +58 1 +58 3 +58 4 +58 5 +58 8 +58 10 +59 1 +59 3 +59 4 +59 5 +59 8 +59 10 +60 1 +60 3 +60 4 +60 5 +60 
8 +60 10 +61 1 +61 3 +61 4 +61 5 +61 8 +61 10 +62 1 +62 3 +62 4 +62 5 +62 8 +62 10 +63 1 +63 3 +63 4 +63 5 +63 8 +63 10 +64 1 +64 3 +64 4 +64 5 +64 8 +64 10 +65 1 +65 3 +65 4 +65 5 +65 8 +65 10 +66 1 +66 3 +66 4 +66 5 +66 8 +66 10 +67 1 +67 3 +67 4 +67 5 +67 8 +67 10 +68 1 +68 3 +68 4 +68 5 +68 8 +68 10 +69 1 +69 3 +69 4 +69 5 +69 8 +69 10 +70 1 +70 3 +70 4 +70 5 +70 8 +70 10 +71 1 +71 3 +71 4 +71 5 +71 8 +71 10 +72 1 +72 3 +72 4 +72 5 +72 8 +72 10 +73 1 +73 3 +73 4 +73 5 +73 8 +73 10 +74 1 +74 3 +74 4 +74 5 +74 8 +74 10 +75 1 +75 3 +75 4 +75 5 +75 8 +75 10 +76 1 +76 3 +76 4 +76 5 +76 8 +76 10 +77 1 +77 3 +77 4 +77 5 +77 8 +77 10 +78 1 +78 3 +78 4 +78 5 +78 8 +78 10 +79 1 +79 3 +79 4 +79 5 +79 8 +79 10 +80 1 +80 3 +80 4 +80 5 +80 8 +80 10 +81 1 +81 3 +81 4 +81 5 +81 8 +81 10 +82 1 +82 3 +82 4 +82 5 +82 8 +82 10 +83 1 +83 3 +83 4 +83 5 +83 8 +83 10 +84 1 +84 3 +84 4 +84 5 +84 8 +84 10 +85 1 +85 3 +85 4 +85 5 +85 8 +85 10 +86 1 +86 3 +86 4 +86 5 +86 8 +86 10 +87 1 +87 3 +87 4 +87 5 +87 8 +87 10 +88 1 +88 3 +88 4 +88 5 +88 8 +88 10 +89 1 +89 3 +89 4 +89 5 +89 8 +89 10 +90 1 +90 3 +90 4 +90 5 +90 8 +90 10 +91 1 +91 3 +91 4 +91 5 +91 8 +91 10 +92 1 +92 3 +92 4 +92 5 +92 8 +92 10 +93 1 +93 3 +93 4 +93 5 +93 8 +93 10 +94 1 +94 3 +94 4 +94 5 +94 8 +94 10 +95 1 +95 3 +95 4 +95 5 +95 8 +95 10 +96 1 +96 3 +96 4 +96 5 +96 8 +96 10 +97 1 +97 3 +97 4 +97 5 +97 8 +97 10 +98 1 +98 3 +98 4 +98 5 +98 8 +98 10 +99 1 +99 3 +99 4 +99 5 +99 8 +99 10 +100 1 +100 3 +100 4 +100 5 +100 8 +100 10 + +-- !sql -- +1 3 +1 5 +1 8 +2 \N +2 4 +2 5 +2 9 +2 10 +2 15 +2 19 +2 28 +2 34 +2 43 +2 71 +2 77 +2 105 +2 176 +2 182 +2 253 +2 429 +2 435 +2 611 +2 1040 +2 1046 +2 1475 +2 2515 +2 2521 +2 3561 +2 6076 +2 6082 +2 8597 +2 14673 +2 14679 +2 20755 +2 35428 +2 35434 +2 50107 +2 85535 +2 85541 +2 120969 +2 206504 +2 206510 +2 292045 +2 498549 +2 498555 +2 705059 +2 1203608 +2 1203614 +2 1702163 +2 2905771 +2 2905777 +2 4109385 +2 7015156 +2 7015162 +2 9920933 +2 16936089 +2 16936095 +2 23951251 +2 
40887340 +2 40887346 +2 57823435 +2 98710775 +2 98710781 +2 139598121 +2 238308896 +2 238308902 +2 337019677 +2 575328573 +2 575328579 +2 813637475 +2 1388966048 +2 1388966054 +2 1964294627 +3 \N +3 1 +3 5 +3 6 +3 8 +3 10 +3 14 +3 18 +3 20 +3 23 +3 41 +3 43 +3 55 +3 63 +3 96 +3 118 +3 139 +3 181 +3 235 +3 320 +3 353 +3 501 +3 588 +3 854 +3 908 +3 1355 +3 1496 +3 2263 +3 2350 +3 3618 +3 3846 +3 5968 +3 6109 +3 9586 +3 9955 +3 15695 +3 15923 +3 25281 +3 25878 +3 41204 +3 41573 +3 66485 +3 67451 +3 108058 +3 108655 +3 174543 +3 176106 +3 283198 +3 284164 +3 457741 +3 460270 +3 741905 +3 743468 +3 1199646 +3 1203738 +3 1943114 +3 1945643 +3 3142760 +3 3149381 +3 5088403 +3 5092495 +3 8231163 +3 8241876 +3 13323658 +3 13330279 +3 21554821 +3 21572155 +3 34885100 +3 34895813 +3 56439921 +3 56467968 +3 91335734 +3 91353068 +3 147775655 +3 147821036 +3 239128723 +3 239156770 +3 386904378 +3 386977806 +3 626061148 +3 626106529 +3 1012965526 +3 1013084335 +3 1639072055 +3 1639145483 +5 \N +5 3 +5 4 +5 7 +5 8 +5 12 +5 15 +5 22 +5 27 +5 34 +5 56 +5 61 +5 83 +5 139 +5 144 +5 200 +5 339 +5 344 +5 483 +5 822 +5 827 +5 1166 +5 1988 +5 1993 +5 2815 +5 4803 +5 4808 +5 6796 +5 11599 +5 11604 +5 16407 +5 28006 +5 28011 +5 39610 +5 67616 +5 67621 +5 95627 +5 163243 +5 163248 +5 230864 +5 394107 +5 394112 +5 557355 +5 951462 +5 951467 +5 1345574 +5 2297036 +5 2297041 +5 3248503 +5 5545539 +5 5545544 +5 7842580 +5 13388119 +5 13388124 +5 18933663 +5 32321782 +5 32321787 +5 45709906 +5 78031688 +5 78031693 +5 110353475 +5 188385163 +5 188385168 +5 266416856 +5 454802019 +5 454802024 +5 643187187 +5 1097989206 +5 1097989211 +5 1552791230 +6 3 +6 4 +6 7 +7 4 +8 1 +9 4 + +-- !sql -- +1 2 + +-- !sql -- +1 2 +3 4 + +-- !sql -- +1 2 +3 4 +11 22 +33 44 + +-- !sql -- +1 2 +3 4 +11 22 + +-- !sql -- +1 22 +3 22 +11 22 + +-- !sql -- +1 2 +2 3 + +-- !sql -- +1 2 +3 4 +11 22 + diff --git a/regression-test/data/rec_cte_p0/rec_cte_from_ck_doc/rec_cte_from_ck_doc.out 
b/regression-test/data/rec_cte_p0/rec_cte_from_ck_doc/rec_cte_from_ck_doc.out new file mode 100644 index 00000000000000..3a055b01acc868 --- /dev/null +++ b/regression-test/data/rec_cte_p0/rec_cte_from_ck_doc/rec_cte_from_ck_doc.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q1 -- +5050 + +-- !q2 -- +0 \N ROOT +1 0 Child_1 +2 0 Child_2 +3 1 Child_1_1 + +-- !q3 -- +0 \N ROOT [0] +1 0 Child_1 [0, 1] +3 1 Child_1_1 [0, 1, 3] +2 0 Child_2 [0, 2] + +-- !q4 -- +0 \N ROOT [0] 0 +1 0 Child_1 [0, 1] 1 +2 0 Child_2 [0, 2] 1 +3 1 Child_1_1 [0, 1, 3] 2 + +-- !q5 -- +1 2 1 -> 2 +1 3 1 -> 3 +1 4 1 -> 4 +2 3 2 -> 3 +4 5 4 -> 5 + diff --git a/regression-test/data/rec_cte_p0/rec_cte_from_duckdb_doc/rec_cte_from_duckdb_doc.out b/regression-test/data/rec_cte_p0/rec_cte_from_duckdb_doc/rec_cte_from_duckdb_doc.out new file mode 100644 index 00000000000000..958b6848838bd0 --- /dev/null +++ b/regression-test/data/rec_cte_p0/rec_cte_from_duckdb_doc/rec_cte_from_duckdb_doc.out @@ -0,0 +1,30 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q1 -- +0 0 1 +1 1 1 +2 1 2 +3 2 3 +4 3 5 +5 5 8 +6 8 13 +7 13 21 +8 21 34 +9 34 55 + +-- !q2 -- +["Oasis", "Rock", "Music", "Art"] + +-- !q3 -- +1 3 [1, 3] +1 5 [1, 5] +1 5 [1, 3, 5] +1 8 [1, 3, 8] +1 10 [1, 3, 10] +1 3 [1, 5, 3] +1 4 [1, 5, 4] +1 8 [1, 5, 8] +1 4 [1, 3, 5, 4] +1 8 [1, 3, 5, 8] +1 8 [1, 5, 3, 8] +1 10 [1, 5, 3, 10] + diff --git a/regression-test/data/rec_cte_p0/rec_cte_from_mysql_doc/rec_cte_from_mysql_doc.out b/regression-test/data/rec_cte_p0/rec_cte_from_mysql_doc/rec_cte_from_mysql_doc.out new file mode 100644 index 00000000000000..3bc4cf277852f6 --- /dev/null +++ b/regression-test/data/rec_cte_p0/rec_cte_from_mysql_doc/rec_cte_from_mysql_doc.out @@ -0,0 +1,42 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !q1 -- +1 abc +2 abcabc +3 abcabcabcabc + +-- !q2 -- +1 1 -1 +2 -2 2 +3 4 -4 +4 -8 8 +5 16 -16 + +-- !q4 -- +2017-01-03 +2017-01-04 +2017-01-05 +2017-01-06 +2017-01-07 +2017-01-08 +2017-01-09 +2017-01-10 + +-- !q5 -- +2017-01-03 300 +2017-01-04 0 +2017-01-05 0 +2017-01-06 50 +2017-01-07 0 +2017-01-08 180 +2017-01-09 0 +2017-01-10 5 + +-- !q6 -- +333 Yasmina 333 +198 John 333,198 +29 Pedro 333,198,29 +4610 Sarah 333,198,29,4610 +72 Pierre 333,198,29,72 +692 Tarek 333,692 +123 Adil 333,692,123 + diff --git a/regression-test/suites/rec_cte_p0/rec_cte/rec_cte.groovy b/regression-test/suites/rec_cte_p0/rec_cte/rec_cte.groovy new file mode 100644 index 00000000000000..d456f6ba998501 --- /dev/null +++ b/regression-test/suites/rec_cte_p0/rec_cte/rec_cte.groovy @@ -0,0 +1,271 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite ("rec_cte") { + qt_sql """ + WITH RECURSIVE test_table AS ( + SELECT + cast(1.0 as double) AS number + UNION + SELECT + cos(number) + FROM + test_table + ) + SELECT + number + FROM + test_table order by number; + """ + + qt_sql """ + WITH RECURSIVE test_table AS ( + SELECT cast(10 as int) AS number + UNION ALL + SELECT cast(number - 1 as int) FROM test_table WHERE number > 0 + ) + SELECT sum(number) FROM test_table; + """ + + + sql "DROP TABLE IF EXISTS edge;" + sql """ + CREATE TABLE edge + ( + node1id int, + node2id int + ) DUPLICATE KEY (node1id) + DISTRIBUTED BY HASH(node1id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """ + INSERT INTO edge VALUES + (1, 3), (1, 5), (2, 4), (2, 5), (2, 10), (3, 1), + (3, 5), (3, 8), (3, 10), (5, 3), (5, 4), (5, 8), + (6, 3), (6, 4), (7, 4), (8, 1), (9, 4); + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + node1id AS k1, + node2id AS k2 + FROM edge + UNION + SELECT + k1, + cast(sum(k2) as int) + FROM t1 GROUP BY k1 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + node1id AS k1, + node2id AS k2 + FROM edge + UNION + SELECT + cast(sum(k1) as int), + k2 + FROM t1 GROUP BY k2 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + + test { + sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + node1id AS k1, + node2id AS k2 + FROM edge + UNION + SELECT + cast(sum(k1 + 1) as int), + k2 + FROM t1 GROUP BY k2 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + exception "ABORTED" + } + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + node1id AS k1, + node2id AS k2 + FROM edge + UNION + SELECT + cast(sum(k1 + 1) as int), + k2 + FROM t1 WHERE k1 < 100 GROUP BY k2 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + node1id AS k1, + node2id AS k2 + FROM edge + UNION + SELECT + k1, + cast(sum(k2) OVER (PARTITION BY k1 ORDER BY k1 ROWS BETWEEN 1 PRECEDING AND 
1 FOLLOWING) as int) + FROM t1 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + + test { + sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION ALL + SELECT + 1,2 + FROM t1 GROUP BY k1 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + exception "ABORTED" + } + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 1,2 + FROM t1 GROUP BY k1 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 3,4 + FROM t1 GROUP BY k1 + ) + SELECT * FROM t1 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 3,4 + FROM t1 GROUP BY k1 + ), + t2(k1, k2) AS ( + SELECT + 11,22 + UNION + SELECT + 33,44 + FROM t2 GROUP BY k1 + ) + SELECT * FROM t1 UNION select * from t2 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 3,4 + FROM t1 GROUP BY k1 + ), + t2(k1, k2) AS ( + SELECT + 11,22 + UNION + SELECT t2.k1, t2.k2 FROM t1,t2 + ) + SELECT * FROM t1 UNION select * from t2 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 3,4 + FROM t1 GROUP BY k1 + ), + t2(k1, k2) AS ( + SELECT + 11,22 + UNION + SELECT t1.k1, t2.k2 FROM t1,t2 + ) + select * from t2 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 3,4 + FROM t1 GROUP BY k1 + ), + t2(k1, k2) AS ( + SELECT + 2,3 + UNION + SELECT least(t1.k1,t2.k1), least(t1.k2,t2.k2) FROM t1,t2 + ) + select * from t2 ORDER BY 1,2; + """ + + qt_sql """ + WITH RECURSIVE t1(k1, k2) AS ( + SELECT + 1,2 + UNION + SELECT + 3,4 + FROM t1 GROUP BY k1 + ), + t2(k1, k2) AS ( + SELECT + 11,22 + UNION + SELECT t1.k1, t1.k2 FROM t1 + ) + SELECT * FROM t1 UNION select * from t2 ORDER BY 1,2; + """ +} diff --git a/regression-test/suites/rec_cte_p0/rec_cte_from_ck_doc/rec_cte_from_ck_doc.groovy b/regression-test/suites/rec_cte_p0/rec_cte_from_ck_doc/rec_cte_from_ck_doc.groovy 
new file mode 100644 index 00000000000000..a01ab03db347c7 --- /dev/null +++ b/regression-test/suites/rec_cte_p0/rec_cte_from_ck_doc/rec_cte_from_ck_doc.groovy @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +// https://clickhouse.com/docs/sql-reference/statements/select/with +suite ("rec_cte_from_ck_doc") { + qt_q1 """ + WITH RECURSIVE test_table AS ( + SELECT cast(1 as int) AS number + UNION ALL + SELECT cast(number + 1 as int) FROM test_table WHERE number < 100 + ) + SELECT sum(number) FROM test_table; + """ + + sql "DROP TABLE IF EXISTS tree;" + sql """ + CREATE TABLE tree + ( + id int, + parent_id int, + data varchar(100) + ) DUPLICATE KEY (id) + DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """INSERT INTO tree VALUES (0, NULL, 'ROOT'), (1, 0, 'Child_1'), (2, 0, 'Child_2'), (3, 1, 'Child_1_1');""" + + qt_q2 """ + WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data + FROM tree t + WHERE t.id = 0 + UNION ALL + SELECT t.id, t.parent_id, t.data + FROM tree t, search_tree st + WHERE t.parent_id = st.id + ) + SELECT * FROM search_tree order BY id; + """ + + qt_q3 """ + WITH RECURSIVE search_tree AS ( 
+ SELECT id, parent_id, data, array(t.id) AS path + FROM tree t + WHERE t.id = 0 + UNION ALL + SELECT t.id, t.parent_id, t.data, array_concat(path, array(t.id)) + FROM tree t, search_tree st + WHERE t.parent_id = st.id + ) + SELECT * FROM search_tree ORDER BY path; + """ + + qt_q4 """ + WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data, array(t.id) AS path, cast(0 as int) AS depth + FROM tree t + WHERE t.id = 0 + UNION ALL + SELECT t.id, t.parent_id, t.data, array_concat(path, array(t.id)), cast(depth + 1 as int) + FROM tree t, search_tree st + WHERE t.parent_id = st.id + ) + SELECT * FROM search_tree ORDER BY depth, id; + """ + + sql "DROP TABLE IF EXISTS graph;" + sql """ + CREATE TABLE graph + ( + c_from int, + c_to int, + label varchar(100) + ) DUPLICATE KEY (c_from) DISTRIBUTED BY HASH(c_from) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """INSERT INTO graph VALUES (1, 2, '1 -> 2'), (1, 3, '1 -> 3'), (2, 3, '2 -> 3'), (1, 4, '1 -> 4'), (4, 5, '4 -> 5');""" + + qt_q5 """ + WITH RECURSIVE search_graph AS ( + SELECT c_from, c_to, label FROM graph g + UNION ALL + SELECT g.c_from, g.c_to, g.label + FROM graph g, search_graph sg + WHERE g.c_from = sg.c_to + ) + SELECT DISTINCT * FROM search_graph ORDER BY c_from, c_to; + """ + + sql "INSERT INTO graph VALUES (5, 1, '5 -> 1');" + test { + sql """ + WITH RECURSIVE search_graph AS ( + SELECT c_from, c_to, label FROM graph g + UNION ALL + SELECT g.c_from, g.c_to, g.label + FROM graph g, search_graph sg + WHERE g.c_from = sg.c_to + ) + SELECT DISTINCT * FROM search_graph ORDER BY c_from, c_to; + """ + exception "ABORTED" + } + + // test global rf + sql "set enable_runtime_filter_prune = false;" + test { + sql """ + WITH RECURSIVE search_graph AS ( + SELECT c_from, c_to, label FROM graph g + UNION ALL + SELECT g.c_from, g.c_to, g.label + FROM graph g join [shuffle] search_graph sg + on g.c_from = sg.c_to + ) + SELECT DISTINCT * FROM search_graph ORDER BY c_from, c_to; + """ + exception "ABORTED" 
+ } + + // LIMIT cannot be used to stop the recursion for now + //qt_q6 """ + //WITH RECURSIVE test_table AS ( + // SELECT cast(1 as int) AS number + //UNION ALL + // SELECT cast(number + 1 as int) FROM test_table + //) + //SELECT sum(number) FROM test_table LIMIT 100; + //""" +} diff --git a/regression-test/suites/rec_cte_p0/rec_cte_from_duckdb_doc/rec_cte_from_duckdb_doc.groovy b/regression-test/suites/rec_cte_p0/rec_cte_from_duckdb_doc/rec_cte_from_duckdb_doc.groovy new file mode 100644 index 00000000000000..62d70558163911 --- /dev/null +++ b/regression-test/suites/rec_cte_p0/rec_cte_from_duckdb_doc/rec_cte_from_duckdb_doc.groovy @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +// https://duckdb.org/docs/stable/sql/query_syntax/with#recursive-ctes +suite ("rec_cte_from_duckdb_doc") { + qt_q1 """ + WITH RECURSIVE FibonacciNumbers ( + RecursionDepth, + FibonacciNumber, + NextNumber + ) AS ( + -- Base case + SELECT + cast(0 as int) AS RecursionDepth, + cast(0 as int) AS FibonacciNumber, + cast(1 as int) AS NextNumber + UNION + ALL -- Recursive step + SELECT + cast((fib.RecursionDepth + 1) as int) AS RecursionDepth, + fib.NextNumber AS FibonacciNumber, + cast((fib.FibonacciNumber + fib.NextNumber) as int) AS NextNumber + FROM + FibonacciNumbers fib + WHERE + cast((fib.RecursionDepth + 1) as int) < 10 + ) + SELECT + * + FROM + FibonacciNumbers fn ORDER BY fn.RecursionDepth; + """ + + sql "DROP TABLE IF EXISTS tag;" + sql """ + CREATE TABLE tag + ( + id int, + name varchar(100), + subclassof int + ) DUPLICATE KEY (id) + DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """INSERT INTO tag VALUES + (1, 'U2', 5), + (2, 'Blur', 5), + (3, 'Oasis', 5), + (4, '2Pac', 6), + (5, 'Rock', 7), + (6, 'Rap', 7), + (7, 'Music', 9), + (8, 'Movies', 9), + (9, 'Art', NULL);""" + + qt_q2 """ + WITH RECURSIVE tag_hierarchy(id, source, path) AS ( + SELECT id, name, array(name) AS path + FROM tag + WHERE subclassof IS NULL + UNION ALL + SELECT tag.id, tag.name, array_concat(array(tag.name), tag_hierarchy.path) + FROM tag, tag_hierarchy + WHERE tag.subclassof = tag_hierarchy.id + ) + SELECT path + FROM tag_hierarchy + WHERE source = 'Oasis'; + """ + + sql "DROP TABLE IF EXISTS edge;" + sql """ + CREATE TABLE edge + ( + node1id int, + node2id int + ) DUPLICATE KEY (node1id) + DISTRIBUTED BY HASH(node1id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """ + INSERT INTO edge VALUES + (1, 3), (1, 5), (2, 4), (2, 5), (2, 10), (3, 1), + (3, 5), (3, 8), (3, 10), (5, 3), (5, 4), (5, 8), + (6, 3), (6, 4), (7, 4), (8, 1), (9, 4); + """ + + qt_q3 """ + WITH RECURSIVE 
paths(startNode, endNode, path) AS ( + SELECT -- Define the path as the first edge of the traversal + node1id AS startNode, + node2id AS endNode, + array_concat(array(node1id), array(node2id)) AS path + FROM edge + WHERE node1id = 1 + UNION ALL + SELECT -- Concatenate new edge to the path + paths.startNode AS startNode, + node2id AS endNode, + array_concat(path, array(node2id)) AS path + FROM paths + JOIN edge ON paths.endNode = node1id + -- Prevent adding a repeated node to the path. + -- This ensures that no cycles occur. + WHERE array_contains(paths.path, node2id) = false + ) + SELECT startNode, endNode, path + FROM paths + ORDER BY array_size(path), path; + """ + + // do not support subquery containing recursive cte + //qt_q4 """ + //WITH RECURSIVE paths(startNode, endNode, path) AS ( + // SELECT -- Define the path as the first edge of the traversal + // node1id AS startNode, + // node2id AS endNode, + // array_concat(array(node1id), array(node2id)) AS path + // FROM edge + // WHERE startNode = 1 + // UNION ALL + // SELECT -- Concatenate new edge to the path + // paths.startNode AS startNode, + // node2id AS endNode, + // array_concat(path, array(node2id)) AS path + // FROM paths + // JOIN edge ON paths.endNode = node1id + // -- Prevent adding a node that was visited previously by any path. + // -- This ensures that (1) no cycles occur and (2) only nodes that + // -- were not visited by previous (shorter) paths are added to a path. 
+ // WHERE NOT EXISTS ( + // SELECT 1 FROM paths previous_paths + // WHERE array_contains(previous_paths.path, node2id) + // ) + // ) + //SELECT startNode, endNode, path + //FROM paths + //ORDER BY array_size(path), path; + //""" + + //qt_q5 """ + //WITH RECURSIVE paths(startNode, endNode, path, endReached) AS ( + //SELECT -- Define the path as the first edge of the traversal + // node1id AS startNode, + // node2id AS endNode, + // array_concat(array(node1id), array(node2id)) AS path, + // (node2id = 8) AS endReached + // FROM edge + // WHERE startNode = 1 + //UNION ALL + //SELECT -- Concatenate new edge to the path + // paths.startNode AS startNode, + // node2id AS endNode, + // array_concat(path, array(node2id)) AS path, + // max(CASE WHEN node2id = 8 THEN 1 ELSE 0 END) + // OVER (ROWS BETWEEN UNBOUNDED PRECEDING + // AND UNBOUNDED FOLLOWING) AS endReached + // FROM paths + // JOIN edge ON paths.endNode = node1id + // WHERE NOT EXISTS ( + // FROM paths previous_paths + // WHERE array_contains(previous_paths.path, node2id) + // ) + // AND paths.endReached = 0 + //) + //SELECT startNode, endNode, path + //FROM paths + //WHERE endNode = 8 + //ORDER BY array_size(path), path; + //""" +} diff --git a/regression-test/suites/rec_cte_p0/rec_cte_from_mysql_doc/rec_cte_from_mysql_doc.groovy b/regression-test/suites/rec_cte_p0/rec_cte_from_mysql_doc/rec_cte_from_mysql_doc.groovy new file mode 100644 index 00000000000000..cee07dc6612543 --- /dev/null +++ b/regression-test/suites/rec_cte_p0/rec_cte_from_mysql_doc/rec_cte_from_mysql_doc.groovy @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +// https://dev.mysql.com/doc/refman/8.4/en/with.html#common-table-expressions-recursive +suite ("rec_cte_from_mysql_doc") { + qt_q1 """ + WITH RECURSIVE cte AS + ( + SELECT cast(1 as int) AS n, cast('abc' as varchar(65533)) AS str + UNION ALL + SELECT cast(n + 1 as int), cast(CONCAT(str, str) as varchar(65533)) FROM cte WHERE n < 3 + ) + SELECT * FROM cte order by n; + """ + + qt_q2 """ + WITH RECURSIVE cte AS + ( + SELECT cast(1 as int) AS n, cast(1 as int) AS p, cast(-1 as int) AS q + UNION ALL + SELECT cast(n + 1 as int), cast(q * 2 as int), cast(p * 2 as int) FROM cte WHERE n < 5 + ) + SELECT * FROM cte order by n; + """ + + test { + sql """ + WITH RECURSIVE cte (n) AS + ( + SELECT cast(1 as int) + UNION ALL + SELECT cast(n + 1 as int) FROM cte + ) + SELECT n FROM cte order by n; + """ + exception "ABORTED" + } + + // LIMIT cannot be used to stop the recursion for now + //qt_q3 """ + //WITH RECURSIVE cte (n) AS + //( + //SELECT cast(1 as int) + //UNION ALL + //SELECT cast(n + 1 as int) FROM cte LIMIT 10000 + //) + //SELECT n FROM cte order by n; + //""" + + sql "DROP TABLE IF EXISTS sales;" + sql """ + CREATE TABLE sales + ( + c_date date, + c_price double + ) DUPLICATE KEY (c_date) + DISTRIBUTED BY HASH(c_date) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """insert into sales values + ('2017-01-03', 100.0), + ('2017-01-03', 200.0), + ('2017-01-06', 50.0), + ('2017-01-08', 10.0), + ('2017-01-08', 20.0), + ('2017-01-08', 150.0), + ('2017-01-10', 5.0);""" + + qt_q4 """ + WITH 
RECURSIVE dates (c_date) AS + ( + SELECT MIN(c_date) FROM sales + UNION ALL + SELECT c_date + INTERVAL 1 DAY FROM dates + WHERE c_date + INTERVAL 1 DAY <= (SELECT MAX(c_date) FROM sales) + ) + SELECT * FROM dates order by 1; + """ + + qt_q5 """ + WITH RECURSIVE dates (c_date) AS + ( + SELECT MIN(c_date) FROM sales + UNION ALL + SELECT c_date + INTERVAL 1 DAY FROM dates + WHERE c_date + INTERVAL 1 DAY <= (SELECT MAX(c_date) FROM sales) + ) + SELECT dates.c_date, COALESCE(SUM(c_price), 0) AS sum_price + FROM dates LEFT JOIN sales ON dates.c_date = sales.c_date + GROUP BY dates.c_date + ORDER BY dates.c_date; + """ + + sql "DROP TABLE IF EXISTS employees;" + sql """ + CREATE TABLE employees ( + id INT NOT NULL, + name VARCHAR(100) NOT NULL, + manager_id INT NULL + ) DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ('replication_num' = '1'); + """ + sql """INSERT INTO employees VALUES + (333, "Yasmina", NULL), + (198, "John", 333), + (692, "Tarek", 333), + (29, "Pedro", 198), + (4610, "Sarah", 29), + (72, "Pierre", 29), + (123, "Adil", 692); + """ + + qt_q6 """ + WITH RECURSIVE employee_paths (id, name, path) AS + ( + SELECT id, name, CAST(id AS varchar(65533)) + FROM employees + WHERE manager_id IS NULL + UNION ALL + SELECT e.id, e.name, cast(CONCAT(ep.path, ',', e.id) as varchar(65533)) + FROM employee_paths AS ep JOIN employees AS e + ON ep.id = e.manager_id + ) + SELECT * FROM employee_paths ORDER BY path; + """ +} diff --git a/regression-test/suites/recursive_cte/column_attribute_null_test.groovy b/regression-test/suites/recursive_cte/column_attribute_null_test.groovy new file mode 100644 index 00000000000000..df10fbe7b3362b --- /dev/null +++ b/regression-test/suites/recursive_cte/column_attribute_null_test.groovy @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("column_attribute_null_test", "rec_cte") { + + String db = context.config.getDbNameByFile(context.file) + def prefix_str = "column_attribute_null_" + def tb_name = prefix_str + "recursive_cte_tb" + def temp_cte_name = prefix_str + "temp_cte_result" + + sql """drop table if exists ${tb_name}""" + sql """CREATE TABLE ${tb_name} ( + EmployeeID INT not null, + Name VARCHAR(50) not NULL, + ManagerID INT not NULL + ) DUPLICATE KEY(EmployeeID) + DISTRIBUTED BY HASH(EmployeeID) + BUCKETS 3 PROPERTIES ('replication_num' = '1');""" + + sql """INSERT INTO ${tb_name} VALUES + (100, 'Manager X', 0), + (101, 'Alice', 100), + (102, 'Bob', 100), + (103, 'Charlie', 101), + (104, 'David', 103), + (105, 'Eve', 101);""" + + + sql """drop view if exists ${temp_cte_name}""" + sql """ + CREATE VIEW ${temp_cte_name} AS + WITH recursive SubordinateHierarchy ( + EmployeeID, + EmployeeName, + ManagerID, + Level, + Comments + ) + AS + ( + SELECT + EmployeeID, + Name AS EmployeeName, + ManagerID, + cast(0 as bigint) AS Level, + CAST(NULL AS VARCHAR(100)) AS Comments + FROM + ${tb_name} + WHERE + EmployeeID = 100 + + UNION ALL + + SELECT + E.EmployeeID, + E.Name AS EmployeeName, + E.ManagerID, + cast(H.Level + 1 as bigint) AS Level, + cast(H.EmployeeName as VARCHAR(100)) + FROM + ${tb_name} AS E + INNER JOIN + SubordinateHierarchy AS H + ON E.ManagerID = H.EmployeeID + ) + SELECT + * + FROM + 
SubordinateHierarchy + ORDER BY + Level, EmployeeID; + """ + + def desc_res = sql """desc ${temp_cte_name}""" + logger.info("desc_res: " + desc_res) + for (int i = 0; i < desc_res.size(); i++) { + assertTrue(desc_res[i][4] == null) + } + +} diff --git a/regression-test/suites/recursive_cte/complex_rec_cte_test.groovy b/regression-test/suites/recursive_cte/complex_rec_cte_test.groovy new file mode 100644 index 00000000000000..575ba6f80d1cc0 --- /dev/null +++ b/regression-test/suites/recursive_cte/complex_rec_cte_test.groovy @@ -0,0 +1,478 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("complex_rec_cte_test", "rec_cte") { + + sql """ + CREATE TABLE IF NOT EXISTS departments ( + id INT, + parent_id INT, + dept_name VARCHAR(50), + budget DECIMAL(18, 2) + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ("replication_num" = "1");""" + + sql """ + CREATE TABLE IF NOT EXISTS connections ( + source_id INT, + target_id INT, + status VARCHAR(20) + ) + DUPLICATE KEY(source_id) + DISTRIBUTED BY HASH(source_id) BUCKETS 4 + PROPERTIES ("replication_num" = "1");""" + + sql """ + CREATE TABLE IF NOT EXISTS nodes ( + id INT, + type VARCHAR(20) + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + CREATE TABLE IF NOT EXISTS raw_bom ( + part_id INT, + sub_part_id INT, + qty DOUBLE + ) + DUPLICATE KEY(part_id) + DISTRIBUTED BY HASH(part_id) BUCKETS 4 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO departments VALUES + (1, NULL, '总部', 10000.00), + (10, 1, '研发部', 5000.00), + (11, 1, '市场部', 4000.00), + (101, 10, '后端开发', 2000.00), + (102, 10, '前端开发', 1500.00), + (111, 11, '线上推广', 2000.00);""" + + sql """ + INSERT INTO nodes VALUES (1, 'seed'), (2, 'normal'), (3, 'normal'), (4, 'normal');""" + + sql """ + INSERT INTO connections VALUES + (1, 2, 'active'), (2, 3, 'active'), (3, 4, 'active'), + (4, 1, 'active'), (2, 4, 'active'), (1, 3, 'inactive');""" + + sql """ + INSERT INTO raw_bom VALUES + (1001, 2001, 2.0), (1001, 2002, 5.0), + (2001, 3001, 1.5), (2001, 3002, 3.0), + (2002, 4001, 1.0);""" + + sql """ + WITH RECURSIVE + dept_tree(id, parent_id, dept_name, path, budget) AS ( + SELECT + id, + parent_id, + dept_name, + CAST(dept_name AS CHAR(200)) AS path, + budget + FROM departments + WHERE parent_id IS NULL + UNION ALL + SELECT + t.id, + t.parent_id, + t.dept_name, + CAST(CONCAT(c.path, '->', t.dept_name) AS CHAR(200)), + t.budget + FROM departments t + JOIN dept_tree c ON t.parent_id = c.id + WHERE c.id < 1000 + ), + budget_summary(dept_id, 
total_budget, lvl) AS ( + SELECT + id, + CAST(budget AS DECIMAL(18,2)), + CAST(1 AS INT) + FROM dept_tree + WHERE parent_id IS NULL + UNION ALL + SELECT + curr.id, + CAST(curr.budget + prev.total_budget AS DECIMAL(18,2)), + CAST(prev.lvl + 1 AS INT) + FROM departments curr + JOIN budget_summary prev ON curr.parent_id = prev.dept_id + WHERE prev.lvl < 10 + ) + SELECT + dept_id, + MAX(total_budget) as max_b, + RANK() OVER(ORDER BY MAX(total_budget) DESC) as rk + FROM budget_summary + GROUP BY dept_id + HAVING max_b > 5000 + ORDER BY rk + LIMIT 100; + """ + + sql """ + WITH RECURSIVE path_finder(start_node, end_node, hops, visited_nodes) AS ( + SELECT + source_id, + target_id, + CAST(1 AS BIGINT), + CAST(source_id AS CHAR(500)) + FROM connections + WHERE source_id IN (SELECT id FROM nodes WHERE type = 'seed') + + UNION ALL + + SELECT DISTINCT + p.start_node, + c.target_id, + CAST(p.hops + 1 AS BIGINT), + CAST(CONCAT(p.visited_nodes, ',', c.target_id) AS CHAR(500)) + FROM path_finder p + JOIN ( + SELECT a.source_id, a.target_id + FROM connections a + WHERE a.status = 'active' + ) c ON p.end_node = c.source_id + WHERE p.hops < 5 + AND p.visited_nodes NOT LIKE CONCAT('%', CAST(c.target_id AS CHAR), '%') + ) + SELECT + start_node, + end_node, + MIN(hops) as min_hops + FROM path_finder + GROUP BY start_node, end_node + ORDER BY min_hops ASC;""" + + sql """ + WITH RECURSIVE bom_recursive(part_id, sub_part_id, quantity, depth) AS ( + SELECT + part_id, + sub_part_id, + CAST(qty AS DOUBLE), + CAST(0 AS INT) + FROM ( + SELECT *, ROW_NUMBER() OVER(PARTITION BY part_id ORDER BY qty DESC) as rn + FROM raw_bom + ) t + WHERE rn = 1 + + UNION ALL + + SELECT + r.part_id, + b.sub_part_id, + CAST(r.quantity * b.qty AS DOUBLE), + CAST(r.depth + 1 AS INT) + FROM bom_recursive r + JOIN raw_bom b ON r.sub_part_id = b.part_id + WHERE r.depth < 20 + ) + SELECT + part_id, + SUM(quantity) as total_qty, + COUNT(DISTINCT sub_part_id) as unique_subs + FROM bom_recursive + WHERE depth > 0 + GROUP 
BY part_id + HAVING total_qty > 0 + ORDER BY total_qty DESC;""" + + sql """ + WITH RECURSIVE outer_cte(id, val, lvl) AS ( + SELECT + cast(id as bigint), + cast(val as bigint), + CAST(0 AS bigint) + FROM (WITH RECURSIVE inner_recursive(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM inner_recursive WHERE n < 3 + ) + SELECT n as id, n * 10 as val FROM inner_recursive) as t + + UNION ALL + + SELECT + CAST(o.id + 100 AS bigint), + CAST(o.val + 10 AS bigint), + CAST(o.lvl + 1 AS bigINT) + FROM outer_cte o + WHERE o.lvl < 2 + ) + SELECT * FROM outer_cte ORDER BY id;""" + + sql """ + WITH RECURSIVE multi_branch_cte(node_id, label, depth) AS ( + -- branch1:no recursive Anchor 1 + SELECT CAST(id AS INT), CAST('Type_A' AS CHAR(10)), CAST(0 AS INT) + FROM nodes WHERE type = 'seed' + UNION + -- branch 2:no recursive Anchor 2 + SELECT CAST(id AS INT), CAST('Type_B' AS CHAR(10)), CAST(0 AS INT) + FROM nodes WHERE type = 'normal' AND id < 2 + + UNION ALL + + -- branch3:recursive part + SELECT + CAST(c.target_id AS INT), + CAST(m.label AS CHAR(10)), + CAST(m.depth + 1 AS INT) + FROM multi_branch_cte m + JOIN connections c ON m.node_id = c.source_id + WHERE m.depth < 5 + ) + SELECT + label, + depth, + COUNT(DISTINCT node_id) as node_count + FROM multi_branch_cte + GROUP BY label, depth + HAVING node_count > 0 + ORDER BY label, depth; + """ + + sql """ + WITH RECURSIVE complex_logic(curr_id, total_score, step_path) AS ( + SELECT + id, + CAST(budget AS DOUBLE), + CAST(dept_name AS CHAR(200)) + FROM ( + SELECT *, ROW_NUMBER() OVER(ORDER BY budget DESC) as rank_id + FROM departments + ) d WHERE rank_id = 1 + + UNION ALL + + SELECT + CAST(t.id AS INT), + CAST(c.total_score + t.budget AS DOUBLE), + CAST(CONCAT(c.step_path, '->', t.dept_name) AS CHAR(200)) + FROM departments t + INNER JOIN complex_logic c ON t.parent_id = c.curr_id + WHERE c.total_score < 50000 + AND t.dept_name NOT IN (SELECT dept_name FROM departments WHERE budget < 100) + ) + SELECT + 
curr_id, + total_score, + step_path, + DENSE_RANK() OVER(ORDER BY total_score DESC) as score_rank + FROM complex_logic + WHERE step_path LIKE '%研发%' + ORDER BY score_rank + LIMIT 10; + """ + + sql """ + WITH RECURSIVE + num_seq(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM num_seq WHERE n < 5 + ), + char_path(lvl, str) AS ( + SELECT CAST(1 AS INT), CAST('A' AS CHAR(100)) + UNION ALL + SELECT CAST(lvl + 1 AS INT), CAST(CONCAT(str, '->', 'B') AS CHAR(100)) + FROM char_path WHERE lvl < 5 + ) + SELECT a.n, b.str + FROM num_seq a + JOIN char_path b ON a.n = b.lvl + ORDER BY a.n; + """ + + sql """ + WITH RECURSIVE complex_join_cte(id, p_id, info, score) AS ( + -- Anchor + SELECT id, parent_id, CAST(dept_name AS CHAR(100)), CAST(budget AS DOUBLE) + FROM departments WHERE parent_id IS NULL + UNION ALL + -- Recursive part:Join another table to logical judge + SELECT + t.id, t.parent_id, + CAST(CONCAT(c.info, '/', t.dept_name) AS CHAR(100)), + CAST(CASE WHEN t.budget > 1000 THEN c.score + t.budget ELSE c.score END AS DOUBLE) + FROM departments t + JOIN complex_join_cte c ON t.parent_id = c.id + LEFT JOIN nodes n ON t.id = n.id + WHERE c.score < 100000 AND (n.type IS NULL OR n.type != 'blocked') + ) + SELECT * FROM complex_join_cte WHERE score > 5000; + """ + + sql """ + WITH RECURSIVE sub_cte(id) AS ( + SELECT CAST(101 AS INT) + UNION ALL + SELECT CAST(parent_id AS INT) FROM departments d + JOIN sub_cte s ON d.id = s.id + WHERE d.parent_id IS NOT NULL + ) + SELECT * FROM departments + WHERE id IN (SELECT id FROM sub_cte) + OR parent_id IN (SELECT id FROM sub_cte); + """ + + sql """ + WITH RECURSIVE grouped_cte(grp_key, total_val, depth) AS ( + SELECT + cast(parent_id as int), + cast(SUM(budget) as double), + CAST(0 AS INT) + FROM departments + GROUP BY parent_id + UNION ALL + SELECT + CAST(d.id AS INT), + CAST(SUM(d.budget + g.total_val) AS DOUBLE), + CAST(g.depth + 1 AS INT) + FROM departments d + JOIN grouped_cte g ON d.parent_id = g.grp_key 
+ WHERE g.depth < 3 + GROUP BY d.id, g.depth + ) + SELECT grp_key, MAX(total_val) FROM grouped_cte GROUP BY grp_key; + """ + + test { + sql """ + WITH RECURSIVE multi_union(id, tag) AS ( + -- two Anchor branch + SELECT CAST(1 AS INT), CAST('start1' AS CHAR(50)) + UNION + SELECT CAST(2 AS INT), CAST('start2' AS CHAR(50)) + UNION ALL + -- two recursive branch + SELECT CAST(id + 2 AS INT), CAST('step_a' AS CHAR(50)) FROM multi_union WHERE id < 5 + UNION ALL + SELECT CAST(id + 4 AS INT), CAST('step_b' AS CHAR(50)) FROM multi_union WHERE id < 5 + ) + SELECT tag, COUNT(*) FROM multi_union GROUP BY tag; + """ + exception """recursive reference to query multi_union must not appear within its non-recursive term""" + } + + sql """ + WITH RECURSIVE tree_data(id, p_id, val) AS ( + SELECT id, parent_id, CAST(budget AS DOUBLE) FROM departments WHERE parent_id IS NULL + UNION ALL + SELECT d.id, d.parent_id, CAST(d.budget AS DOUBLE) + FROM departments d JOIN tree_data t ON d.parent_id = t.id + ) + SELECT + *, + SUM(val) OVER(PARTITION BY p_id ORDER BY val DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as running_total, + LAG(val) OVER(ORDER BY id) as prev_val + FROM tree_data; + """ + + sql """ + WITH RECURSIVE complex_predicate_cte(id, category) AS ( + SELECT id, CAST('ROOT' AS CHAR(20)) FROM departments WHERE parent_id IS NULL + UNION ALL + SELECT + d.id, + CAST( + CASE + WHEN d.budget > 5000 THEN 'High' + WHEN d.id IN (SELECT source_id FROM connections) THEN 'Connected' + ELSE 'Normal' + END AS CHAR(20) + ) + FROM departments d + JOIN complex_predicate_cte c ON d.parent_id = c.id + WHERE c.category != 'End' + ) + SELECT category, COUNT(*) FROM complex_predicate_cte GROUP BY category; + """ + + sql """ + WITH RECURSIVE + cte_1(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_1 WHERE n < 5 + ), + cte_2(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_2 WHERE n < 5 + ), + cte_3(n) AS ( + SELECT CAST(1 AS INT) 
+ UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_3 WHERE n < 5 + ), + cte_4(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_4 WHERE n < 5 + ), + cte_5(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_5 WHERE n < 5 + ), + cte_6(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_6 WHERE n < 5 + ), + cte_7(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_7 WHERE n < 5 + ), + cte_8(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_8 WHERE n < 5 + ), + cte_9(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_9 WHERE n < 5 + ), + cte_10(n) AS ( + SELECT CAST(1 AS INT) + UNION ALL + SELECT CAST(n + 1 AS INT) FROM cte_10 WHERE n < 5 + ) + SELECT * + FROM cte_1 + JOIN cte_2 ON cte_1.n = cte_2.n + JOIN cte_3 ON cte_1.n = cte_3.n + JOIN cte_4 ON cte_1.n = cte_4.n + JOIN cte_5 ON cte_1.n = cte_5.n + JOIN cte_6 ON cte_1.n = cte_6.n + JOIN cte_7 ON cte_1.n = cte_7.n + JOIN cte_8 ON cte_1.n = cte_8.n + JOIN cte_9 ON cte_1.n = cte_9.n + JOIN cte_10 ON cte_1.n = cte_10.n; + """ + + +} diff --git a/regression-test/suites/recursive_cte/create_and_insert_select_test.groovy b/regression-test/suites/recursive_cte/create_and_insert_select_test.groovy new file mode 100644 index 00000000000000..410290dfd8d679 --- /dev/null +++ b/regression-test/suites/recursive_cte/create_and_insert_select_test.groovy @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("create_and_insert_select_test", "rec_cte") {
+    // Checks that a recursive CTE can feed both CREATE TABLE ... AS SELECT and INSERT INTO ... SELECT.
+    String db = context.config.getDbNameByFile(context.file)
+    def prefix_str = "create_and_insert_select_"
+    def tb_name = prefix_str + "recursive_cte_tb"
+    def table_cte_name = prefix_str + "temp_cte_table"
+
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT not null,
+        Name VARCHAR(50) not NULL,
+        ManagerID INT not NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'Manager X', 0),
+        (101, 'Alice', 100),
+        (102, 'Bob', 100),
+        (103, 'Charlie', 101),
+        (104, 'David', 103),
+        (105, 'Eve', 101);"""
+
+    // Step 1: materialize the hierarchy rooted at EmployeeID 100 through CTAS.
+    sql """drop table if exists ${table_cte_name}"""
+    sql """
+        CREATE table ${table_cte_name}
+        PROPERTIES ('replication_num' = '1')
+        AS
+        WITH recursive SubordinateHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            Level,
+            Comments
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(0 as bigint) AS Level,
+                CAST(NULL AS VARCHAR(100)) AS Comments
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(H.Level + 1 as bigint) AS Level,
+                cast(H.EmployeeName as VARCHAR(100))
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                SubordinateHierarchy AS H
+                ON E.ManagerID = H.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            SubordinateHierarchy
+        ORDER BY
+            Level, EmployeeID;
+    """
+
+    def tb_res1 = sql """select * from ${table_cte_name}"""
+    assertEquals(6, tb_res1.size()) // anchor 100, then 101/102, then 103/105, then 104: all six employees
+
+    sql """
+        insert into ${table_cte_name}
+        WITH recursive SubordinateHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            Level,
+            Comments
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(0 as bigint) AS Level,
+                CAST(NULL AS VARCHAR(100)) AS Comments
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(H.Level + 1 as bigint) AS Level,
+                cast(H.EmployeeName as VARCHAR(100))
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                SubordinateHierarchy AS H
+                ON E.ManagerID = H.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            SubordinateHierarchy
+        ORDER BY
+            Level, EmployeeID;
+    """
+
+    def tb_res2 = sql """select * from ${table_cte_name}"""
+    assertEquals(tb_res1.size() * 2, tb_res2.size()) // the INSERT appends the same result set once more
+
+}
diff --git a/regression-test/suites/recursive_cte/mtmv_and_cte_test.groovy b/regression-test/suites/recursive_cte/mtmv_and_cte_test.groovy
new file mode 100644
index 00000000000000..9baa20842cc9b9
--- /dev/null
+++ b/regression-test/suites/recursive_cte/mtmv_and_cte_test.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("mtmv_and_cte_test", "rec_cte") {
+    // Creates an async materialized view from a recursive CTE query, then runs the rewrite check against it.
+    String db = context.config.getDbNameByFile(context.file)
+    def prefix_str = "mtmv_and_cte_"
+    def tb_name = prefix_str + "recursive_cte_tb"
+
+    sql """use ${db}"""
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT not null,
+        Name VARCHAR(50) not NULL,
+        ManagerID INT not NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'Manager X', 0),
+        (101, 'Alice', 100),
+        (102, 'Bob', 100),
+        (103, 'Charlie', 101),
+        (104, 'David', 103),
+        (105, 'Eve', 101);"""
+
+    def sql_str = """WITH recursive SubordinateHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            Level,
+            Comments
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(0 as bigint) AS Level,
+                CAST(NULL AS VARCHAR(100)) AS Comments
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(H.Level + 1 as bigint) AS Level,
+                cast(H.EmployeeName as VARCHAR(100))
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                SubordinateHierarchy AS H
+                ON E.ManagerID = H.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            SubordinateHierarchy
+        ORDER BY
+            Level, EmployeeID;"""
+    def mtmv_name = prefix_str + "cte_mtmv"
+    // NOTE(review): both helpers are defined outside this file; presumably mv_rewrite_fail asserts the query is NOT rewritten onto the MV - confirm.
+    async_create_mv(db, sql_str, mtmv_name)
+    mv_rewrite_fail(sql_str, mtmv_name)
+
+
+}
diff --git a/regression-test/suites/recursive_cte/mv_and_cte_test.groovy b/regression-test/suites/recursive_cte/mv_and_cte_test.groovy
new file mode 100644
index 00000000000000..3f869ea763791c
--- /dev/null
+++ b/regression-test/suites/recursive_cte/mv_and_cte_test.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("mv_and_cte_test", "rec_cte") {
+    // Expects CREATE MATERIALIZED VIEW over a recursive CTE query to be rejected with the error asserted below.
+    String db = context.config.getDbNameByFile(context.file)
+    def prefix_str = "mv_and_cte_"
+    def tb_name = prefix_str + "recursive_cte_tb"
+
+    // Base table: a small employee/manager hierarchy rooted at EmployeeID 100.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT not null,
+        Name VARCHAR(50) not NULL,
+        ManagerID INT not NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'Manager X', 0),
+        (101, 'Alice', 100),
+        (102, 'Bob', 100),
+        (103, 'Charlie', 101),
+        (104, 'David', 103),
+        (105, 'Eve', 101);"""
+
+    def sql_str = """WITH recursive SubordinateHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            Level,
+            Comments
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(0 as bigint) AS Level,
+                CAST(NULL AS VARCHAR(100)) AS Comments
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(H.Level + 1 as bigint) AS Level,
+                cast(H.EmployeeName as VARCHAR(100))
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                SubordinateHierarchy AS H
+                ON E.ManagerID = H.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            SubordinateHierarchy
+        ORDER BY
+            Level, EmployeeID;"""
+    def mv_name = prefix_str + "cte_mv"
+    test {
+        sql "CREATE MATERIALIZED VIEW ${mv_name} AS " + sql_str
+        exception "LogicalCTEAnchor is not supported in sync materialized view"
+    }
+
+}
diff --git a/regression-test/suites/recursive_cte/no_data_test.groovy b/regression-test/suites/recursive_cte/no_data_test.groovy
new file mode 100644
index 00000000000000..cabdd0cc5626a3
--- /dev/null
+++ b/regression-test/suites/recursive_cte/no_data_test.groovy
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("no_data_test", "rec_cte") {
+    // Runs a recursive CTE against a table that holds no rows and checks that nothing is returned.
+    String db = context.config.getDbNameByFile(context.file)
+    def prefix_str = "no_data_"
+    def tb_name = prefix_str + "recursive_cte_tb"
+
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT not null,
+        Name VARCHAR(50) not NULL,
+        ManagerID INT not NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+    // No INSERT on purpose: the anchor branch selects from an empty table.
+    def rows = sql """
+        WITH recursive SubordinateHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            Level,
+            Comments
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(0 as bigint) AS Level,
+                CAST(NULL AS VARCHAR(100)) AS Comments
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(H.Level + 1 as bigint) AS Level,
+                cast(H.EmployeeName as VARCHAR(100))
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                SubordinateHierarchy AS H
+                ON E.ManagerID = H.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            SubordinateHierarchy
+        ORDER BY
+            Level, EmployeeID;
+    """
+    assertEquals(0, rows.size()) // empty anchor => the recursive branch has nothing to expand
+
+}
diff --git a/regression-test/suites/recursive_cte/recursive_100_number_test.groovy b/regression-test/suites/recursive_cte/recursive_100_number_test.groovy
new file mode 100644
index 00000000000000..28a81e15f36b1a
--- /dev/null
+++ b/regression-test/suites/recursive_cte/recursive_100_number_test.groovy
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("recursive_100_number_test", "rec_cte") {
+    // Probes the cte_max_recursion_depth boundary with a counter CTE: 100 levels run, 101 fail until the limit is raised.
+    def level = 100
+    sql """
+        WITH recursive RecursiveCounter (
+            N,
+            Depth
+        )
+        AS
+        (
+            SELECT
+                cast(1 as bigint) AS N,
+                cast(1 as bigint) AS Depth
+            UNION ALL
+            SELECT
+                cast(N + 1 as bigint) AS N,
+                cast(Depth + 1 as bigint) AS Depth
+            FROM
+                RecursiveCounter
+            WHERE
+                Depth < ${level}
+        )
+        SELECT
+            N,
+            Depth
+        FROM
+            RecursiveCounter
+        ORDER BY
+            Depth;"""
+    // One level more than the limit in effect (100, per the expected message) must be rejected.
+    level = 101
+    test {
+        sql """WITH recursive RecursiveCounter (
+            N,
+            Depth
+        )
+        AS
+        (
+            SELECT
+                cast(1 as bigint) AS N,
+                cast(1 as bigint) AS Depth
+            UNION ALL
+            SELECT
+                cast(N + 1 as bigint) AS N,
+                cast(Depth + 1 as bigint) AS Depth
+            FROM
+                RecursiveCounter
+            WHERE
+                Depth < ${level}
+        )
+        SELECT
+            N,
+            Depth
+        FROM
+            RecursiveCounter
+        ORDER BY
+            Depth;"""
+        exception "reach cte_max_recursion_depth 100"
+    }
+    // After raising the session variable to 101, the same 101-level query is expected to run.
+    sql "set cte_max_recursion_depth=101"
+    sql """
+        WITH recursive RecursiveCounter (
+            N,
+            Depth
+        )
+        AS
+        (
+            SELECT
+                cast(1 as bigint) AS N,
+                cast(1 as bigint) AS Depth
+            UNION ALL
+            SELECT
+                cast(N + 1 as bigint) AS N,
+                cast(Depth + 1 as bigint) AS Depth
+            FROM
+                RecursiveCounter
+            WHERE
+                Depth < ${level}
+        )
+        SELECT
+            N,
+            Depth
+        FROM
+            RecursiveCounter
+        ORDER BY
+            Depth;"""
+
+}
diff --git a/regression-test/suites/recursive_cte/recursive_cte_exception_test.groovy b/regression-test/suites/recursive_cte/recursive_cte_exception_test.groovy
new file mode 100644
index 00000000000000..dfa7ed5d462800
--- /dev/null
+++ b/regression-test/suites/recursive_cte/recursive_cte_exception_test.groovy
@@ -0,0 +1,216
@@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("exception_test", "rec_cte") {
+    String db = context.config.getDbNameByFile(context.file)
+    def prefix_str = "exception_"
+    def tb_name = prefix_str + "recursive_cte_tb"
+    // Malformed recursive CTE definitions and the error message each one is expected to raise.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT not null,
+        Name VARCHAR(50) NOT NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'Manager X', 1),
+        (101, 'Alice', 100),
+        (102, 'Bob', 100),
+        (103, 'Charlie', 101),
+        (104, 'David', 103),
+        (105, 'Eve', 101);"""
+    // EmployeeID appears twice in the CTE column list, so the later reference to it must be reported as ambiguous.
+    test {
+        sql """
+            WITH recursive SubordinateHierarchy (
+                EmployeeID,
+                EmployeeID,
+                EmployeeName,
+                ManagerID,
+                Level,
+                Comments
+            )
+            AS
+            (
+                SELECT
+                    EmployeeID,
+                    EmployeeID,
+                    Name AS EmployeeName,
+                    ManagerID,
+                    cast(0 as bigint) AS Level,
+                    CAST(NULL AS VARCHAR(100)) AS Comments
+                FROM
+                    ${tb_name}
+                WHERE
+                    EmployeeID = 100
+
+                UNION ALL
+
+                SELECT
+                    E.EmployeeID,
+                    E.EmployeeID,
+                    E.Name AS EmployeeName,
+                    E.ManagerID,
+                    cast(H.Level + 1 as bigint) AS Level,
+                    cast(H.EmployeeName as VARCHAR(100))
+                FROM
+                    ${tb_name} AS E
+                INNER JOIN
+                    SubordinateHierarchy AS H
+                    ON E.ManagerID = H.EmployeeID
+            )
+            SELECT
+                *
+            FROM
+                SubordinateHierarchy
+            ORDER BY
+                Level, EmployeeID;
+        """
+        exception "is ambiguous"
+    }
+
+    // With the recursive branch removed, WITH RECURSIVE wraps an ordinary CTE; the statement is expected to run without error.
+    sql """
+        WITH recursive SubordinateHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            Level,
+            Comments
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(0 as bigint) AS Level,
+                CAST(NULL AS VARCHAR(100)) AS Comments
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+        )
+        SELECT
+            *
+        FROM
+            SubordinateHierarchy
+        ORDER BY
+            Level, EmployeeID;
+    """
+
+    // A body that references the CTE itself but has no UNION (no anchor branch) must be rejected.
+    test {
+        sql """
+            WITH recursive SubordinateHierarchy (
+                EmployeeID,
+                EmployeeName,
+                ManagerID,
+                Level,
+                Comments
+            )
+            AS
+            (
+
+                SELECT
+                    E.EmployeeID,
+                    E.Name AS EmployeeName,
+                    E.ManagerID,
+                    cast(H.Level + 1 as bigint) AS Level,
+                    cast(H.EmployeeName as VARCHAR(100))
+                FROM
+                    ${tb_name} AS E
+                INNER JOIN
+                    SubordinateHierarchy AS H
+                    ON E.ManagerID = H.EmployeeID
+            )
+            SELECT
+                *
+            FROM
+                SubordinateHierarchy
+            ORDER BY
+                Level, EmployeeID;
+        """
+        exception "recursive cte must be union"
+    }
+    // NOTE(review): RECURSIVE is omitted and ORDER BY names an unselected column (Path); either could raise "does not exist" - confirm which is intended.
+    test {
+        sql """
+            WITH
+            StartingEmployees AS (
+                SELECT
+                    EmployeeID,
+                    Name,
+                    ManagerID
+                FROM
+                    ${tb_name}
+                WHERE
+                    Name LIKE 'A%'
+            ),
+            SubordinateHierarchy (
+                EmployeeID,
+                EmployeeName,
+                ManagerID,
+                Level,
+                Comments
+            )
+            AS
+            (
+                SELECT
+                    EmployeeID,
+                    Name AS EmployeeName,
+                    ManagerID,
+                    cast(0 as bigint) AS Level,
+                    CAST(NULL AS VARCHAR(100)) AS Comments
+                FROM
+                    ${tb_name}
+                WHERE
+                    EmployeeID = 100
+
+                UNION ALL
+
+                SELECT
+                    E.EmployeeID,
+                    E.Name AS EmployeeName,
+                    E.ManagerID,
+                    cast(H.Level + 1 as bigint) AS Level,
+                    cast(H.EmployeeName as VARCHAR(100))
+                FROM
+                    ${tb_name} AS E
+                INNER JOIN
+                    SubordinateHierarchy AS H
+                    ON E.ManagerID = H.EmployeeID
+            )
+            SELECT
+                EmployeeID,
+                EmployeeName,
+                ManagerID,
+                Level,
+                Comments
+            FROM
+                StartingEmployees
+            ORDER BY
+                Path DESC;
+        """
+        exception "does not exist"
+    }
+
+
+}
diff --git a/regression-test/suites/recursive_cte/same_data_type_recursive_test.groovy b/regression-test/suites/recursive_cte/same_data_type_recursive_test.groovy
new file mode 100644
index 00000000000000..b4047650abb533
--- /dev/null
+++ b/regression-test/suites/recursive_cte/same_data_type_recursive_test.groovy
@@ -0,0 +1,430 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("same_data_type_recursive_test", "rec_cte") {
+    String db = context.config.getDbNameByFile(context.file)
+    def prefix_str = "data_null_not_recursive_" // NOTE(review): prefix does not match the suite name - confirm it cannot collide with another suite's table
+    def tb_name = prefix_str + "tb"
+    // Recreates one employee/manager table per case and runs a recursive CTE over it; the last two cases expect the depth-limit error.
+    // Case 1: ManagerID is NOT NULL in this table, so the anchor (ManagerID IS NULL) matches no row.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT not null,
+        Name VARCHAR(50) not NULL,
+        ManagerID INT not NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (101, 'Alice', 999),
+        (102, 'Bob', 999),
+        (103, 'Charlie', 999);"""
+
+    sql """
+        WITH recursive HierarchyTraversal (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            TraversalLevel
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name}
+            WHERE
+                ManagerID IS NULL
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(R.TraversalLevel + 1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                HierarchyTraversal AS R
+                ON E.ManagerID = R.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            HierarchyTraversal;"""
+
+
+    // Case 2: one level of recursion - rows 101-103 all report directly to the root (100).
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT null,
+        Name VARCHAR(50) NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'CEO Root', NULL),
+        (101, 'Manager Alpha', 100),
+        (102, 'Manager Beta', 100),
+        (103, 'Manager Gamma', 100);"""
+
+    sql """
+        WITH recursive ShallowHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            TraversalLevel
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(R.TraversalLevel + 1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                ShallowHierarchy AS R
+                ON E.ManagerID = R.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            ShallowHierarchy
+        WHERE
+            TraversalLevel > 1
+        ORDER BY
+            EmployeeID;"""
+    // Case 3: every row has ManagerID NULL, so all rows match the anchor and the join in the recursive branch finds none.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT null,
+        Name VARCHAR(50) NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'Project A', NULL),
+        (101, 'Project B', NULL),
+        (102, 'Project C', NULL);"""
+
+    sql """
+        WITH recursive ExhaustiveAnchor (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            TraversalLevel
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name}
+            WHERE
+                ManagerID IS NULL
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(R.TraversalLevel + 1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                ExhaustiveAnchor AS R
+                ON E.ManagerID = R.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            ExhaustiveAnchor
+        ORDER BY
+            EmployeeID;"""
+
+
+    // Case 4: rows 100 -> 101 -> 102 form a chain from the anchor; 200 and 201 are not reachable from it.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT null,
+        Name VARCHAR(50) NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'CEO Alpha', NULL),
+        (101, 'Manager X', 100),
+        (102, 'Worker Y', 101),
+        (200, 'Isolated Lead', 999),
+        (201, 'Isolated Worker', 200);"""
+
+    sql """
+        WITH recursive MixedHierarchyTraversal (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            TraversalLevel
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(R.TraversalLevel + 1 as bigint) AS TraversalLevel
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                MixedHierarchyTraversal AS R
+                ON E.ManagerID = R.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            MixedHierarchyTraversal
+        ORDER BY
+            EmployeeID;"""
+
+
+    // Case 5: two branches under the root (101 -> 102/103 and 104 -> 105); BranchPath records the route to each row.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT null,
+        Name VARCHAR(50) NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'CEO Root', NULL),
+        (101, 'Manager Alpha', 100),
+        (102, 'Worker A1', 101),
+        (103, 'Worker A2', 101),
+        (104, 'Manager Beta', 100),
+        (105, 'Worker B1', 104);"""
+
+    sql """
+        WITH recursive MultiBranchHierarchy (
+            EmployeeID,
+            EmployeeName,
+            ManagerID,
+            TraversalLevel,
+            BranchPath
+        )
+        AS
+        (
+            SELECT
+                EmployeeID,
+                Name AS EmployeeName,
+                ManagerID,
+                cast(1 as bigint) AS TraversalLevel,
+                CAST(Name AS VARCHAR(65533)) AS BranchPath
+            FROM
+                ${tb_name}
+            WHERE
+                EmployeeID = 100
+
+            UNION ALL
+
+            SELECT
+                E.EmployeeID,
+                E.Name AS EmployeeName,
+                E.ManagerID,
+                cast(R.TraversalLevel + 1 as bigint) AS TraversalLevel,
+                cast(CONCAT(R.BranchPath, ' -> ', E.Name) as VARCHAR(65533)) AS BranchPath
+            FROM
+                ${tb_name} AS E
+            INNER JOIN
+                MultiBranchHierarchy AS R
+                ON E.ManagerID = R.EmployeeID
+        )
+        SELECT
+            *
+        FROM
+            MultiBranchHierarchy
+        ORDER BY
+            BranchPath;"""
+
+    // Case 6: 201 and 202 manage each other; TraversalCount changes every iteration, so UNION never sees a repeated row and the depth limit is expected.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT null,
+        Name VARCHAR(50) NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'CEO Root', NULL),
+        (201, 'Cycle Node A', 202),
+        (202, 'Cycle Node B', 201);"""
+
+    test {
+        sql """
+            WITH recursive CycleTraversal (
+                EmployeeID,
+                EmployeeName,
+                ManagerID,
+                TraversalCount,
+                PathCheck
+            )
+            AS
+            (
+                SELECT
+                    EmployeeID,
+                    Name AS EmployeeName,
+                    ManagerID,
+                    cast(1 as bigint) AS TraversalLevel,
+                    -- CAST(',' + CAST(EmployeeID AS VARCHAR(65533)) + ',' AS VARCHAR(65533)) AS PathCheck
+                    cast(concat(',', CAST(EmployeeID AS VARCHAR(65533)), ',') as VARCHAR(65533) ) AS PathCheck
+                FROM
+                    ${tb_name}
+                WHERE
+                    EmployeeID = 201
+
+                UNION
+
+                SELECT
+                    E.EmployeeID,
+                    E.Name AS EmployeeName,
+                    E.ManagerID,
+                    cast(R.TraversalCount + 1 as bigint) AS TraversalCount,
+                    -- R.PathCheck + CAST(E.EmployeeID AS VARCHAR(MAX)) + ',' AS PathCheck
+                    cast(concat(R.PathCheck, CAST(E.EmployeeID AS VARCHAR(65533)), ',') as VARCHAR(65533)) AS PathCheck
+                FROM
+                    ${tb_name} AS E
+                INNER JOIN
+                    CycleTraversal AS R
+                    ON E.ManagerID = R.EmployeeID
+            )
+            SELECT
+                EmployeeID,
+                EmployeeName,
+                TraversalCount,
+                PathCheck
+            FROM
+                CycleTraversal
+            ORDER BY
+                TraversalCount;"""
+        exception "reach cte_max_recursion_depth 100"
+    }
+
+    // Case 7: the same two-node cycle with UNION ALL; the depth-limit error is expected here as well.
+    sql """drop table if exists ${tb_name}"""
+    sql """CREATE TABLE ${tb_name} (
+        EmployeeID INT null,
+        Name VARCHAR(50) NULL,
+        ManagerID INT NULL
+    ) DUPLICATE KEY(EmployeeID)
+    DISTRIBUTED BY HASH(EmployeeID)
+    BUCKETS 3 PROPERTIES ('replication_num' = '1');"""
+
+    sql """INSERT INTO ${tb_name} VALUES
+        (100, 'CEO Root', NULL),
+        (201, 'Cycle Node A', 202),
+        (202, 'Cycle Node B', 201);"""
+
+    test {
+        sql """
+            WITH recursive CycleTraversal (
+                EmployeeID,
+                EmployeeName,
+                ManagerID,
+                TraversalCount,
+                PathCheck
+            )
+            AS
+            (
+                SELECT
+                    EmployeeID,
+                    Name AS EmployeeName,
+                    ManagerID,
+                    cast(1 as bigint) AS TraversalCount,
+                    cast(concat(',', CAST(EmployeeID AS VARCHAR(65533)), ',') as VARCHAR(65533)) as PathCheck
+                FROM
+                    ${tb_name}
+                WHERE
+                    EmployeeID = 201
+
+                UNION ALL
+
+                SELECT
+                    E.EmployeeID,
+                    E.Name AS EmployeeName,
+                    E.ManagerID,
+                    cast(R.TraversalCount + 1 as bigint) AS TraversalCount,
+                    cast(concat(R.PathCheck, CAST(E.EmployeeID AS VARCHAR(65533)), ',') as VARCHAR(65533)) as PathCheck
+                FROM
+                    ${tb_name} AS E
+                INNER JOIN
+                    CycleTraversal AS R
+                    ON E.ManagerID = R.EmployeeID
+            )
+            SELECT
+                EmployeeID,
+                EmployeeName,
+                TraversalCount,
+                PathCheck
+            FROM
+                CycleTraversal
+            ORDER BY
+                TraversalCount;"""
+        exception "reach cte_max_recursion_depth 100"
+    }
+
+}