From 2160fc5ead5373c95cf8897c7b96d9b775e030dc Mon Sep 17 00:00:00 2001 From: cgivre Date: Sun, 1 Feb 2026 14:48:15 -0500 Subject: [PATCH 1/8] WIP --- .../src/main/codegen/data/Parser.tdd | 3 + .../src/main/codegen/includes/parserImpls.ftl | 81 ++++- .../org/apache/drill/exec/ExecConstants.java | 7 + .../drill/exec/dotdrill/DotDrillFile.java | 9 +- .../drill/exec/dotdrill/DotDrillType.java | 1 + .../drill/exec/dotdrill/MaterializedView.java | 193 ++++++++++++ .../logical/DrillMaterializedViewTable.java | 162 ++++++++++ .../planner/physical/PlannerSettings.java | 4 + .../sql/handlers/MaterializedViewHandler.java | 245 +++++++++++++++ .../sql/parser/SqlCreateMaterializedView.java | 146 +++++++++ .../sql/parser/SqlDropMaterializedView.java | 110 +++++++ .../parser/SqlRefreshMaterializedView.java | 93 ++++++ .../server/options/SystemOptionManager.java | 1 + .../drill/exec/store/AbstractSchema.java | 53 +++- .../store/dfs/WorkspaceSchemaFactory.java | 167 ++++++++++- .../exec/dotdrill/TestMaterializedView.java | 243 +++++++++++++++ .../parser/TestMaterializedViewSqlParser.java | 224 ++++++++++++++ .../exec/sql/TestMaterializedViewSupport.java | 281 ++++++++++++++++++ 18 files changed, 2018 insertions(+), 5 deletions(-) create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlCreateMaterializedView.java create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlDropMaterializedView.java create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlRefreshMaterializedView.java create mode 100644 
exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java create mode 100644 exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java create mode 100644 exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java diff --git a/exec/java-exec/src/main/codegen/data/Parser.tdd b/exec/java-exec/src/main/codegen/data/Parser.tdd index 98107cd44c3..e3b3304d32e 100644 --- a/exec/java-exec/src/main/codegen/data/Parser.tdd +++ b/exec/java-exec/src/main/codegen/data/Parser.tdd @@ -37,6 +37,7 @@ "FILES", "REFRESH", "METADATA", + "MATERIALIZED", "IF", "JAR", "PROPERTIES", @@ -64,6 +65,7 @@ "SqlDropAllAliases()", "SqlDrop()", "SqlShowFiles()", + "SqlRefreshMaterializedView()", "SqlRefreshMetadata()", "SqlCreateFunction()", "SqlDropFunction()", @@ -934,6 +936,7 @@ "COLUMNS" "DATETIME" "HOURS" + "MATERIALIZED" "STORAGE" "REMOVE" ] diff --git a/exec/java-exec/src/main/codegen/includes/parserImpls.ftl b/exec/java-exec/src/main/codegen/includes/parserImpls.ftl index bb3b3c7b19e..2f66c663b52 100644 --- a/exec/java-exec/src/main/codegen/includes/parserImpls.ftl +++ b/exec/java-exec/src/main/codegen/includes/parserImpls.ftl @@ -193,6 +193,14 @@ SqlNode SqlCreateOrReplace() : [ { createType = "OR_REPLACE"; } ] [ { isTemporary = true; } ] ( + + { + if (isTemporary) { + throw new ParseException("Create materialized view statement does not allow keyword."); + } + return SqlCreateMaterializedView(pos, createType); + } + | { if (isTemporary) { @@ -259,6 +267,36 @@ SqlNode SqlCreateView(SqlParserPos pos, String createType) : } } +/** + * Parses a create materialized view or replace existing materialized view statement. + * after CREATE OR REPLACE MATERIALIZED VIEW statement which is handled in the SqlCreateOrReplace method. + * + * CREATE { [OR REPLACE] MATERIALIZED VIEW | MATERIALIZED VIEW [IF NOT EXISTS] | MATERIALIZED VIEW } view_name [ (field1, field2 ...) 
] AS select_statement + */ +SqlNode SqlCreateMaterializedView(SqlParserPos pos, String createType) : +{ + SqlIdentifier viewName; + SqlNode query; + SqlNodeList fieldList; +} +{ + [ + { + if (createType == "OR_REPLACE") { + throw new ParseException("Create materialized view statement cannot have both and clause"); + } + createType = "IF_NOT_EXISTS"; + } + ] + viewName = CompoundIdentifier() + fieldList = ParseOptionalFieldList("Materialized View") + + query = OrderedQueryOrExpr(ExprContext.ACCEPT_QUERY) + { + return new SqlCreateMaterializedView(pos, viewName, fieldList, query, SqlLiteral.createCharString(createType, getPos())); + } +} + /** * Parses a CTAS or CTTAS statement after CREATE [TEMPORARY] TABLE statement * which is handled in the SqlCreateOrReplace method. @@ -392,7 +430,7 @@ void addProperty(SqlNodeList properties) : } /** - * Parses DROP command for VIEW, TABLE and SCHEMA. + * Parses DROP command for VIEW, TABLE, MATERIALIZED VIEW and SCHEMA. */ SqlNode SqlDrop() : { @@ -401,6 +439,11 @@ SqlNode SqlDrop() : { { pos = getPos(); } ( + + { + return SqlDropMaterializedView(pos); + } + | { return SqlDropView(pos); @@ -435,6 +478,23 @@ SqlNode SqlDropView(SqlParserPos pos) : } } +/** + * Parses a drop materialized view or drop materialized view if exists statement + * after DROP MATERIALIZED VIEW statement which is handled in SqlDrop method. + * + * DROP MATERIALIZED VIEW [IF EXISTS] view_name; + */ +SqlNode SqlDropMaterializedView(SqlParserPos pos) : +{ + boolean viewExistenceCheck = false; +} +{ + [ { viewExistenceCheck = true; } ] + { + return new SqlDropMaterializedView(pos, CompoundIdentifier(), viewExistenceCheck); + } +} + /** * Parses a drop table or drop table if exists statement * after DROP TABLE statement which is handled in SqlDrop method. @@ -472,6 +532,25 @@ SqlNode SqlDropSchema(SqlParserPos pos) : } } +/** + * Parse refresh materialized view statement. 
+ * REFRESH MATERIALIZED VIEW view_name + */ +SqlNode SqlRefreshMaterializedView() : +{ + SqlParserPos pos; + SqlIdentifier viewName; +} +{ + { pos = getPos(); } + + + viewName = CompoundIdentifier() + { + return new SqlRefreshMaterializedView(pos, viewName); + } +} + /** * Parse refresh table metadata statement. * REFRESH TABLE METADATA [COLUMNS ((field1, field2,..) | NONE)] table_name diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java index 77da9092036..dbea831ea05 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java @@ -887,6 +887,13 @@ private ExecConstants() { public static final OptionValidator NEW_VIEW_DEFAULT_PERMS_VALIDATOR = new StringValidator(NEW_VIEW_DEFAULT_PERMS_KEY, new OptionDescription("Sets view permissions using an octal code in the Unix tradition.")); + public static final String ENABLE_MATERIALIZED_VIEW_REWRITE_KEY = "planner.enable_materialized_view_rewrite"; + public static final BooleanValidator ENABLE_MATERIALIZED_VIEW_REWRITE = new BooleanValidator( + ENABLE_MATERIALIZED_VIEW_REWRITE_KEY, + new OptionDescription("Enables automatic query rewriting to use materialized views when available. " + + "When enabled, the query planner will attempt to match queries against materialized views " + + "and use the pre-computed results instead of executing the original query.")); + public static final String CTAS_PARTITIONING_HASH_DISTRIBUTE = "store.partition.hash_distribute"; public static final BooleanValidator CTAS_PARTITIONING_HASH_DISTRIBUTE_VALIDATOR = new BooleanValidator(CTAS_PARTITIONING_HASH_DISTRIBUTE, new OptionDescription("Uses a hash algorithm to distribute data on partition keys in a CTAS partitioning operation. An alpha option--for experimental use at this stage. 
Do not use in production systems.")); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java index 36abcaa92d7..4823b6761a2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java @@ -57,7 +57,7 @@ public DotDrillType getType(){ * @return Return owner of the file in underlying file system. */ public String getOwner() { - if (type == DotDrillType.VIEW && status.getOwner().isEmpty()) { + if ((type == DotDrillType.VIEW || type == DotDrillType.MATERIALIZED_VIEW) && status.getOwner().isEmpty()) { // Drill view S3AFileStatus is not populated with owner (it has default value of ""). // This empty String causes IllegalArgumentException to be thrown (if impersonation is enabled) in // SchemaTreeProvider#createRootSchema(String, SchemaConfigInfoProvider). To work-around the issue @@ -82,4 +82,11 @@ public View getView(ObjectMapper mapper) throws IOException { return mapper.readValue(is, View.class); } } + + public MaterializedView getMaterializedView(ObjectMapper mapper) throws IOException { + Preconditions.checkArgument(type == DotDrillType.MATERIALIZED_VIEW); + try(InputStream is = fs.open(status.getPath())){ + return mapper.readValue(is, MaterializedView.class); + } + } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java index f4d06015049..ef930b7fc77 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java @@ -22,6 +22,7 @@ public enum DotDrillType { VIEW, + MATERIALIZED_VIEW, STATS; private final String ending; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java
new file mode 100644
index 00000000000..70870682dc3
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/MaterializedView.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.dotdrill;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+
+/**
+ * Materialized view definition that is (de)serialized as JSON in a file with the
+ * .materialized_view.drill extension. Only the definition and refresh metadata live
+ * here; the data itself is kept separately under {@link #getDataStoragePath()}.
+ */
+@JsonTypeName("materialized_view")
+public class MaterializedView {
+
+  /**
+   * Represents the refresh status of the materialized view.
+   */
+  public enum RefreshStatus {
+    /** The materialized view data is complete and up-to-date with its definition */
+    COMPLETE,
+    /** The materialized view data needs to be refreshed */
+    INCOMPLETE
+  }
+
+  private final String name;
+  private String sql;
+  private List<View.Field> fields;
+
+  /** Current schema when materialized view is created (not the schema to which view belongs to) */
+  private List<String> workspaceSchemaPath;
+
+  /** The relative path where the materialized data is stored (defaults to the view name when absent) */
+  @JsonInclude(Include.NON_NULL)
+  private String dataStoragePath;
+
+  /** Timestamp of the last successful refresh in milliseconds since epoch */
+  @JsonInclude(Include.NON_NULL)
+  private Long lastRefreshTime;
+
+  /** Current refresh status of the materialized view (defaults to INCOMPLETE when absent) */
+  @JsonInclude(Include.NON_NULL)
+  private RefreshStatus refreshStatus;
+
+  public MaterializedView(String name, String sql, RelDataType rowType, List<String> workspaceSchemaPath) {
+    this(name,
+        sql,
+        rowType.getFieldList().stream()
+            .map(f -> new View.Field(f.getName(), f.getType()))
+            .collect(Collectors.toList()),
+        workspaceSchemaPath,
+        name, // data storage path defaults to view name
+        System.currentTimeMillis(), // NOTE(review): stamped at creation although the status is INCOMPLETE
+        RefreshStatus.INCOMPLETE);
+  }
+
+  @JsonCreator
+  public MaterializedView(
+      @JsonProperty("name") String name,
+      @JsonProperty("sql") String sql,
+      @JsonProperty("fields") List<View.Field> fields,
+      @JsonProperty("workspaceSchemaPath") List<String> workspaceSchemaPath,
+      @JsonProperty("dataStoragePath") String dataStoragePath,
+      @JsonProperty("lastRefreshTime") Long lastRefreshTime,
+      @JsonProperty("refreshStatus") RefreshStatus refreshStatus) {
+    this.name = name;
+    this.sql = sql;
+    this.fields = fields;
+    // for backward compatibility since now all schemas and workspaces are case insensitive and stored in lower case
+    // make sure that given workspace schema path is also in lower case (a missing path becomes an empty list)
+    this.workspaceSchemaPath = workspaceSchemaPath == null ? Collections.emptyList() :
+        workspaceSchemaPath.stream()
+            .map(String::toLowerCase)
+            .collect(Collectors.toList());
+    this.dataStoragePath = dataStoragePath != null ? dataStoragePath : name;
+    this.lastRefreshTime = lastRefreshTime;
+    this.refreshStatus = refreshStatus != null ? refreshStatus : RefreshStatus.INCOMPLETE;
+  }
+
+  /**
+   * Builds the row type by delegating to a temporary {@link View} created from
+   * this definition's name, SQL, fields and workspace schema path.
+   *
+   * @param factory factory for rel data types creation
+   * @return the row type produced by {@code View#getRowType} for this definition
+   */
+  public RelDataType getRowType(RelDataTypeFactory factory) {
+    // Delegate to View's logic for row type construction
+    View tempView = new View(name, sql, fields, workspaceSchemaPath);
+    return tempView.getRowType(factory);
+  }
+
+  @JsonIgnore
+  public boolean isDynamic() {
+    return fields == null || fields.isEmpty();
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public String getSql() {
+    return sql;
+  }
+
+  public void setSql(String sql) {
+    this.sql = sql;
+  }
+
+  public List<View.Field> getFields() {
+    return fields;
+  }
+
+  public List<String> getWorkspaceSchemaPath() {
+    return workspaceSchemaPath;
+  }
+
+  public String getDataStoragePath() {
+    return dataStoragePath;
+  }
+
+  public void setDataStoragePath(String dataStoragePath) {
+    this.dataStoragePath = dataStoragePath;
+  }
+
+  public Long getLastRefreshTime() {
+    return lastRefreshTime;
+  }
+
+  public void setLastRefreshTime(Long lastRefreshTime) {
+    this.lastRefreshTime = lastRefreshTime;
+  }
+
+  public RefreshStatus getRefreshStatus() {
+    return refreshStatus;
+  }
+
+  public void setRefreshStatus(RefreshStatus refreshStatus) {
+    this.refreshStatus = refreshStatus;
+  }
+
+  /**
+   * Sets the last refresh time to the current wall-clock time and the status to COMPLETE.
+   */
+  public void markRefreshed() {
+    this.lastRefreshTime = System.currentTimeMillis();
+    this.refreshStatus = RefreshStatus.COMPLETE;
+  }
+
+  /**
+   * Creates a copy of this materialized view with updated refresh information.
+   *
+   * @param lastRefreshTime the new refresh timestamp
+   * @param refreshStatus the new refresh status (null falls back to INCOMPLETE)
+   * @return a new MaterializedView instance with updated refresh info
+   */
+  public MaterializedView withRefreshInfo(Long lastRefreshTime, RefreshStatus refreshStatus) {
+    return new MaterializedView(
+        this.name,
+        this.sql,
+        this.fields,
+        this.workspaceSchemaPath,
+        this.dataStoragePath,
+        lastRefreshTime,
+        refreshStatus);
+  }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java
new file mode 100644
index 00000000000..ec6f7371c54
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.drill.exec.planner.logical; + +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelOptTable.ToRelContext; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.Schema.TableType; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.schema.TranslatableTable; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.ViewExpansionContext; +import org.apache.drill.exec.planner.sql.conversion.DrillViewExpander; + +/** + * Represents a materialized view in the Drill query planning. + *

+ * Unlike regular views which expand to their definition query, materialized views
+ * are backed by pre-computed data stored in the workspace directory.
+ * <p>
+ * A materialized view stores:
+ * <ul>
+ *   <li>Definition file (.materialized_view.drill) - JSON with name, SQL, schema info</li>
+ *   <li>Data directory - Parquet files with the pre-computed results</li>
+ * </ul>
+ */
+public class DrillMaterializedViewTable implements TranslatableTable, DrillViewInfoProvider {
+
+  private final MaterializedView materializedView;
+  private final String viewOwner;
+  private final ViewExpansionContext viewExpansionContext;
+  private final String workspaceLocation;
+
+  public DrillMaterializedViewTable(MaterializedView materializedView, String viewOwner,
+      ViewExpansionContext viewExpansionContext, String workspaceLocation) {
+    this.materializedView = materializedView;
+    this.viewOwner = viewOwner;
+    this.viewExpansionContext = viewExpansionContext;
+    this.workspaceLocation = workspaceLocation;
+  }
+
+  @Override
+  public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+    return materializedView.getRowType(typeFactory);
+  }
+
+  @Override
+  public Statistic getStatistic() {
+    return Statistics.UNKNOWN;
+  }
+
+  /**
+   * Converts this materialized view to a RelNode for query planning.
+   * <p>
+   * Like a regular view, this currently expands the stored definition SQL,
+   * which is then converted to a RelNode; nothing in this method reads from
+   * the materialized data directory.
+   * <p>
+   * NOTE(review): the pre-computed Parquet data under getDataStoragePath() is
+   * not referenced here. Confirm whether another planner step is expected to
+   * substitute it, otherwise every query recomputes the definition.
+   */
+  @Override
+  public RelNode toRel(ToRelContext context, RelOptTable relOptTable) {
+    DrillViewExpander viewExpander = viewExpansionContext.getViewExpander();
+    ViewExpansionContext.ViewExpansionToken token = null;
+    try {
+      RelDataType rowType = relOptTable.getRowType();
+      RelNode rel;
+
+      if (viewExpansionContext.isImpersonationEnabled()) {
+        token = viewExpansionContext.reserveViewExpansionToken(viewOwner);
+        rel = expandViewForImpersonatedUser(viewExpander, materializedView.getWorkspaceSchemaPath(),
+            token.getSchemaTree());
+      } else {
+        rel = viewExpander.expandView(rowType, materializedView.getSql(),
+            materializedView.getWorkspaceSchemaPath(), Collections.emptyList()).rel;
+      }
+
+      return rel;
+    } finally {
+      if (token != null) {
+        token.release();
+      }
+    }
+  }
+
+  protected RelNode expandViewForImpersonatedUser(DrillViewExpander context,
+                                                  List<String> workspaceSchemaPath,
+                                                  SchemaPlus tokenSchemaTree) {
+    return context.expandView(materializedView.getSql(), tokenSchemaTree, workspaceSchemaPath).rel;
+  }
+
+  @Override
+  public TableType getJdbcTableType() {
+    // Report as TABLE since materialized views store actual data; callers that must tell a
+    // materialized view from a plain table therefore need to check for this class instead
+    return TableType.TABLE;
+  }
+
+  @Override
+  public String getViewSql() {
+    return materializedView.getSql();
+  }
+
+  /**
+   * @return the materialized view definition
+   */
+  public MaterializedView getMaterializedView() {
+    return materializedView;
+  }
+
+  /**
+   * @return the owner of this materialized view
+   */
+  public String getViewOwner() {
+    return viewOwner;
+  }
+
+  /**
+   * @return the workspace location joined with the view's data storage path by a single "/"
+   */
+  public String getDataStoragePath() {
+    return workspaceLocation + "/" + materializedView.getDataStoragePath();
+  }
+
+  @Override
+  public boolean rolledUpColumnValidInsideAgg(String column,
+      SqlCall call, SqlNode parent, CalciteConnectionConfig config) {
+    return true;
+  }
+
+  @Override
+  public boolean isRolledUp(String column) {
+    return false;
+  }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
index 6fa145a1b01..4b5202d8366 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java
@@ -416,6 +416,10 @@ public boolean isUnionAllDistributeEnabled() {
     return options.getOption(UNIONALL_DISTRIBUTE);
   }
 
+  public boolean isMaterializedViewRewriteEnabled() {
+    return options.getOption(ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY).bool_val;
+  }
+
   public boolean isParquetRowGroupFilterPushdownPlanningEnabled() {
     return options.getOption(PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING);
   }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java
new file mode 100644
index 00000000000..df0b0542927
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.sql.handlers; + +import java.io.IOException; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Table; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.tools.RelConversionException; +import org.apache.calcite.tools.ValidationException; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.common.util.DrillStringUtils; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.physical.PhysicalPlan; +import org.apache.drill.exec.planner.sql.DirectPlan; +import org.apache.drill.exec.planner.sql.SchemaUtilities; +import org.apache.drill.exec.planner.sql.parser.SqlCreateMaterializedView; +import org.apache.drill.exec.planner.sql.parser.SqlCreateType; +import org.apache.drill.exec.planner.sql.parser.SqlDropMaterializedView; +import org.apache.drill.exec.planner.sql.parser.SqlRefreshMaterializedView; +import org.apache.drill.exec.store.AbstractSchema; +import org.apache.drill.exec.work.foreman.ForemanSetupException; + +/** + * Handlers for materialized view DDL commands: CREATE, DROP, and REFRESH MATERIALIZED VIEW. 
+ */
+public abstract class MaterializedViewHandler extends DefaultSqlHandler {
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MaterializedViewHandler.class);
+
+  protected QueryContext context;
+
+  public MaterializedViewHandler(SqlHandlerConfig config) {
+    super(config);
+    this.context = config.getContext();
+  }
+
+  /**
+   * Handler for CREATE [OR REPLACE] MATERIALIZED VIEW [IF NOT EXISTS] DDL command.
+   */
+  public static class CreateMaterializedView extends MaterializedViewHandler {
+
+    public CreateMaterializedView(SqlHandlerConfig config) {
+      super(config);
+    }
+
+    @Override
+    public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConversionException, IOException, ForemanSetupException {
+      SqlCreateMaterializedView createMV = unwrap(sqlNode, SqlCreateMaterializedView.class);
+
+      final String newViewName = DrillStringUtils.removeLeadingSlash(createMV.getName());
+
+      // Disallow temporary tables usage in materialized view definition
+      config.getConverter().disallowTemporaryTables();
+
+      // Store the SQL as the view definition
+      final String viewSql = createMV.getQuery().toSqlString(null, true).getSql();
+      final ConvertedRelNode convertedRelNode = validateAndConvert(createMV.getQuery());
+      final RelDataType validatedRowType = convertedRelNode.getValidatedRowType();
+      final RelNode queryRelNode = convertedRelNode.getConvertedNode();
+
+      final RelNode newViewRelNode = SqlHandlerUtil.resolveNewTableRel(true, createMV.getFieldNames(),
+          validatedRowType, queryRelNode);
+
+      final SchemaPlus defaultSchema = context.getNewDefaultSchema();
+      final AbstractSchema drillSchema = SchemaUtilities.resolveToMutableDrillSchema(defaultSchema,
+          createMV.getSchemaPath());
+
+      final String schemaPath = drillSchema.getFullSchemaName();
+
+      // A false result means IF NOT EXISTS hit an existing object: report it without failing the query
+      if (!checkMaterializedViewCreationPossibility(drillSchema, createMV, context)) {
+        return DirectPlan.createDirectPlan(context, false,
+            String.format("A table or view with given name [%s] already exists in schema [%s]",
+                newViewName, schemaPath));
+      }
+
+      // Create the materialized view definition
+      final MaterializedView materializedView = new MaterializedView(newViewName, viewSql,
+          newViewRelNode.getRowType(), SchemaUtilities.getSchemaPathAsList(defaultSchema));
+
+      // Create the materialized view (this will also populate the data)
+      final boolean replaced = drillSchema.createMaterializedView(materializedView);
+
+      final String summary = String.format("Materialized view '%s' %s successfully in '%s' schema",
+          newViewName, replaced ? "replaced" : "created", schemaPath);
+
+      return DirectPlan.createDirectPlan(context, true, summary);
+    }
+
+    /**
+     * Checks the target name: throws on a clash for SIMPLE / OR_REPLACE, returns false on a clash for IF_NOT_EXISTS.
+     */
+    private boolean checkMaterializedViewCreationPossibility(AbstractSchema drillSchema,
+                                                             SqlCreateMaterializedView createMV,
+                                                             QueryContext context) {
+      final String schemaPath = drillSchema.getFullSchemaName();
+      final String viewName = createMV.getName();
+      final Table table = SqlHandlerUtil.getTableFromSchema(drillSchema, viewName);
+
+      // DrillMaterializedViewTable reports TableType.TABLE, so detect it first or it is mistaken for a plain table
+      final boolean isMaterializedView =
+          table instanceof org.apache.drill.exec.planner.logical.DrillMaterializedViewTable;
+      final boolean isView = (table != null && table.getJdbcTableType() == Schema.TableType.VIEW);
+      final boolean isTable = (!isMaterializedView && table != null && table.getJdbcTableType() != Schema.TableType.VIEW)
+          || context.getSession().isTemporaryTable(drillSchema, context.getConfig(), viewName);
+
+      SqlCreateType createType = createMV.getSqlCreateType();
+      switch (createType) {
+        case SIMPLE:
+          if (isTable) {
+            throw UserException.validationError()
+                .message("A non-view table with given name [%s] already exists in schema [%s]",
+                    viewName, schemaPath)
+                .build(logger);
+          } else if (isView) {
+            throw UserException.validationError()
+                .message("A view with given name [%s] already exists in schema [%s]", viewName, schemaPath)
+                .build(logger);
+          } else if (isMaterializedView) {
+            throw UserException.validationError()
+                .message("A materialized view with given name [%s] already exists in schema [%s]",
+                    viewName, schemaPath)
+                .build(logger);
+          }
+          break;
+        case OR_REPLACE:
+          if (isTable) {
+            throw UserException.validationError()
+                .message("A non-view table with given name [%s] already exists in schema [%s]",
+                    viewName, schemaPath)
+                .build(logger);
+          } else if (isView) {
+            throw UserException.validationError()
+                .message("A regular view with given name [%s] already exists in schema [%s]. " +
+                    "Cannot replace a regular view with a materialized view.", viewName, schemaPath)
+                .build(logger);
+          }
+          // Allow replacing existing materialized view
+          break;
+        case IF_NOT_EXISTS:
+          if (isTable || isView || isMaterializedView) {
+            return false;
+          }
+          break;
+      }
+      return true;
+    }
+  }
+
+  /**
+   * Handler for DROP MATERIALIZED VIEW [IF EXISTS] DDL command.
+   */
+  public static class DropMaterializedView extends MaterializedViewHandler {
+
+    public DropMaterializedView(SqlHandlerConfig config) {
+      super(config);
+    }
+
+    @Override
+    public PhysicalPlan getPlan(SqlNode sqlNode) throws IOException, ForemanSetupException {
+      SqlDropMaterializedView dropMV = unwrap(sqlNode, SqlDropMaterializedView.class);
+      final String viewName = DrillStringUtils.removeLeadingSlash(dropMV.getName());
+      final AbstractSchema drillSchema = SchemaUtilities.resolveToMutableDrillSchema(
+          context.getNewDefaultSchema(), dropMV.getSchemaPath());
+
+      final String schemaPath = drillSchema.getFullSchemaName();
+
+      final Table viewToDrop = SqlHandlerUtil.getTableFromSchema(drillSchema, viewName);
+
+      if (dropMV.checkViewExistence()) {
+        if (viewToDrop == null) {
+          return DirectPlan.createDirectPlan(context, false,
+              String.format("Materialized view [%s] not found in schema [%s].", viewName, schemaPath));
+        }
+      } else {
+        if (viewToDrop == null) {
+          throw UserException.validationError()
+              .message("Unknown materialized view [%s] in schema [%s].", viewName, schemaPath)
+              .build(logger);
+        }
+      }
+
+      // Drop the definition file and data directory. NOTE(review): the kind of viewToDrop is never verified
+      drillSchema.dropMaterializedView(viewName);
+
+      return DirectPlan.createDirectPlan(context, true,
+          String.format("Materialized view [%s] deleted successfully from schema [%s].", viewName, schemaPath));
+    }
+  }
+
+  /**
+   * Handler for REFRESH MATERIALIZED VIEW DDL command.
+   */
+  public static class RefreshMaterializedView extends MaterializedViewHandler {
+
+    public RefreshMaterializedView(SqlHandlerConfig config) {
+      super(config);
+    }
+
+    @Override
+    public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConversionException,
+        IOException, ForemanSetupException {
+      SqlRefreshMaterializedView refreshMV = unwrap(sqlNode, SqlRefreshMaterializedView.class);
+      final String viewName = DrillStringUtils.removeLeadingSlash(refreshMV.getName());
+      final AbstractSchema drillSchema = SchemaUtilities.resolveToMutableDrillSchema(
+          context.getNewDefaultSchema(), refreshMV.getSchemaPath());
+
+      final String schemaPath = drillSchema.getFullSchemaName();
+
+      final Table viewToRefresh = SqlHandlerUtil.getTableFromSchema(drillSchema, viewName);
+
+      if (viewToRefresh == null) {
+        throw UserException.validationError()
+            .message("Materialized view [%s] not found in schema [%s].", viewName, schemaPath)
+            .build(logger);
+      }
+
+      // Refresh the data. NOTE(review): the kind of viewToRefresh is never verified before refreshing
+      drillSchema.refreshMaterializedView(viewName);
+
+      return DirectPlan.createDirectPlan(context, true,
+          String.format("Materialized view [%s] refreshed successfully in schema [%s].", viewName, schemaPath));
+    }
+  }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlCreateMaterializedView.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlCreateMaterializedView.java
new file mode 100644
index 00000000000..320fcd1a1bf
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlCreateMaterializedView.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.sql.parser; + +import java.util.List; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.drill.exec.planner.sql.SchemaUtilities; +import org.apache.drill.exec.planner.sql.handlers.AbstractSqlHandler; +import org.apache.drill.exec.planner.sql.handlers.MaterializedViewHandler; +import org.apache.drill.exec.planner.sql.handlers.SqlHandlerConfig; +import org.apache.drill.exec.planner.sql.handlers.SqlHandlerUtil; + +import com.google.common.collect.Lists; + +/** + * Represents a CREATE MATERIALIZED VIEW statement. 
+ * + * Syntax: + * CREATE [OR REPLACE] MATERIALIZED VIEW [IF NOT EXISTS] view_name [(field1, field2 ...)] AS select_statement + */ +public class SqlCreateMaterializedView extends DrillSqlCall { + + public static final SqlSpecialOperator OPERATOR = new SqlSpecialOperator("CREATE_MATERIALIZED_VIEW", SqlKind.OTHER_DDL) { + @Override + public SqlCall createCall(SqlLiteral functionQualifier, SqlParserPos pos, SqlNode... operands) { + return new SqlCreateMaterializedView(pos, (SqlIdentifier) operands[0], (SqlNodeList) operands[1], + operands[2], (SqlLiteral) operands[3]); + } + }; + + private final SqlIdentifier viewName; + private final SqlNodeList fieldList; + private final SqlNode query; + private final SqlLiteral createType; + + public SqlCreateMaterializedView(SqlParserPos pos, SqlIdentifier viewName, SqlNodeList fieldList, + SqlNode query, SqlLiteral createType) { + super(pos); + this.viewName = viewName; + this.fieldList = fieldList; + this.query = query; + this.createType = createType; + } + + @Override + public SqlOperator getOperator() { + return OPERATOR; + } + + @Override + public List getOperandList() { + List ops = Lists.newArrayList(); + ops.add(viewName); + ops.add(fieldList); + ops.add(query); + ops.add(createType); + return ops; + } + + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + writer.keyword("CREATE"); + switch (SqlCreateType.valueOf(createType.toValue())) { + case SIMPLE: + writer.keyword("MATERIALIZED"); + writer.keyword("VIEW"); + break; + case OR_REPLACE: + writer.keyword("OR"); + writer.keyword("REPLACE"); + writer.keyword("MATERIALIZED"); + writer.keyword("VIEW"); + break; + case IF_NOT_EXISTS: + writer.keyword("MATERIALIZED"); + writer.keyword("VIEW"); + writer.keyword("IF"); + writer.keyword("NOT"); + writer.keyword("EXISTS"); + break; + } + viewName.unparse(writer, leftPrec, rightPrec); + if (fieldList.size() > 0) { + SqlHandlerUtil.unparseSqlNodeList(writer, leftPrec, rightPrec, fieldList); + } + 
writer.keyword("AS"); + query.unparse(writer, leftPrec, rightPrec); + } + + @Override + public AbstractSqlHandler getSqlHandler(SqlHandlerConfig config) { + return new MaterializedViewHandler.CreateMaterializedView(config); + } + + public List getSchemaPath() { + return SchemaUtilities.getSchemaPath(viewName); + } + + public String getName() { + if (viewName.isSimple()) { + return viewName.getSimple(); + } + return viewName.names.get(viewName.names.size() - 1); + } + + public List getFieldNames() { + List fieldNames = Lists.newArrayList(); + for (SqlNode node : fieldList.getList()) { + fieldNames.add(node.toString()); + } + return fieldNames; + } + + public SqlNode getQuery() { + return query; + } + + public SqlCreateType getSqlCreateType() { + return SqlCreateType.valueOf(createType.toValue()); + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlDropMaterializedView.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlDropMaterializedView.java new file mode 100644 index 00000000000..fbce2b82da7 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlDropMaterializedView.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.sql.parser; + +import java.util.List; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.drill.exec.planner.sql.SchemaUtilities; +import org.apache.drill.exec.planner.sql.handlers.AbstractSqlHandler; +import org.apache.drill.exec.planner.sql.handlers.MaterializedViewHandler; +import org.apache.drill.exec.planner.sql.handlers.SqlHandlerConfig; + +import com.google.common.collect.ImmutableList; + +/** + * Represents a DROP MATERIALIZED VIEW statement. + * + * Syntax: + * DROP MATERIALIZED VIEW [IF EXISTS] view_name + */ +public class SqlDropMaterializedView extends DrillSqlCall { + + public static final SqlSpecialOperator OPERATOR = new SqlSpecialOperator("DROP_MATERIALIZED_VIEW", SqlKind.OTHER_DDL) { + @Override + public SqlCall createCall(SqlLiteral functionQualifier, SqlParserPos pos, SqlNode... 
operands) { + return new SqlDropMaterializedView(pos, (SqlIdentifier) operands[0], (SqlLiteral) operands[1]); + } + }; + + private final SqlIdentifier viewName; + private final boolean viewExistenceCheck; + + public SqlDropMaterializedView(SqlParserPos pos, SqlIdentifier viewName, SqlLiteral viewExistenceCheck) { + this(pos, viewName, viewExistenceCheck.booleanValue()); + } + + public SqlDropMaterializedView(SqlParserPos pos, SqlIdentifier viewName, boolean viewExistenceCheck) { + super(pos); + this.viewName = viewName; + this.viewExistenceCheck = viewExistenceCheck; + } + + @Override + public SqlOperator getOperator() { + return OPERATOR; + } + + @Override + public List getOperandList() { + return ImmutableList.of( + viewName, + SqlLiteral.createBoolean(viewExistenceCheck, SqlParserPos.ZERO) + ); + } + + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + writer.keyword("DROP"); + writer.keyword("MATERIALIZED"); + writer.keyword("VIEW"); + if (viewExistenceCheck) { + writer.keyword("IF"); + writer.keyword("EXISTS"); + } + viewName.unparse(writer, leftPrec, rightPrec); + } + + @Override + public AbstractSqlHandler getSqlHandler(SqlHandlerConfig config) { + return new MaterializedViewHandler.DropMaterializedView(config); + } + + public List getSchemaPath() { + return SchemaUtilities.getSchemaPath(viewName); + } + + public String getName() { + if (viewName.isSimple()) { + return viewName.getSimple(); + } + return viewName.names.get(viewName.names.size() - 1); + } + + public boolean checkViewExistence() { + return viewExistenceCheck; + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlRefreshMaterializedView.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlRefreshMaterializedView.java new file mode 100644 index 00000000000..b24c2816679 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlRefreshMaterializedView.java @@ -0,0 
+1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.sql.parser; + +import java.util.List; + +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.drill.exec.planner.sql.SchemaUtilities; +import org.apache.drill.exec.planner.sql.handlers.AbstractSqlHandler; +import org.apache.drill.exec.planner.sql.handlers.MaterializedViewHandler; +import org.apache.drill.exec.planner.sql.handlers.SqlHandlerConfig; + +import com.google.common.collect.ImmutableList; + +/** + * Represents a REFRESH MATERIALIZED VIEW statement. 
+ * + * Syntax: + * REFRESH MATERIALIZED VIEW view_name + */ +public class SqlRefreshMaterializedView extends DrillSqlCall { + + public static final SqlSpecialOperator OPERATOR = new SqlSpecialOperator("REFRESH_MATERIALIZED_VIEW", SqlKind.OTHER_DDL) { + @Override + public SqlCall createCall(SqlLiteral functionQualifier, SqlParserPos pos, SqlNode... operands) { + return new SqlRefreshMaterializedView(pos, (SqlIdentifier) operands[0]); + } + }; + + private final SqlIdentifier viewName; + + public SqlRefreshMaterializedView(SqlParserPos pos, SqlIdentifier viewName) { + super(pos); + this.viewName = viewName; + } + + @Override + public SqlOperator getOperator() { + return OPERATOR; + } + + @Override + public List getOperandList() { + return ImmutableList.of(viewName); + } + + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + writer.keyword("REFRESH"); + writer.keyword("MATERIALIZED"); + writer.keyword("VIEW"); + viewName.unparse(writer, leftPrec, rightPrec); + } + + @Override + public AbstractSqlHandler getSqlHandler(SqlHandlerConfig config) { + return new MaterializedViewHandler.RefreshMaterializedView(config); + } + + public List getSchemaPath() { + return SchemaUtilities.getSchemaPath(viewName); + } + + public String getName() { + if (viewName.isSimple()) { + return viewName.getSimple(); + } + return viewName.names.get(viewName.names.size() - 1); + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java index 3cee4096e09..6cda5c872c0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java @@ -252,6 +252,7 @@ public static CaseInsensitiveMap createDefaultOptionDefinition new OptionDefinition(ExecConstants.IMPLICIT_CAST_FOR_JOINS_ENABLED_VALIDATOR), 
new OptionDefinition(ExecConstants.AVERAGE_FIELD_WIDTH), new OptionDefinition(ExecConstants.NEW_VIEW_DEFAULT_PERMS_VALIDATOR), + new OptionDefinition(ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE), new OptionDefinition(ExecConstants.CTAS_PARTITIONING_HASH_DISTRIBUTE_VALIDATOR), new OptionDefinition(ExecConstants.ADMIN_USERS_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM, true, false)), new OptionDefinition(ExecConstants.ADMIN_USER_GROUPS_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM, true, false)), diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java index 0812d9c1b7c..3527a9b1a77 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java @@ -49,6 +49,7 @@ import org.apache.calcite.schema.Table; import org.apache.commons.lang3.tuple.Pair; import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.exec.dotdrill.MaterializedView; import org.apache.drill.exec.dotdrill.View; import org.apache.drill.exec.planner.logical.CreateTableEntry; import com.google.common.base.Joiner; @@ -170,10 +171,60 @@ public boolean createView(View view) throws IOException { */ public void dropView(String viewName) throws IOException { throw UserException.unsupportedError() - .message("Dropping a view is supported in schema [%s]", getSchemaPath()) + .message("Dropping a view is not supported in schema [%s]", getSchemaPath()) .build(logger); } + /** + * Create a new materialized view given definition. + * + * @param materializedView Materialized view info including name, definition, etc. + * @return Returns true if an existing materialized view is replaced with the given view. False otherwise. 
+ * @throws IOException in case of error creating a materialized view + */ + public boolean createMaterializedView(MaterializedView materializedView) throws IOException { + throw UserException.unsupportedError() + .message("Creating materialized views is not supported in schema [%s]", getSchemaPath()) + .build(logger); + } + + /** + * Drop the materialized view with given name. + * This should remove both the definition file and the data directory. + * + * @param viewName materialized view name + * @throws IOException in case of error dropping the materialized view + */ + public void dropMaterializedView(String viewName) throws IOException { + throw UserException.unsupportedError() + .message("Dropping materialized views is not supported in schema [%s]", getSchemaPath()) + .build(logger); + } + + /** + * Refresh the materialized view with given name by re-executing its query + * and replacing the stored data. + * + * @param viewName materialized view name + * @throws IOException in case of error refreshing the materialized view + */ + public void refreshMaterializedView(String viewName) throws IOException { + throw UserException.unsupportedError() + .message("Refreshing materialized views is not supported in schema [%s]", getSchemaPath()) + .build(logger); + } + + /** + * Get the materialized view with the given name. 
+ * + * @param viewName materialized view name + * @return the materialized view or null if not found + * @throws IOException in case of error reading the materialized view + */ + public MaterializedView getMaterializedView(String viewName) throws IOException { + return null; + } + /** * Creates table entry using table name, list of partition columns * and storage strategy used to create table folder and files diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java index bd25a97781a..c410af3b34d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java @@ -51,6 +51,7 @@ import org.apache.drill.exec.dotdrill.DotDrillFile; import org.apache.drill.exec.dotdrill.DotDrillType; import org.apache.drill.exec.dotdrill.DotDrillUtil; +import org.apache.drill.exec.dotdrill.MaterializedView; import org.apache.drill.exec.dotdrill.View; import org.apache.drill.exec.metastore.store.FileSystemMetadataProviderManager; import org.apache.drill.exec.metastore.MetadataProviderManager; @@ -58,6 +59,7 @@ import org.apache.drill.exec.metastore.MetastoreMetadataProviderManager.MetastoreMetadataProviderConfig; import org.apache.drill.exec.planner.common.DrillStatsTable; import org.apache.drill.exec.planner.logical.CreateTableEntry; +import org.apache.drill.exec.planner.logical.DrillMaterializedViewTable; import org.apache.drill.exec.planner.logical.DrillTable; import org.apache.drill.exec.planner.logical.DrillViewTable; import org.apache.drill.exec.planner.logical.DynamicDrillTable; @@ -351,6 +353,148 @@ public void dropView(String viewName) throws IOException { getFS().delete(getViewPath(viewName), false); } + private Path getMaterializedViewPath(String name) { + return 
DotDrillType.MATERIALIZED_VIEW.getPath(config.getLocation(), name); + } + + private Path getMaterializedViewDataPath(String name) { + return new Path(config.getLocation(), name); + } + + @Override + public boolean createMaterializedView(MaterializedView materializedView) throws IOException { + String viewName = materializedView.getName(); + Path viewPath = getMaterializedViewPath(viewName); + Path dataPath = getMaterializedViewDataPath(viewName); + + boolean replaced = getFS().exists(viewPath); + + // If replacing, first drop the old data + if (replaced) { + if (getFS().exists(dataPath)) { + getFS().delete(dataPath, true); + } + } + + // Create the data directory for the materialized view + final FsPermission dirPerms = new FsPermission( + schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val); + getFS().mkdirs(dataPath, dirPerms); + + // Set the data storage path in the materialized view + materializedView.setDataStoragePath(viewName); + + // Write the materialized view definition file + final FsPermission viewPerms = new FsPermission( + schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val); + try (OutputStream stream = DrillFileSystem.create(getFS(), viewPath, viewPerms)) { + mapper.writeValue(stream, materializedView); + } + + // Mark as complete (data will be populated by the handler via CTAS-like operation) + return replaced; + } + + @Override + public void dropMaterializedView(String viewName) throws IOException { + Path viewPath = getMaterializedViewPath(viewName); + Path dataPath = getMaterializedViewDataPath(viewName); + + // Delete the definition file + if (getFS().exists(viewPath)) { + getFS().delete(viewPath, false); + } + + // Delete the data directory + if (getFS().exists(dataPath)) { + getFS().delete(dataPath, true); + } + } + + @Override + public void refreshMaterializedView(String viewName) throws IOException { + // Read the existing materialized view definition + MaterializedView mv = 
getMaterializedView(viewName); + if (mv == null) { + throw UserException.validationError() + .message("Materialized view [%s] not found in schema [%s]", viewName, getFullSchemaName()) + .build(logger); + } + + Path dataPath = getMaterializedViewDataPath(viewName); + + // Delete existing data + if (getFS().exists(dataPath)) { + getFS().delete(dataPath, true); + } + + // Recreate the data directory + final FsPermission dirPerms = new FsPermission( + schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val); + getFS().mkdirs(dataPath, dirPerms); + + // Update the materialized view with new refresh time + MaterializedView updatedMV = mv.withRefreshInfo( + System.currentTimeMillis(), + MaterializedView.RefreshStatus.COMPLETE); + + // Write the updated definition file + Path viewPath = getMaterializedViewPath(viewName); + final FsPermission viewPerms = new FsPermission( + schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val); + try (OutputStream stream = DrillFileSystem.create(getFS(), viewPath, viewPerms)) { + mapper.writeValue(stream, updatedMV); + } + } + + @Override + public MaterializedView getMaterializedView(String viewName) throws IOException { + List files = Collections.emptyList(); + try { + files = DotDrillUtil.getDotDrills(getFS(), new Path(config.getLocation()), + DrillStringUtils.removeLeadingSlash(viewName), DotDrillType.MATERIALIZED_VIEW); + } catch (UnsupportedOperationException e) { + logger.debug("The filesystem for this workspace does not support this operation.", e); + return null; + } catch (IOException e) { + logger.warn("Failure while trying to list materialized view in workspace [{}]", getFullSchemaName(), e); + return null; + } + + for (DotDrillFile f : files) { + if (f.getType() == DotDrillType.MATERIALIZED_VIEW) { + return f.getMaterializedView(mapper); + } + } + return null; + } + + private Set getMaterializedViews() { + Set viewSet = Sets.newHashSet(); + // Look for files with 
".materialized_view.drill" extension. + List files; + try { + files = DotDrillUtil.getDotDrills(getFS(), new Path(config.getLocation()), DotDrillType.MATERIALIZED_VIEW); + for (DotDrillFile f : files) { + viewSet.add(f.getBaseName()); + } + } catch (UnsupportedOperationException e) { + logger.debug("The filesystem for this workspace does not support this operation.", e); + } catch (AccessControlException e) { + if (!schemaConfig.getIgnoreAuthErrors()) { + logger.debug(e.getMessage()); + throw UserException + .permissionError(e) + .message("Not authorized to list materialized views in schema [%s]", getFullSchemaName()) + .build(logger); + } + } catch (Exception e) { + logger.warn("Failure while trying to list .materialized_view.drill files in workspace [{}]", + getFullSchemaName(), e); + } + return viewSet; + } + private Set getViews() { Set viewSet = Sets.newHashSet(); // Look for files with ".view.drill" extension. @@ -385,7 +529,7 @@ private Set rawTableNames() { @Override public Set getTableNames() { - return Sets.union(rawTableNames(), getViews()); + return Sets.union(Sets.union(rawTableNames(), getViews()), getMaterializedViews()); } @Override @@ -441,7 +585,7 @@ public Table getTable(String tableName) { try { try { files = DotDrillUtil.getDotDrills(getFS(), new Path(config.getLocation()), - DrillStringUtils.removeLeadingSlash(tableName), DotDrillType.VIEW); + DrillStringUtils.removeLeadingSlash(tableName), DotDrillType.VIEW, DotDrillType.MATERIALIZED_VIEW); } catch (AccessControlException e) { if (!schemaConfig.getIgnoreAuthErrors()) { logger.debug(e.getMessage()); @@ -468,7 +612,26 @@ public Table getTable(String tableName) { } catch (IOException e) { logger.warn("Failure while trying to load {}.view.drill file in workspace [{}]", tableName, getFullSchemaName(), e); } + break; + case MATERIALIZED_VIEW: + try { + MaterializedView mv = f.getMaterializedView(mapper); + return new DrillMaterializedViewTable(mv, f.getOwner(), 
schemaConfig.getViewExpansionContext(), + config.getLocation()); + } catch (AccessControlException e) { + if (!schemaConfig.getIgnoreAuthErrors()) { + logger.debug(e.getMessage()); + throw UserException.permissionError(e) + .message("Not authorized to read materialized view [%s] in schema [%s]", tableName, getFullSchemaName()) + .build(logger); + } + } catch (IOException e) { + logger.warn("Failure while trying to load {}.materialized_view.drill file in workspace [{}]", + tableName, getFullSchemaName(), e); + } + break; default: + break; } } } catch (UnsupportedOperationException e) { diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java new file mode 100644 index 00000000000..c82c2b8fbdb --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.dotdrill; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.StringReader; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.drill.categories.SqlTest; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * Tests for MaterializedView data model serialization and deserialization. + */ +@Category(SqlTest.class) +public class TestMaterializedView { + + private static final ObjectMapper mapper = new ObjectMapper(); + + @Test + public void testJsonSerialization() throws Exception { + List fields = Arrays.asList( + new View.Field("id", SqlTypeName.BIGINT, null, null, null, null, null, true, null, null), + new View.Field("name", SqlTypeName.VARCHAR, 100, null, null, null, null, true, null, null) + ); + List schemaPath = Arrays.asList("dfs", "tmp"); + + MaterializedView mv = new MaterializedView( + "test_mv", + "SELECT id, name FROM t1", + fields, + schemaPath, + "test_mv", + 1234567890L, + MaterializedView.RefreshStatus.COMPLETE + ); + + // Serialize to JSON + StringWriter writer = new StringWriter(); + mapper.writeValue(writer, mv); + String json = writer.toString(); + + assertNotNull(json); + assertTrue(json.contains("test_mv")); + assertTrue(json.contains("SELECT id, name FROM t1")); + assertTrue(json.contains("COMPLETE")); + + // Deserialize from JSON + MaterializedView deserialized = mapper.readValue(new StringReader(json), MaterializedView.class); + + assertEquals("test_mv", deserialized.getName()); + assertEquals("SELECT id, name FROM t1", deserialized.getSql()); + assertEquals(2, deserialized.getFields().size()); + assertEquals("dfs", deserialized.getWorkspaceSchemaPath().get(0)); + assertEquals("tmp", 
deserialized.getWorkspaceSchemaPath().get(1)); + assertEquals("test_mv", deserialized.getDataStoragePath()); + assertEquals(Long.valueOf(1234567890L), deserialized.getLastRefreshTime()); + assertEquals(MaterializedView.RefreshStatus.COMPLETE, deserialized.getRefreshStatus()); + } + + @Test + public void testJsonDeserializationWithMinimalFields() throws Exception { + String json = "{\"name\":\"mv1\",\"sql\":\"SELECT * FROM t\",\"fields\":[],\"workspaceSchemaPath\":[\"dfs\"]}"; + + MaterializedView mv = mapper.readValue(new StringReader(json), MaterializedView.class); + + assertEquals("mv1", mv.getName()); + assertEquals("SELECT * FROM t", mv.getSql()); + assertEquals("dfs", mv.getWorkspaceSchemaPath().get(0)); + // Default values + assertEquals("mv1", mv.getDataStoragePath()); // Defaults to name + assertEquals(MaterializedView.RefreshStatus.INCOMPLETE, mv.getRefreshStatus()); + } + + @Test + public void testMarkRefreshed() { + List fields = Collections.emptyList(); + List schemaPath = Arrays.asList("dfs", "tmp"); + + MaterializedView mv = new MaterializedView( + "test_mv", + "SELECT * FROM t1", + fields, + schemaPath, + "test_mv", + null, + MaterializedView.RefreshStatus.INCOMPLETE + ); + + assertEquals(MaterializedView.RefreshStatus.INCOMPLETE, mv.getRefreshStatus()); + + long beforeRefresh = System.currentTimeMillis(); + mv.markRefreshed(); + long afterRefresh = System.currentTimeMillis(); + + assertEquals(MaterializedView.RefreshStatus.COMPLETE, mv.getRefreshStatus()); + assertTrue(mv.getLastRefreshTime() >= beforeRefresh); + assertTrue(mv.getLastRefreshTime() <= afterRefresh); + } + + @Test + public void testWithRefreshInfo() { + List fields = Collections.emptyList(); + List schemaPath = Arrays.asList("dfs", "tmp"); + + MaterializedView mv = new MaterializedView( + "test_mv", + "SELECT * FROM t1", + fields, + schemaPath, + "test_mv", + 1000L, + MaterializedView.RefreshStatus.INCOMPLETE + ); + + MaterializedView updated = mv.withRefreshInfo(2000L, 
MaterializedView.RefreshStatus.COMPLETE); + + // Original should be unchanged + assertEquals(Long.valueOf(1000L), mv.getLastRefreshTime()); + assertEquals(MaterializedView.RefreshStatus.INCOMPLETE, mv.getRefreshStatus()); + + // Updated should have new values + assertEquals(Long.valueOf(2000L), updated.getLastRefreshTime()); + assertEquals(MaterializedView.RefreshStatus.COMPLETE, updated.getRefreshStatus()); + + // Other fields should be the same + assertEquals(mv.getName(), updated.getName()); + assertEquals(mv.getSql(), updated.getSql()); + } + + @Test + public void testSchemaPathLowerCase() { + List fields = Collections.emptyList(); + List schemaPath = Arrays.asList("DFS", "TMP"); + + MaterializedView mv = new MaterializedView( + "test_mv", + "SELECT * FROM t1", + fields, + schemaPath, + "test_mv", + null, + null + ); + + // Schema path should be converted to lower case + assertEquals("dfs", mv.getWorkspaceSchemaPath().get(0)); + assertEquals("tmp", mv.getWorkspaceSchemaPath().get(1)); + } + + @Test + public void testIsDynamic() { + // With no fields - dynamic + MaterializedView mvDynamic = new MaterializedView( + "mv1", + "SELECT * FROM t1", + Collections.emptyList(), + Collections.singletonList("dfs"), + "mv1", + null, + null + ); + assertTrue(mvDynamic.isDynamic()); + + // With fields - not dynamic + List fields = Arrays.asList( + new View.Field("id", SqlTypeName.BIGINT, null, null, null, null, null, true, null, null) + ); + MaterializedView mvStatic = new MaterializedView( + "mv2", + "SELECT id FROM t1", + fields, + Collections.singletonList("dfs"), + "mv2", + null, + null + ); + assertEquals(false, mvStatic.isDynamic()); + } + + @Test + public void testFieldTypes() throws Exception { + // Test various field types can be serialized/deserialized + List fields = Arrays.asList( + new View.Field("col_bigint", SqlTypeName.BIGINT, null, null, null, null, null, true, null, null), + new View.Field("col_varchar", SqlTypeName.VARCHAR, 255, null, null, null, null, 
true, null, null), + new View.Field("col_decimal", SqlTypeName.DECIMAL, 10, 2, null, null, null, true, null, null), + new View.Field("col_boolean", SqlTypeName.BOOLEAN, null, null, null, null, null, false, null, null), + new View.Field("col_double", SqlTypeName.DOUBLE, null, null, null, null, null, true, null, null), + new View.Field("col_timestamp", SqlTypeName.TIMESTAMP, null, null, null, null, null, true, null, null) + ); + + MaterializedView mv = new MaterializedView( + "mv_types", + "SELECT * FROM t", + fields, + Collections.singletonList("dfs"), + "mv_types", + null, + null + ); + + // Serialize and deserialize + StringWriter writer = new StringWriter(); + mapper.writeValue(writer, mv); + MaterializedView deserialized = mapper.readValue(new StringReader(writer.toString()), MaterializedView.class); + + assertEquals(6, deserialized.getFields().size()); + assertEquals(SqlTypeName.BIGINT, deserialized.getFields().get(0).getType()); + assertEquals(SqlTypeName.VARCHAR, deserialized.getFields().get(1).getType()); + assertEquals(Integer.valueOf(255), deserialized.getFields().get(1).getPrecision()); + assertEquals(SqlTypeName.DECIMAL, deserialized.getFields().get(2).getType()); + assertEquals(Integer.valueOf(10), deserialized.getFields().get(2).getPrecision()); + assertEquals(Integer.valueOf(2), deserialized.getFields().get(2).getScale()); + assertEquals(SqlTypeName.BOOLEAN, deserialized.getFields().get(3).getType()); + assertEquals(false, deserialized.getFields().get(3).getIsNullable()); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java new file mode 100644 index 00000000000..084ffed7fee --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.sql.parser; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.calcite.avatica.util.Quoting; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.drill.categories.SqlTest; +import org.apache.drill.exec.planner.physical.PlannerSettings; +import org.apache.drill.exec.planner.sql.parser.impl.DrillParserImpl; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Tests for parsing materialized view SQL statements. 
+ */ +@Category(SqlTest.class) +public class TestMaterializedViewSqlParser { + + private SqlParser.Config parserConfig() { + return SqlParser.config() + .withParserFactory(DrillParserImpl.FACTORY) + .withQuoting(Quoting.BACK_TICK) + .withIdentifierMaxLength(PlannerSettings.DEFAULT_IDENTIFIER_MAX_LENGTH); + } + + private SqlNode parse(String sql) throws SqlParseException { + SqlParser parser = SqlParser.create(sql, parserConfig()); + return parser.parseStmt(); + } + + @Test + public void testParseCreateMaterializedView() throws SqlParseException { + String sql = "CREATE MATERIALIZED VIEW mv1 AS SELECT * FROM t1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlCreateMaterializedView); + SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; + assertEquals("mv1", mv.getName()); + assertEquals(SqlCreateType.SIMPLE, mv.getSqlCreateType()); + } + + @Test + public void testParseCreateOrReplaceMaterializedView() throws SqlParseException { + String sql = "CREATE OR REPLACE MATERIALIZED VIEW mv1 AS SELECT * FROM t1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlCreateMaterializedView); + SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; + assertEquals("mv1", mv.getName()); + assertEquals(SqlCreateType.OR_REPLACE, mv.getSqlCreateType()); + } + + @Test + public void testParseCreateMaterializedViewIfNotExists() throws SqlParseException { + String sql = "CREATE MATERIALIZED VIEW IF NOT EXISTS mv1 AS SELECT * FROM t1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlCreateMaterializedView); + SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; + assertEquals("mv1", mv.getName()); + assertEquals(SqlCreateType.IF_NOT_EXISTS, mv.getSqlCreateType()); + } + + @Test + public void testParseCreateMaterializedViewWithSchema() throws SqlParseException { + String sql = "CREATE MATERIALIZED VIEW dfs.tmp.mv1 AS SELECT * FROM t1"; + SqlNode 
node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlCreateMaterializedView); + SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; + assertEquals("mv1", mv.getName()); + assertEquals(2, mv.getSchemaPath().size()); + assertEquals("dfs", mv.getSchemaPath().get(0)); + assertEquals("tmp", mv.getSchemaPath().get(1)); + } + + @Test + public void testParseCreateMaterializedViewWithFieldList() throws SqlParseException { + String sql = "CREATE MATERIALIZED VIEW mv1 (col1, col2) AS SELECT a, b FROM t1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlCreateMaterializedView); + SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; + assertEquals("mv1", mv.getName()); + assertEquals(2, mv.getFieldNames().size()); + assertEquals("col1", mv.getFieldNames().get(0)); + assertEquals("col2", mv.getFieldNames().get(1)); + } + + @Test + public void testParseDropMaterializedView() throws SqlParseException { + String sql = "DROP MATERIALIZED VIEW mv1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlDropMaterializedView); + SqlDropMaterializedView drop = (SqlDropMaterializedView) node; + assertEquals("mv1", drop.getName()); + assertEquals(false, drop.checkViewExistence()); + } + + @Test + public void testParseDropMaterializedViewIfExists() throws SqlParseException { + String sql = "DROP MATERIALIZED VIEW IF EXISTS mv1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlDropMaterializedView); + SqlDropMaterializedView drop = (SqlDropMaterializedView) node; + assertEquals("mv1", drop.getName()); + assertEquals(true, drop.checkViewExistence()); + } + + @Test + public void testParseDropMaterializedViewWithSchema() throws SqlParseException { + String sql = "DROP MATERIALIZED VIEW dfs.tmp.mv1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlDropMaterializedView); + SqlDropMaterializedView drop = 
(SqlDropMaterializedView) node; + assertEquals("mv1", drop.getName()); + assertEquals(2, drop.getSchemaPath().size()); + } + + @Test + public void testParseRefreshMaterializedView() throws SqlParseException { + String sql = "REFRESH MATERIALIZED VIEW mv1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlRefreshMaterializedView); + SqlRefreshMaterializedView refresh = (SqlRefreshMaterializedView) node; + assertEquals("mv1", refresh.getName()); + } + + @Test + public void testParseRefreshMaterializedViewWithSchema() throws SqlParseException { + String sql = "REFRESH MATERIALIZED VIEW dfs.tmp.mv1"; + SqlNode node = parse(sql); + + assertNotNull(node); + assertTrue(node instanceof SqlRefreshMaterializedView); + SqlRefreshMaterializedView refresh = (SqlRefreshMaterializedView) node; + assertEquals("mv1", refresh.getName()); + assertEquals(2, refresh.getSchemaPath().size()); + } + + @Test(expected = SqlParseException.class) + public void testInvalidCreateMaterializedViewSyntax() throws SqlParseException { + // Missing AS keyword + String sql = "CREATE MATERIALIZED VIEW mv1 SELECT * FROM t1"; + parse(sql); + } + + @Test(expected = SqlParseException.class) + public void testInvalidCreateOrReplaceIfNotExists() throws SqlParseException { + // Cannot have both OR REPLACE and IF NOT EXISTS + String sql = "CREATE OR REPLACE MATERIALIZED VIEW IF NOT EXISTS mv1 AS SELECT * FROM t1"; + parse(sql); + } + + @Test + public void testUnparseCreateMaterializedView() throws SqlParseException { + String sql = "CREATE MATERIALIZED VIEW mv1 AS SELECT * FROM t1"; + SqlNode node = parse(sql); + String unparsed = node.toSqlString(null).getSql(); + + assertTrue(unparsed.contains("CREATE")); + assertTrue(unparsed.contains("MATERIALIZED")); + assertTrue(unparsed.contains("VIEW")); + assertTrue(unparsed.contains("mv1")); + } + + @Test + public void testUnparseDropMaterializedView() throws SqlParseException { + String sql = "DROP MATERIALIZED VIEW IF EXISTS 
mv1"; + SqlNode node = parse(sql); + String unparsed = node.toSqlString(null).getSql(); + + assertTrue(unparsed.contains("DROP")); + assertTrue(unparsed.contains("MATERIALIZED")); + assertTrue(unparsed.contains("VIEW")); + assertTrue(unparsed.contains("IF")); + assertTrue(unparsed.contains("EXISTS")); + } + + @Test + public void testUnparseRefreshMaterializedView() throws SqlParseException { + String sql = "REFRESH MATERIALIZED VIEW mv1"; + SqlNode node = parse(sql); + String unparsed = node.toSqlString(null).getSql(); + + assertTrue(unparsed.contains("REFRESH")); + assertTrue(unparsed.contains("MATERIALIZED")); + assertTrue(unparsed.contains("VIEW")); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java new file mode 100644 index 00000000000..937644def3e --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java @@ -0,0 +1,281 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.sql; + +import java.io.File; +import java.nio.file.Paths; + +import org.apache.drill.PlanTestBase; +import org.apache.drill.categories.SqlTest; +import org.apache.drill.common.exceptions.UserRemoteException; +import org.apache.drill.exec.dotdrill.DotDrillType; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.apache.drill.exec.util.StoragePluginTestUtils.DFS_TMP_SCHEMA; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Tests for materialized view support in Drill. + * <p>
+ * Tests CREATE, DROP, and REFRESH MATERIALIZED VIEW statements. + */ +@Category(SqlTest.class) +public class TestMaterializedViewSupport extends PlanTestBase { + + @BeforeClass + public static void setupTestFiles() { + dirTestWatcher.copyResourceToRoot(Paths.get("nation")); + } + + @Test + public void testCreateMaterializedView() throws Exception { + String mvName = "test_mv_create"; + try { + // Create a simple materialized view + testBuilder() + .sqlQuery("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 5", mvName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(true, String.format("Materialized view '%s' created successfully in 'dfs.tmp' schema", mvName)) + .go(); + + // Verify the materialized view definition file exists + File mvFile = new File(dirTestWatcher.getDfsTestTmpDir(), mvName + DotDrillType.MATERIALIZED_VIEW.getEnding()); + assertTrue("Materialized view definition file should exist", mvFile.exists()); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testCreateOrReplaceMaterializedView() throws Exception { + String mvName = "test_mv_replace"; + try { + // Create initial materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id FROM cp.`region.json` LIMIT 3", mvName); + + // Replace with different definition + testBuilder() + .sqlQuery("CREATE OR REPLACE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` LIMIT 5", mvName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(true, String.format("Materialized view '%s' replaced successfully in 'dfs.tmp' schema", mvName)) + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testCreateMaterializedViewIfNotExists() throws Exception { + String mvName = "test_mv_if_not_exists"; + try { + // Create initial materialized view + test("CREATE MATERIALIZED VIEW 
dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 3", mvName); + + // Try to create again with IF NOT EXISTS - should not fail + testBuilder() + .sqlQuery("CREATE MATERIALIZED VIEW IF NOT EXISTS dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 5", mvName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(false, String.format("A table or view with given name [%s] already exists in schema [dfs.tmp]", mvName)) + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testDropMaterializedView() throws Exception { + String mvName = "test_mv_drop"; + + // Create materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 5", mvName); + + // Verify it exists + File mvFile = new File(dirTestWatcher.getDfsTestTmpDir(), mvName + DotDrillType.MATERIALIZED_VIEW.getEnding()); + assertTrue("Materialized view should exist before drop", mvFile.exists()); + + // Drop the materialized view + testBuilder() + .sqlQuery("DROP MATERIALIZED VIEW dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(true, String.format("Materialized view [%s] deleted successfully from schema [dfs.tmp].", mvName)) + .go(); + + // Verify it no longer exists + assertFalse("Materialized view should not exist after drop", mvFile.exists()); + } + + @Test + public void testDropMaterializedViewIfExists() throws Exception { + String mvName = "test_mv_drop_if_exists"; + + // Drop non-existent materialized view with IF EXISTS - should not fail + testBuilder() + .sqlQuery("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(false, String.format("Materialized view [%s] not found in schema [dfs.tmp].", mvName)) + .go(); + } + + @Test(expected = UserRemoteException.class) + public void testDropNonExistentMaterializedView() throws Exception { + // Should throw error when dropping non-existent 
MV without IF EXISTS + test("DROP MATERIALIZED VIEW dfs.tmp.non_existent_mv"); + } + + @Test + public void testRefreshMaterializedView() throws Exception { + String mvName = "test_mv_refresh"; + try { + // Create materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 5", mvName); + + // Refresh the materialized view + testBuilder() + .sqlQuery("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(true, String.format("Materialized view [%s] refreshed successfully in schema [dfs.tmp].", mvName)) + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test(expected = UserRemoteException.class) + public void testRefreshNonExistentMaterializedView() throws Exception { + // Should throw error when refreshing non-existent MV + test("REFRESH MATERIALIZED VIEW dfs.tmp.non_existent_mv"); + } + + @Test + public void testQueryMaterializedView() throws Exception { + String mvName = "test_mv_query"; + try { + // Create materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` ORDER BY region_id LIMIT 3", mvName); + + // Query the materialized view + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .baselineValues(1L, "San Francisco") + .baselineValues(2L, "San Diego") + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMaterializedViewWithAggregation() throws Exception { + String mvName = "test_mv_agg"; + try { + // Create materialized view with aggregation + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT sales_country, COUNT(*) AS cnt FROM cp.`region.json` GROUP BY sales_country", mvName); + + // Query the materialized view + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s WHERE 
sales_country = 'No Country'", mvName) + .unOrdered() + .baselineColumns("sales_country", "cnt") + .baselineValues("No Country", 1L) + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMaterializedViewInShowTables() throws Exception { + String mvName = "test_mv_show"; + try { + // Create materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 1", mvName); + + // Verify it shows up in SHOW TABLES + String showTablesQuery = "SHOW TABLES IN dfs.tmp LIKE '%s'"; + testBuilder() + .sqlQuery(showTablesQuery, mvName) + .unOrdered() + .baselineColumns("TABLE_SCHEMA", "TABLE_NAME") + .baselineValues("dfs.tmp", mvName) + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test(expected = UserRemoteException.class) + public void testCreateMaterializedViewOnNonWritableSchema() throws Exception { + // cp schema is not writable + test("CREATE MATERIALIZED VIEW cp.test_mv AS SELECT * FROM cp.`region.json`"); + } + + @Test(expected = UserRemoteException.class) + public void testCannotCreateMaterializedViewOverRegularView() throws Exception { + String viewName = "test_regular_view"; + String mvName = viewName; // Same name as regular view + try { + // Create a regular view first + test("CREATE VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 5", viewName); + + // Try to create materialized view with same name using OR REPLACE - should fail + test("CREATE OR REPLACE MATERIALIZED VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json`", mvName); + } finally { + test("DROP VIEW IF EXISTS dfs.tmp.%s", viewName); + } + } + + @Test + public void testMaterializedViewWithFieldList() throws Exception { + String mvName = "test_mv_fields"; + try { + // Create materialized view with explicit field list + test("CREATE MATERIALIZED VIEW dfs.tmp.%s (id, city) AS SELECT region_id, sales_city FROM cp.`region.json` LIMIT 3", mvName); + + 
// Query the materialized view with renamed fields + testBuilder() + .sqlQuery("SELECT id, city FROM dfs.tmp.%s ORDER BY id LIMIT 2", mvName) + .ordered() + .baselineColumns("id", "city") + .baselineValues(0L, "None") + .baselineValues(1L, "San Francisco") + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } +} From 4f70b7e0c2ee4f15a20e19b32245a956234002c2 Mon Sep 17 00:00:00 2001 From: cgivre Date: Mon, 2 Feb 2026 09:29:29 -0500 Subject: [PATCH 2/8] Materialized Views Working --- .../logical/DrillMaterializedViewTable.java | 83 +++++--- .../logical/MaterializedViewRewriter.java | 193 ++++++++++++++++++ .../sql/handlers/DefaultSqlHandler.java | 7 + .../sql/handlers/MaterializedViewHandler.java | 101 +++++++-- .../parser/CompoundIdentifierConverter.java | 3 + .../drill/exec/store/AbstractSchema.java | 13 ++ .../store/dfs/WorkspaceSchemaFactory.java | 31 ++- .../src/main/resources/drill-module.conf | 1 + .../TestMaterializedViewRewriting.java | 148 ++++++++++++++ .../parser/TestMaterializedViewSqlParser.java | 36 ++-- .../exec/sql/TestMaterializedViewSupport.java | 48 +++-- 11 files changed, 591 insertions(+), 73 deletions(-) create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java create mode 100644 exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java index ec6f7371c54..44a39e9b522 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillMaterializedViewTable.java @@ -27,7 +27,6 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; 
import org.apache.calcite.schema.Schema.TableType; -import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.schema.Statistic; import org.apache.calcite.schema.Statistics; import org.apache.calcite.schema.TranslatableTable; @@ -40,14 +39,19 @@ /** * Represents a materialized view in the Drill query planning. * <p>
- * Unlike regular views which expand to their definition query, materialized views
- * read from pre-computed data stored in the workspace directory.
- * <p>
 * A materialized view stores:
 * <ul>
 * <li>Definition file (.materialized_view.drill) - JSON with name, SQL, schema info</li>
- * <li>Data directory - Parquet files with the pre-computed results</li>
+ * <li>Data directory ({name}_mv_data/) - Parquet files with pre-computed results</li>
 * </ul>
+ * <p>
+ * Behavior:
+ * <ul>
+ * <li>Before REFRESH: queries expand the SQL definition (like a view)</li>
+ * <li>After REFRESH: queries scan from pre-computed Parquet data</li>
+ * </ul>
+ * + * @see org.apache.drill.exec.dotdrill.MaterializedView */ public class DrillMaterializedViewTable implements TranslatableTable, DrillViewInfoProvider { @@ -77,13 +81,8 @@ public Statistic getStatistic() { /** * Converts this materialized view to a RelNode for query planning. * <p>
- * Unlike regular views, materialized views expand to their definition SQL
- * which is then converted to a RelNode. The data is actually read from
- * the materialized data directory, not computed fresh.
- * <p>
- * For now, we expand the view definition since the data is in Parquet format - * in a directory with the same name as the view. The storage plugin will - * handle reading the actual data. + * If the MV has been refreshed (data exists), scans from the pre-computed Parquet data. + * Otherwise, expands the SQL definition like a regular view. */ @Override public RelNode toRel(ToRelContext context, RelOptTable relOptTable) { @@ -93,13 +92,30 @@ public RelNode toRel(ToRelContext context, RelOptTable relOptTable) { RelDataType rowType = relOptTable.getRowType(); RelNode rel; + // Check if materialized data exists (REFRESH has been called) + boolean hasData = materializedView.getRefreshStatus() == MaterializedView.RefreshStatus.COMPLETE; + + // Build the SQL to execute - either scan data or expand definition + String sqlToExpand; + if (hasData) { + // Scan from the pre-computed data directory + sqlToExpand = buildDataScanSql(); + } else { + // No data yet - expand the SQL definition like a view + sqlToExpand = materializedView.getSql(); + } + + // Always use the workspace schema path for context - needed for table resolution + List schemaPath = materializedView.getWorkspaceSchemaPath(); + if (viewExpansionContext.isImpersonationEnabled()) { token = viewExpansionContext.reserveViewExpansionToken(viewOwner); - rel = expandViewForImpersonatedUser(viewExpander, materializedView.getWorkspaceSchemaPath(), - token.getSchemaTree()); + rel = viewExpander.expandView(sqlToExpand, token.getSchemaTree(), schemaPath).rel; } else { - rel = viewExpander.expandView(rowType, materializedView.getSql(), - materializedView.getWorkspaceSchemaPath(), Collections.emptyList()).rel; + // When scanning data, pass null for rowType to let Parquet schema be inferred + // When expanding SQL definition, use the MV's row type + RelDataType typeHint = hasData ? 
null : rowType; + rel = viewExpander.expandView(typeHint, sqlToExpand, schemaPath, Collections.emptyList()).rel; } return rel; @@ -110,17 +126,38 @@ public RelNode toRel(ToRelContext context, RelOptTable relOptTable) { } } - protected RelNode expandViewForImpersonatedUser(DrillViewExpander context, - List workspaceSchemaPath, - SchemaPlus tokenSchemaTree) { - return context.expandView(materializedView.getSql(), tokenSchemaTree, workspaceSchemaPath).rel; + /** + * Builds SQL to scan the materialized data directory. + * The data is stored in {workspace}/{mvName}_mv_data/ directory. + * We explicitly select the MV's columns to ensure proper schema matching. + */ + private String buildDataScanSql() { + String dataTableName = materializedView.getName() + "_mv_data"; + + // Build explicit column list from the MV's field definitions + List fieldNames = materializedView.getFields().stream() + .map(f -> f.getName()) + .collect(java.util.stream.Collectors.toList()); + if (fieldNames.isEmpty()) { + // Fallback to SELECT * if no fields defined (shouldn't happen for non-dynamic MVs) + return "SELECT * FROM `" + dataTableName + "`"; + } + + StringBuilder sql = new StringBuilder("SELECT "); + for (int i = 0; i < fieldNames.size(); i++) { + if (i > 0) { + sql.append(", "); + } + sql.append("`").append(fieldNames.get(i)).append("`"); + } + sql.append(" FROM `").append(dataTableName).append("`"); + return sql.toString(); } @Override public TableType getJdbcTableType() { - // Report as TABLE since materialized views store actual data - // This distinguishes them from regular views (VIEW type) - return TableType.TABLE; + // Report as MATERIALIZED_VIEW type + return TableType.MATERIALIZED_VIEW; } @Override diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java new file mode 100644 index 00000000000..ac999e928c7 --- /dev/null +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.drill.exec.dotdrill.MaterializedView; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.store.AbstractSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for materialized view query rewriting. + * <p>
+ * When enabled via planner.enable_materialized_view_rewrite, this class attempts
+ * to rewrite queries to use materialized views when beneficial.
+ * <p>
+ * Current implementation provides the infrastructure for MV rewriting.
+ * Future enhancements can add:
+ * <ul>
+ * <li>Structural matching using Calcite's SubstitutionVisitor</li>
+ * <li>Partial query matching (query is subset of MV)</li>
+ * <li>Aggregate rollup rewriting</li>
+ * <li>Cost-based selection among multiple candidate MVs</li>
+ * </ul>
+ */ +public class MaterializedViewRewriter { + private static final Logger logger = LoggerFactory.getLogger(MaterializedViewRewriter.class); + + private final QueryContext context; + private final SchemaPlus defaultSchema; + + public MaterializedViewRewriter(QueryContext context, SchemaPlus defaultSchema) { + this.context = context; + this.defaultSchema = defaultSchema; + } + + /** + * Attempts to rewrite the given RelNode to use a materialized view. + * + * @param relNode the query plan to potentially rewrite + * @return the rewritten plan using an MV, or the original plan if no rewrite is possible + */ + public RelNode rewrite(RelNode relNode) { + if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { + return relNode; + } + + // Find all available materialized views + List candidates = findCandidateMaterializedViews(); + + if (candidates.isEmpty()) { + logger.debug("No materialized views available for rewriting"); + return relNode; + } + + logger.debug("Found {} materialized view candidates for potential rewriting", candidates.size()); + + // Future: Implement structural matching here + // For now, log that rewriting is enabled but not yet implemented + for (MaterializedViewCandidate candidate : candidates) { + logger.debug("MV candidate: {} in schema {} (refreshed: {})", + candidate.getName(), + candidate.getSchemaPath(), + candidate.isRefreshed()); + } + + // Return original plan - actual matching not yet implemented + return relNode; + } + + /** + * Finds all materialized views in accessible schemas that could potentially + * be used for query rewriting. + */ + private List findCandidateMaterializedViews() { + List candidates = new ArrayList<>(); + + // Traverse accessible schemas to find MVs + collectMaterializedViews(defaultSchema, candidates); + + return candidates; + } + + /** + * Recursively collects materialized views from a schema and its subschemas. 
+ */ + private void collectMaterializedViews(SchemaPlus schema, List candidates) { + if (schema == null) { + return; + } + + // Check if this schema supports MVs (is an AbstractSchema) + if (schema.unwrap(AbstractSchema.class) != null) { + AbstractSchema abstractSchema = schema.unwrap(AbstractSchema.class); + try { + collectMaterializedViewsFromSchema(abstractSchema, candidates); + } catch (Exception e) { + logger.debug("Error collecting MVs from schema {}: {}", schema.getName(), e.getMessage()); + } + } + + // Recurse into subschemas + Set subSchemaNames = schema.getSubSchemaNames(); + for (String subSchemaName : subSchemaNames) { + SchemaPlus subSchema = schema.getSubSchema(subSchemaName); + collectMaterializedViews(subSchema, candidates); + } + } + + /** + * Collects MVs from a specific schema. + */ + private void collectMaterializedViewsFromSchema(AbstractSchema schema, + List candidates) { + // Get table names and check for MVs + Set tableNames = schema.getTableNames(); + for (String tableName : tableNames) { + try { + MaterializedView mv = schema.getMaterializedView(tableName); + if (mv != null) { + boolean isRefreshed = mv.getRefreshStatus() == MaterializedView.RefreshStatus.COMPLETE; + candidates.add(new MaterializedViewCandidate( + mv.getName(), + schema.getFullSchemaName(), + mv, + isRefreshed)); + } + } catch (IOException e) { + logger.debug("Error reading MV {}: {}", tableName, e.getMessage()); + } + } + } + + /** + * Represents a candidate materialized view for query rewriting. 
+ */ + public static class MaterializedViewCandidate { + private final String name; + private final String schemaPath; + private final MaterializedView materializedView; + private final boolean refreshed; + + public MaterializedViewCandidate(String name, String schemaPath, + MaterializedView materializedView, boolean refreshed) { + this.name = name; + this.schemaPath = schemaPath; + this.materializedView = materializedView; + this.refreshed = refreshed; + } + + public String getName() { + return name; + } + + public String getSchemaPath() { + return schemaPath; + } + + public MaterializedView getMaterializedView() { + return materializedView; + } + + public boolean isRefreshed() { + return refreshed; + } + + public String getSql() { + return materializedView.getSql(); + } + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java index 6cc4d3bc4bc..d03ec359300 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java @@ -78,6 +78,7 @@ import org.apache.drill.exec.planner.logical.DrillRel; import org.apache.drill.exec.planner.logical.DrillRelFactories; import org.apache.drill.exec.planner.logical.DrillScreenRel; +import org.apache.drill.exec.planner.logical.MaterializedViewRewriter; import org.apache.drill.exec.planner.logical.PreProcessLogicalRel; import org.apache.drill.exec.planner.physical.DrillDistributionTrait; import org.apache.drill.exec.planner.physical.PhysicalPlanCreator; @@ -201,6 +202,12 @@ protected ConvertedRelNode validateAndConvert(SqlNode sqlNode) throws ForemanSet RelNode rel = convertToRel(validated); rel = preprocessNode(rel); + // Attempt materialized view rewriting if enabled + if (context.getPlannerSettings().isMaterializedViewRewriteEnabled()) 
{ + MaterializedViewRewriter mvRewriter = new MaterializedViewRewriter(context, context.getNewDefaultSchema()); + rel = mvRewriter.rewrite(rel); + } + return new ConvertedRelNode(rel, validatedTypedSqlNode.getValue()); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java index df0b0542927..911921a13d3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java @@ -19,6 +19,7 @@ import java.io.IOException; +import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.schema.Schema; @@ -32,6 +33,12 @@ import org.apache.drill.exec.dotdrill.MaterializedView; import org.apache.drill.exec.ops.QueryContext; import org.apache.drill.exec.physical.PhysicalPlan; +import org.apache.drill.exec.physical.base.PhysicalOperator; +import org.apache.drill.exec.planner.logical.CreateTableEntry; +import org.apache.drill.exec.planner.logical.DrillRel; +import org.apache.drill.exec.planner.logical.DrillScreenRel; +import org.apache.drill.exec.planner.logical.DrillWriterRel; +import org.apache.drill.exec.planner.physical.Prel; import org.apache.drill.exec.planner.sql.DirectPlan; import org.apache.drill.exec.planner.sql.SchemaUtilities; import org.apache.drill.exec.planner.sql.parser.SqlCreateMaterializedView; @@ -40,9 +47,13 @@ import org.apache.drill.exec.planner.sql.parser.SqlRefreshMaterializedView; import org.apache.drill.exec.store.AbstractSchema; import org.apache.drill.exec.work.foreman.ForemanSetupException; +import org.apache.drill.exec.work.foreman.SqlUnsupportedException; /** * Handlers for materialized view DDL commands: CREATE, DROP, and REFRESH MATERIALIZED 
VIEW. + * <p>

+ * CREATE and DROP return DirectPlan with ok/summary output. + * REFRESH executes the MV query and writes data to Parquet, returning write statistics. */ public abstract class MaterializedViewHandler extends DefaultSqlHandler { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MaterializedViewHandler.class); @@ -56,6 +67,9 @@ public MaterializedViewHandler(SqlHandlerConfig config) { /** * Handler for CREATE MATERIALIZED VIEW DDL command. + *

+ * Creates the MV definition file only. Until the data is populated via REFRESH MATERIALIZED VIEW, + * queries against the MV expand its SQL definition (like a regular view). */ public static class CreateMaterializedView extends MaterializedViewHandler { @@ -64,7 +78,8 @@ public CreateMaterializedView(SqlHandlerConfig config) { } @Override - public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConversionException, IOException, ForemanSetupException { + public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConversionException, + IOException, ForemanSetupException { SqlCreateMaterializedView createMV = unwrap(sqlNode, SqlCreateMaterializedView.class); final String newViewName = DrillStringUtils.removeLeadingSlash(createMV.getName()); @@ -95,16 +110,19 @@ public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConv } // Create the materialized view definition + // Use the actual schema path where the MV is created (not the session's default schema) final MaterializedView materializedView = new MaterializedView(newViewName, viewSql, - newViewRelNode.getRowType(), SchemaUtilities.getSchemaPathAsList(defaultSchema)); + newViewRelNode.getRowType(), drillSchema.getSchemaPath()); - // Create the materialized view (this will also populate the data) + // Create the materialized view definition file final boolean replaced = drillSchema.createMaterializedView(materializedView); - final String summary = String.format("Materialized view '%s' %s successfully in '%s' schema", - newViewName, replaced ? "replaced" : "created", schemaPath); + String message = replaced + ?
String.format("Materialized view '%s' replaced successfully in '%s' schema", newViewName, schemaPath) + : String.format("Materialized view '%s' created successfully in '%s' schema", newViewName, schemaPath); - return DirectPlan.createDirectPlan(context, true, summary); + logger.info("Created materialized view [{}] in schema [{}]", newViewName, schemaPath); + return DirectPlan.createDirectPlan(context, true, message); } /** @@ -117,12 +135,15 @@ private boolean checkMaterializedViewCreationPossibility(AbstractSchema drillSch final String viewName = createMV.getName(); final Table table = SqlHandlerUtil.getTableFromSchema(drillSchema, viewName); - final boolean isTable = (table != null && table.getJdbcTableType() != Schema.TableType.VIEW) - || context.getSession().isTemporaryTable(drillSchema, context.getConfig(), viewName); - final boolean isView = (table != null && table.getJdbcTableType() == Schema.TableType.VIEW); - // Check if it's a materialized view by checking table type + // Check if it's a materialized view final boolean isMaterializedView = table != null && - "MATERIALIZED_VIEW".equals(table.getJdbcTableType().jdbcName); + table.getJdbcTableType() == Schema.TableType.MATERIALIZED_VIEW; + final boolean isView = (table != null && table.getJdbcTableType() == Schema.TableType.VIEW); + // Regular table check excludes views and materialized views + final boolean isTable = (table != null + && table.getJdbcTableType() != Schema.TableType.VIEW + && table.getJdbcTableType() != Schema.TableType.MATERIALIZED_VIEW) + || context.getSession().isTemporaryTable(drillSchema, context.getConfig(), viewName); SqlCreateType createType = createMV.getSqlCreateType(); switch (createType) { @@ -210,6 +231,9 @@ public PhysicalPlan getPlan(SqlNode sqlNode) throws IOException, ForemanSetupExc /** * Handler for REFRESH MATERIALIZED VIEW DDL command. + *

+ * Re-executes the MV's defining query and writes the results to Parquet files + * in the MV's data directory. Returns write statistics (like CTAS). */ public static class RefreshMaterializedView extends MaterializedViewHandler { @@ -227,19 +251,62 @@ public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConv final String schemaPath = drillSchema.getFullSchemaName(); - final Table viewToRefresh = SqlHandlerUtil.getTableFromSchema(drillSchema, viewName); - - if (viewToRefresh == null) { + // Get the existing materialized view definition + final MaterializedView mv = drillSchema.getMaterializedView(viewName); + if (mv == null) { throw UserException.validationError() .message("Materialized view [%s] not found in schema [%s].", viewName, schemaPath) .build(logger); } - // Refresh the materialized view data + // Clear existing data directory drillSchema.refreshMaterializedView(viewName); - return DirectPlan.createDirectPlan(context, true, - String.format("Materialized view [%s] refreshed successfully in schema [%s].", viewName, schemaPath)); + // Parse and validate the MV's SQL definition + SqlNode mvQuery = config.getConverter().parse(mv.getSql()); + final ConvertedRelNode convertedRelNode = validateAndConvert(mvQuery); + final RelDataType validatedRowType = convertedRelNode.getValidatedRowType(); + final RelNode queryRelNode = convertedRelNode.getConvertedNode(); + + try { + // Get the writer entry for the MV data directory + CreateTableEntry createTableEntry = drillSchema.createMaterializedViewDataWriter(viewName); + + // Convert to Drill logical plan with writer + DrillRel drel = convertToDrel(queryRelNode, createTableEntry, validatedRowType); + + // Convert to physical plan + Prel prel = convertToPrel(drel, validatedRowType); + logAndSetTextPlan("Materialized View Refresh Physical", prel, logger); + + PhysicalOperator pop = convertToPop(prel); + PhysicalPlan plan = convertToPlan(pop, queryRelNode); + + logger.info("Refreshing materialized 
view [{}] in schema [{}]", viewName, schemaPath); + return plan; + + } catch (SqlUnsupportedException e) { + throw UserException.unsupportedError(e) + .message("Failed to create physical plan for materialized view refresh") + .build(logger); + } + } + + /** + * Convert to Drill logical plan with a writer on top. + */ + private DrillRel convertToDrel(RelNode relNode, CreateTableEntry createTableEntry, RelDataType queryRowType) + throws SqlUnsupportedException { + final DrillRel convertedRelNode = convertToRawDrel(relNode); + + // Put a non-trivial topProject to ensure the final output field name is preserved + final DrillRel topPreservedNameProj = queryRowType.getFieldCount() == convertedRelNode.getRowType().getFieldCount() + ? addRenamedProject(convertedRelNode, queryRowType) : convertedRelNode; + + final RelTraitSet traits = convertedRelNode.getCluster().traitSet().plus(DrillRel.DRILL_LOGICAL); + final DrillWriterRel writerRel = new DrillWriterRel(convertedRelNode.getCluster(), + traits, topPreservedNameProj, createTableEntry); + return new DrillScreenRel(writerRel.getCluster(), writerRel.getTraitSet(), writerRel); } } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java index d8e6ba87da5..92e61078253 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java @@ -69,8 +69,11 @@ public class CompoundIdentifierConverter extends SqlShuttle { .put(SqlSelect.class, arrayOf(D, E, D, E, E, E, E, E, E, D, D, D)) .put(SqlCreateTable.class, arrayOf(D, D, D, E, D, D)) .put(SqlCreateView.class, arrayOf(D, E, E, D)) + .put(SqlCreateMaterializedView.class, arrayOf(D, D, E, D)) .put(DrillSqlDescribeTable.class, arrayOf(D, D, E)) 
.put(SqlDropView.class, arrayOf(D, D)) + .put(SqlDropMaterializedView.class, arrayOf(D, D)) + .put(SqlRefreshMaterializedView.class, arrayOf(D)) .put(SqlShowFiles.class, arrayOf(D)) .put(SqlShowSchemas.class, arrayOf(D, D)) .put(SqlUseSchema.class, arrayOf(D)) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java index 3527a9b1a77..ea871f5ecad 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java @@ -225,6 +225,19 @@ public MaterializedView getMaterializedView(String viewName) throws IOException return null; } + /** + * Creates a table entry for writing data to a materialized view's data directory. + * The data will be stored in Parquet format. + * + * @param viewName materialized view name + * @return create table entry for writing MV data + */ + public CreateTableEntry createMaterializedViewDataWriter(String viewName) { + throw UserException.unsupportedError() + .message("Creating materialized view data is not supported in schema [%s]", getSchemaPath()) + .build(logger); + } + /** * Creates table entry using table name, list of partition columns * and storage strategy used to create table folder and files diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java index c410af3b34d..a800379959b 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java @@ -358,7 +358,8 @@ private Path getMaterializedViewPath(String name) { } private Path getMaterializedViewDataPath(String name) { - return new Path(config.getLocation(), name); + // Use _mv_data suffix to distinguish 
data directory from MV definition lookup + return new Path(config.getLocation(), name + "_mv_data"); } @Override @@ -447,6 +448,27 @@ public void refreshMaterializedView(String viewName) throws IOException { } } + @Override + public CreateTableEntry createMaterializedViewDataWriter(String viewName) { + // Use Parquet format for storing materialized view data + FormatPlugin formatPlugin = plugin.getFormatPlugin("parquet"); + if (formatPlugin == null) { + throw UserException.unsupportedError() + .message("Parquet format plugin not available for materialized view storage") + .build(logger); + } + + // Store data in a directory with _mv_data suffix to avoid name collision + // with the materialized view lookup (which uses the same base name) + String dataLocation = config.getLocation() + Path.SEPARATOR + viewName + "_mv_data"; + return new FileSystemCreateTableEntry( + (FileSystemConfig) plugin.getConfig(), + formatPlugin, + dataLocation, + Collections.emptyList(), // No partition columns for MVs + StorageStrategy.DEFAULT); + } + @Override public MaterializedView getMaterializedView(String viewName) throws IOException { List files = Collections.emptyList(); @@ -1021,8 +1043,11 @@ public void dropTable(String table) { @Override public List> getTableNamesAndTypes() { return Stream.concat( - tables.entrySet().stream().map(kv -> Pair.of(kv.getKey().sig.getName(), kv.getValue().getJdbcTableType())), - getViews().stream().map(viewName -> Pair.of(viewName, TableType.VIEW)) + Stream.concat( + tables.entrySet().stream().map(kv -> Pair.of(kv.getKey().sig.getName(), kv.getValue().getJdbcTableType())), + getViews().stream().map(viewName -> Pair.of(viewName, TableType.VIEW)) + ), + getMaterializedViews().stream().map(mvName -> Pair.of(mvName, TableType.MATERIALIZED_VIEW)) ).collect(Collectors.toList()); } diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf index 7541a99e2dd..c0896126538 100644 --- 
a/exec/java-exec/src/main/resources/drill-module.conf +++ b/exec/java-exec/src/main/resources/drill-module.conf @@ -650,6 +650,7 @@ drill.exec.options: { planner.enable_join_optimization: true, planner.enable_limit0_on_scan: true, planner.enable_limit0_optimization: true, + planner.enable_materialized_view_rewrite: true, planner.enable_mergejoin: true, planner.enable_multiphase_agg: true, planner.enable_mux_exchange: true, diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java new file mode 100644 index 00000000000..0b638a82911 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner; + +import java.nio.file.Paths; + +import org.apache.drill.PlanTestBase; +import org.apache.drill.categories.SqlTest; +import org.apache.drill.exec.ExecConstants; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Tests for materialized view query rewriting. + *

+ * When planner.enable_materialized_view_rewrite is enabled, queries + * may be rewritten to use pre-computed materialized views. + */ +@Category(SqlTest.class) +public class TestMaterializedViewRewriting extends PlanTestBase { + + @BeforeClass + public static void setupTestFiles() { + dirTestWatcher.copyResourceToRoot(Paths.get("nation")); + } + + @Test + public void testRewritingEnabledByDefault() throws Exception { + String mvName = "test_mv_rewrite_enabled_default"; + try { + // Create materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + + // Verify rewriting is enabled by default and queries work correctly + testBuilder() + .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testRewritingCanBeEnabled() throws Exception { + String mvName = "test_mv_rewrite_enabled"; + try { + // Create materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Enable rewriting + test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + + // Query should still work (even though actual matching isn't implemented yet) + testBuilder() + .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } finally { + test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testRewritingWithNoMaterializedViews() throws 
Exception { + try { + // Enable rewriting + test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + + // Query should work even when no MVs exist + testBuilder() + .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } finally { + test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + } + } + + @Test + public void testQueryMVDirectlyWithRewritingEnabled() throws Exception { + String mvName = "test_mv_direct_query"; + try { + // Create and refresh materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Enable rewriting + test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + + // Query the MV directly - should still work + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } finally { + test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testAggregateQueryWithRewritingEnabled() throws Exception { + String mvName = "test_mv_agg_rewrite"; + try { + // Create MV with aggregation on a simple filter + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT sales_country, COUNT(*) AS cnt FROM cp.`region.json` WHERE region_id < 5 GROUP BY sales_country", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Enable rewriting + test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + + // Run aggregate query (same as MV definition) + // With future matching, this could be rewritten to use 
the MV + // For now just verify the query works with rewriting enabled + test("SELECT sales_country, COUNT(*) AS cnt FROM cp.`region.json` WHERE region_id < 5 GROUP BY sales_country"); + } finally { + test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java index 084ffed7fee..a57254d4bf9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java @@ -57,7 +57,7 @@ public void testParseCreateMaterializedView() throws SqlParseException { assertNotNull(node); assertTrue(node instanceof SqlCreateMaterializedView); SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; - assertEquals("mv1", mv.getName()); + assertEquals("MV1", mv.getName()); assertEquals(SqlCreateType.SIMPLE, mv.getSqlCreateType()); } @@ -69,7 +69,7 @@ public void testParseCreateOrReplaceMaterializedView() throws SqlParseException assertNotNull(node); assertTrue(node instanceof SqlCreateMaterializedView); SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; - assertEquals("mv1", mv.getName()); + assertEquals("MV1", mv.getName()); assertEquals(SqlCreateType.OR_REPLACE, mv.getSqlCreateType()); } @@ -81,7 +81,7 @@ public void testParseCreateMaterializedViewIfNotExists() throws SqlParseExceptio assertNotNull(node); assertTrue(node instanceof SqlCreateMaterializedView); SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; - assertEquals("mv1", mv.getName()); + assertEquals("MV1", mv.getName()); assertEquals(SqlCreateType.IF_NOT_EXISTS, mv.getSqlCreateType()); } @@ -93,10 +93,10 @@ 
public void testParseCreateMaterializedViewWithSchema() throws SqlParseException assertNotNull(node); assertTrue(node instanceof SqlCreateMaterializedView); SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; - assertEquals("mv1", mv.getName()); + assertEquals("MV1", mv.getName()); assertEquals(2, mv.getSchemaPath().size()); - assertEquals("dfs", mv.getSchemaPath().get(0)); - assertEquals("tmp", mv.getSchemaPath().get(1)); + assertEquals("DFS", mv.getSchemaPath().get(0)); + assertEquals("TMP", mv.getSchemaPath().get(1)); } @Test @@ -107,10 +107,10 @@ public void testParseCreateMaterializedViewWithFieldList() throws SqlParseExcept assertNotNull(node); assertTrue(node instanceof SqlCreateMaterializedView); SqlCreateMaterializedView mv = (SqlCreateMaterializedView) node; - assertEquals("mv1", mv.getName()); + assertEquals("MV1", mv.getName()); assertEquals(2, mv.getFieldNames().size()); - assertEquals("col1", mv.getFieldNames().get(0)); - assertEquals("col2", mv.getFieldNames().get(1)); + assertEquals("COL1", mv.getFieldNames().get(0)); + assertEquals("COL2", mv.getFieldNames().get(1)); } @Test @@ -121,7 +121,7 @@ public void testParseDropMaterializedView() throws SqlParseException { assertNotNull(node); assertTrue(node instanceof SqlDropMaterializedView); SqlDropMaterializedView drop = (SqlDropMaterializedView) node; - assertEquals("mv1", drop.getName()); + assertEquals("MV1", drop.getName()); assertEquals(false, drop.checkViewExistence()); } @@ -133,7 +133,7 @@ public void testParseDropMaterializedViewIfExists() throws SqlParseException { assertNotNull(node); assertTrue(node instanceof SqlDropMaterializedView); SqlDropMaterializedView drop = (SqlDropMaterializedView) node; - assertEquals("mv1", drop.getName()); + assertEquals("MV1", drop.getName()); assertEquals(true, drop.checkViewExistence()); } @@ -145,7 +145,7 @@ public void testParseDropMaterializedViewWithSchema() throws SqlParseException { assertNotNull(node); assertTrue(node instanceof 
SqlDropMaterializedView); SqlDropMaterializedView drop = (SqlDropMaterializedView) node; - assertEquals("mv1", drop.getName()); + assertEquals("MV1", drop.getName()); assertEquals(2, drop.getSchemaPath().size()); } @@ -157,7 +157,7 @@ public void testParseRefreshMaterializedView() throws SqlParseException { assertNotNull(node); assertTrue(node instanceof SqlRefreshMaterializedView); SqlRefreshMaterializedView refresh = (SqlRefreshMaterializedView) node; - assertEquals("mv1", refresh.getName()); + assertEquals("MV1", refresh.getName()); } @Test @@ -168,7 +168,7 @@ public void testParseRefreshMaterializedViewWithSchema() throws SqlParseExceptio assertNotNull(node); assertTrue(node instanceof SqlRefreshMaterializedView); SqlRefreshMaterializedView refresh = (SqlRefreshMaterializedView) node; - assertEquals("mv1", refresh.getName()); + assertEquals("MV1", refresh.getName()); assertEquals(2, refresh.getSchemaPath().size()); } @@ -190,19 +190,19 @@ public void testInvalidCreateOrReplaceIfNotExists() throws SqlParseException { public void testUnparseCreateMaterializedView() throws SqlParseException { String sql = "CREATE MATERIALIZED VIEW mv1 AS SELECT * FROM t1"; SqlNode node = parse(sql); - String unparsed = node.toSqlString(null).getSql(); + String unparsed = node.toSqlString(null, true).getSql(); assertTrue(unparsed.contains("CREATE")); assertTrue(unparsed.contains("MATERIALIZED")); assertTrue(unparsed.contains("VIEW")); - assertTrue(unparsed.contains("mv1")); + assertTrue(unparsed.contains("MV1") || unparsed.contains("`MV1`")); } @Test public void testUnparseDropMaterializedView() throws SqlParseException { String sql = "DROP MATERIALIZED VIEW IF EXISTS mv1"; SqlNode node = parse(sql); - String unparsed = node.toSqlString(null).getSql(); + String unparsed = node.toSqlString(null, true).getSql(); assertTrue(unparsed.contains("DROP")); assertTrue(unparsed.contains("MATERIALIZED")); @@ -215,7 +215,7 @@ public void testUnparseDropMaterializedView() throws 
SqlParseException { public void testUnparseRefreshMaterializedView() throws SqlParseException { String sql = "REFRESH MATERIALIZED VIEW mv1"; SqlNode node = parse(sql); - String unparsed = node.toSqlString(null).getSql(); + String unparsed = node.toSqlString(null, true).getSql(); assertTrue(unparsed.contains("REFRESH")); assertTrue(unparsed.contains("MATERIALIZED")); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java index 937644def3e..06f7c929a7b 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMaterializedViewSupport.java @@ -28,7 +28,6 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -import static org.apache.drill.exec.util.StoragePluginTestUtils.DFS_TMP_SCHEMA; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -153,14 +152,18 @@ public void testRefreshMaterializedView() throws Exception { String mvName = "test_mv_refresh"; try { // Create materialized view - test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT * FROM cp.`region.json` LIMIT 5", mvName); + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); - // Refresh the materialized view + // Refresh the materialized view - this writes data to Parquet + // The output is write statistics (Fragment, records, batches) + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Verify we can query from the materialized data testBuilder() - .sqlQuery("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName) + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) .unOrdered() - .baselineColumns("ok", "summary") - .baselineValues(true, String.format("Materialized view [%s] refreshed successfully in schema [dfs.tmp].", mvName)) + 
.baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") .go(); } finally { @@ -175,20 +178,41 @@ public void testRefreshNonExistentMaterializedView() throws Exception { } @Test - public void testQueryMaterializedView() throws Exception { - String mvName = "test_mv_query"; + public void testQueryMaterializedViewBeforeRefresh() throws Exception { + String mvName = "test_mv_query_before_refresh"; + try { + // Create materialized view (no REFRESH yet) + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + + // Query the MV before REFRESH - should expand SQL definition (like a view) + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testQueryMaterializedViewAfterRefresh() throws Exception { + String mvName = "test_mv_query_after_refresh"; try { // Create materialized view - test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` ORDER BY region_id LIMIT 3", mvName); + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); - // Query the materialized view + // Refresh to populate data + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV after REFRESH - should read from Parquet data testBuilder() .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) .unOrdered() .baselineColumns("region_id", "sales_city") .baselineValues(0L, "None") - .baselineValues(1L, "San Francisco") - .baselineValues(2L, "San Diego") .go(); } finally { From 2ef36d4beac1c811380cc331c3b068584881e04d Mon Sep 17 00:00:00 2001 From: cgivre Date: Mon, 2 Feb 2026 12:35:01 -0500 Subject: [PATCH 3/8] Final work --- docs/dev/DevDocs.md | 4 + docs/dev/MaterializedViews.md | 
346 ++++++++++++++++ .../logical/MaterializedViewRewriter.java | 144 +++++-- .../sql/handlers/DefaultSqlHandler.java | 3 +- .../store/dfs/WorkspaceSchemaFactory.java | 84 ++++ .../TestMaterializedViewRewriting.java | 388 ++++++++++++++++-- .../metastore/iceberg/IcebergMetastore.java | 10 + .../IcebergMaterializedViews.java | 105 +++++ .../MaterializedViewKey.java | 110 +++++ ...MaterializedViewsOperationTransformer.java | 67 +++ ...aterializedViewsOutputDataTransformer.java | 57 +++ .../MaterializedViewsTransformer.java | 58 +++ .../config/IcebergConfigConstants.java | 15 + .../resources/drill-metastore-module.conf | 8 + .../org/apache/drill/metastore/Metastore.java | 6 + .../drill/metastore/MetastoreColumn.java | 11 +- .../BasicMaterializedViewsRequests.java | 168 ++++++++ .../MaterializedViewMetadataUnit.java | 383 +++++++++++++++++ .../materializedviews/MaterializedViews.java | 38 ++ ...aterializedViewsMetadataTypeValidator.java | 38 ++ .../metastore/metadata/MetadataType.java | 7 +- .../drill/metastore/mongo/MongoMetastore.java | 6 + .../drill/metastore/rdbms/RdbmsMetastore.java | 6 + 23 files changed, 2005 insertions(+), 57 deletions(-) create mode 100644 docs/dev/MaterializedViews.md create mode 100644 metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/IcebergMaterializedViews.java create mode 100644 metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewKey.java create mode 100644 metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOperationTransformer.java create mode 100644 metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOutputDataTransformer.java create mode 100644 
metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsTransformer.java create mode 100644 metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/BasicMaterializedViewsRequests.java create mode 100644 metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewMetadataUnit.java create mode 100644 metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViews.java create mode 100644 metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewsMetadataTypeValidator.java diff --git a/docs/dev/DevDocs.md b/docs/dev/DevDocs.md index 3d64b7bc9f0..eef8105b4d6 100644 --- a/docs/dev/DevDocs.md +++ b/docs/dev/DevDocs.md @@ -23,3 +23,7 @@ For more info about the use of maven see [Maven.md](Maven.md) ## Jetty 12 Migration For information about the Jetty 12 upgrade, known limitations, and developer guidelines see [Jetty12Migration.md](Jetty12Migration.md) + +## Materialized Views + +For information about materialized view support, including SQL syntax, query rewriting, and metastore integration, see [MaterializedViews.md](MaterializedViews.md) diff --git a/docs/dev/MaterializedViews.md b/docs/dev/MaterializedViews.md new file mode 100644 index 00000000000..c98fb68908b --- /dev/null +++ b/docs/dev/MaterializedViews.md @@ -0,0 +1,346 @@ +# Materialized Views + +Materialized views in Apache Drill provide a mechanism to store pre-computed query results for improved query performance. Unlike regular views which are virtual and execute the underlying query each time they are accessed, materialized views persist the query results as physical data that can be queried directly. 
+ +## Overview + +Materialized views are useful for: +- Accelerating frequently executed queries with complex aggregations or joins +- Reducing compute resources for repetitive analytical workloads +- Providing consistent snapshots of data at a point in time + +Drill's materialized view implementation includes: +- SQL syntax for creating, dropping, and refreshing materialized views +- Automatic query rewriting using Calcite's SubstitutionVisitor +- Integration with Drill Metastore for centralized metadata management +- Parquet-based data storage for efficient columnar access + +## SQL Syntax + +### CREATE MATERIALIZED VIEW + +```sql +CREATE MATERIALIZED VIEW [schema.]view_name AS select_statement +CREATE OR REPLACE MATERIALIZED VIEW [schema.]view_name AS select_statement +CREATE MATERIALIZED VIEW IF NOT EXISTS [schema.]view_name AS select_statement +``` + +Examples: + +```sql +-- Create a materialized view with aggregations +CREATE MATERIALIZED VIEW dfs.tmp.sales_summary AS +SELECT region, product_category, SUM(amount) as total_sales, COUNT(*) as num_transactions +FROM dfs.`/data/sales` +GROUP BY region, product_category; + +-- Create or replace an existing materialized view +CREATE OR REPLACE MATERIALIZED VIEW dfs.tmp.customer_stats AS +SELECT customer_id, COUNT(*) as order_count, AVG(order_total) as avg_order +FROM dfs.`/data/orders` +GROUP BY customer_id; + +-- Create only if it doesn't exist +CREATE MATERIALIZED VIEW IF NOT EXISTS dfs.tmp.daily_metrics AS +SELECT date_col, SUM(value) as daily_total +FROM dfs.`/data/metrics` +GROUP BY date_col; +``` + +### DROP MATERIALIZED VIEW + +```sql +DROP MATERIALIZED VIEW [schema.]view_name +DROP MATERIALIZED VIEW IF EXISTS [schema.]view_name +``` + +Examples: + +```sql +-- Drop a materialized view (error if not exists) +DROP MATERIALIZED VIEW dfs.tmp.sales_summary; + +-- Drop only if it exists (no error if not exists) +DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.old_view; +``` + +### REFRESH MATERIALIZED VIEW + +```sql 
+REFRESH MATERIALIZED VIEW [schema.]view_name +``` + +The REFRESH command re-executes the underlying query and replaces the stored data with fresh results. + +Example: + +```sql +-- Refresh the materialized view with current data +REFRESH MATERIALIZED VIEW dfs.tmp.sales_summary; +``` + +## Query Rewriting + +Drill supports automatic query rewriting where queries against base tables can be transparently rewritten to use materialized views when appropriate. This feature leverages Apache Calcite's SubstitutionVisitor for structural query matching. + +### Enabling Query Rewriting + +Query rewriting is controlled by the `planner.enable_materialized_view_rewrite` option: + +```sql +-- Enable materialized view rewriting (enabled by default) +SET `planner.enable_materialized_view_rewrite` = true; + +-- Disable materialized view rewriting +SET `planner.enable_materialized_view_rewrite` = false; +``` + +### How Rewriting Works + +When query rewriting is enabled, Drill's query planner: + +1. Discovers all available materialized views in accessible schemas +2. Filters candidates to those with COMPLETE refresh status +3. For each candidate, parses the MV's defining SQL and converts it to a relational expression +4. Uses Calcite's SubstitutionVisitor to check if the MV's query structure matches part or all of the user's query +5. If a match is found, substitutes the matching portion with a scan of the materialized view data +6. Uses the first successful substitution as the rewritten plan; there is currently no cost-based selection among multiple matching materialized views + +### Rewriting Scenarios + +Query rewriting can apply in several scenarios: + +**Exact Match**: The user's query exactly matches the MV definition.
+ +```sql +-- MV definition +CREATE MATERIALIZED VIEW dfs.tmp.region_totals AS +SELECT r_regionkey, COUNT(*) as cnt FROM cp.`region.json` GROUP BY r_regionkey; + +-- This query will use the MV +SELECT r_regionkey, COUNT(*) as cnt FROM cp.`region.json` GROUP BY r_regionkey; +``` + +**Partial Match with Additional Filters**: The user's query adds filters on top of the MV. + +```sql +-- This query against the base table may be rewritten to scan the MV and then apply the filter +SELECT r_regionkey, COUNT(*) as cnt FROM cp.`region.json` GROUP BY r_regionkey HAVING COUNT(*) > 10; +``` + +**Aggregate Rollup**: Higher-level aggregations computed from MV aggregates. + +### Viewing the Execution Plan + +Use EXPLAIN to see if a materialized view is being used: + +```sql +EXPLAIN PLAN FOR +SELECT r_regionkey, COUNT(*) FROM cp.`region.json` GROUP BY r_regionkey; +``` + +If the MV is used, the plan will show a scan of the materialized view data location rather than the original table. + +## Storage Architecture + +### Definition Storage + +Materialized view definitions are stored as JSON files with the `.materialized_view.drill` extension in the workspace directory. This follows the same pattern as regular Drill views (`.view.drill` files). + +The definition file contains: +- View name +- Defining SQL statement +- Field names and types +- Workspace schema path +- Data storage path +- Last refresh timestamp +- Refresh status (PENDING or COMPLETE) + +Example definition file structure: + +```json +{ + "name": "sales_summary", + "sql": "SELECT region, SUM(amount) as total FROM sales GROUP BY region", + "fields": [ + {"name": "region", "type": "VARCHAR"}, + {"name": "total", "type": "DOUBLE"} + ], + "workspaceSchemaPath": ["dfs", "tmp"], + "dataStoragePath": "sales_summary", + "lastRefreshTime": 1706900000000, + "refreshStatus": "COMPLETE" +} +``` + +### Data Storage + +Materialized view data is stored as Parquet files in a directory named `{view_name}_mv_data` within the workspace.
Parquet format provides: +- Efficient columnar storage +- Compression +- Predicate pushdown support +- Schema evolution capabilities + +For a materialized view named `sales_summary` in `dfs.tmp`, the storage structure would be: + +``` +/tmp/ + sales_summary.materialized_view.drill # Definition file + sales_summary_mv_data/ # Data directory + 0_0_0.parquet # Data files + 0_0_1.parquet + ... +``` + +## Metastore Integration + +When Drill Metastore is enabled, materialized view metadata is automatically synchronized to the central metastore. This provides: +- Centralized metadata management across the cluster +- Better discoverability of materialized views +- Integration with metadata-driven query optimization + +### Enabling Metastore Integration + +Set the `metastore.enabled` option to enable metastore integration: + +```sql +SET `metastore.enabled` = true; +``` + +When enabled, the following operations automatically sync to the metastore: +- CREATE MATERIALIZED VIEW: Stores MV metadata in metastore +- DROP MATERIALIZED VIEW: Removes MV metadata from metastore +- REFRESH MATERIALIZED VIEW: Updates MV metadata in metastore + +### Metastore Schema + +The MaterializedViewMetadataUnit stored in the metastore contains: + +| Field | Type | Description | +|-------|------|-------------| +| storagePlugin | String | Storage plugin name (e.g., "dfs") | +| workspace | String | Workspace name (e.g., "tmp") | +| name | String | Materialized view name | +| owner | String | Owner username | +| sql | String | Defining SQL statement | +| workspaceSchemaPath | List | Schema path components | +| dataLocation | String | Path to data directory | +| refreshStatus | String | PENDING or COMPLETE | +| lastRefreshTime | Long | Timestamp of last refresh | +| lastModifiedTime | Long | Timestamp of last modification | + +## Configuration Options + +| Option | Default | Description | +|--------|---------|-------------| +| `planner.enable_materialized_view_rewrite` | true | Enables automatic query 
rewriting to use materialized views | +| `metastore.enabled` | false | Enables Drill Metastore for centralized metadata storage | + +## Lifecycle Management + +### Creating a Materialized View + +1. Parse and validate the SQL statement +2. Create the data directory in the workspace +3. Execute the defining query and write results as Parquet +4. Write the definition file with COMPLETE status +5. Sync metadata to metastore (if enabled) + +### Refreshing a Materialized View + +1. Read the existing definition file +2. Delete the existing data directory +3. Re-execute the defining query and write new results +4. Update the definition file with new refresh timestamp +5. Sync updated metadata to metastore (if enabled) + +### Dropping a Materialized View + +1. Delete the definition file +2. Delete the data directory and all contents +3. Remove metadata from metastore (if enabled) + +## Limitations + +Current limitations of the materialized view implementation: + +1. **Full Refresh Only**: Incremental refresh is not yet supported. Each refresh completely replaces the stored data. + +2. **No Automatic Refresh**: Materialized views must be manually refreshed. There is no automatic refresh mechanism based on source data changes. + +3. **Writable File-System Workspaces Only**: Materialized views can only be created in file-system based workspaces that support write operations. + +4. **No Partitioning**: Materialized view data is not partitioned. All data is stored in a single directory. + +5. **Query Rewriting Scope**: Query rewriting works best for exact or near-exact matches. Complex transformations may not be recognized.
+ +## Implementation Details + +### Key Classes + +| Class | Package | Description | +|-------|---------|-------------| +| MaterializedView | org.apache.drill.exec.dotdrill | Data model for MV definition | +| DrillMaterializedViewTable | org.apache.drill.exec.planner.logical | TranslatableTable implementation | +| MaterializedViewHandler | org.apache.drill.exec.planner.sql.handlers | SQL handler for CREATE/DROP/REFRESH | +| MaterializedViewRewriter | org.apache.drill.exec.planner.logical | Query rewriting using Calcite | +| SqlCreateMaterializedView | org.apache.drill.exec.planner.sql.parser | SQL parser for CREATE | +| SqlDropMaterializedView | org.apache.drill.exec.planner.sql.parser | SQL parser for DROP | +| SqlRefreshMaterializedView | org.apache.drill.exec.planner.sql.parser | SQL parser for REFRESH | + +### Parser Grammar + +The SQL parser grammar for materialized views is defined in `parserImpls.ftl`. The grammar supports: +- CREATE [OR REPLACE] MATERIALIZED VIEW [IF NOT EXISTS] +- DROP MATERIALIZED VIEW [IF EXISTS] +- REFRESH MATERIALIZED VIEW + +### Query Rewriting Process + +The MaterializedViewRewriter class implements query rewriting: + +1. **Discovery**: Scans all accessible schemas for materialized views +2. **Filtering**: Selects candidates with COMPLETE refresh status +3. **Matching**: Uses Calcite's SubstitutionVisitor to match query structures +4. **Substitution**: Replaces matched portions with MV scans +5. 
**Selection**: Returns the first successful substitution + +The SubstitutionVisitor performs structural matching by: +- Comparing relational expression trees +- Identifying equivalent subexpressions +- Handling column renaming and reordering +- Supporting partial matches with residual predicates + +## Testing + +### Unit Tests + +- `TestMaterializedViewSqlParser`: Parser syntax validation +- `TestMaterializedView`: Data model serialization tests + +### Integration Tests + +- `TestMaterializedViewSupport`: End-to-end CREATE/DROP/REFRESH tests +- `TestMaterializedViewRewriting`: Query rewriting scenarios + +Run the tests with: + +```bash +mvn test -pl exec/java-exec -Dtest='TestMaterializedView*' +``` + +## Future Enhancements + +Planned improvements for future releases: + +1. **Incremental Refresh**: Support for refreshing only changed data based on source table modifications. + +2. **Automatic Refresh**: Scheduled or trigger-based automatic refresh mechanisms. + +3. **Partitioned Storage**: Partition materialized view data for better query performance. + +4. **Cost-Based Selection**: When multiple MVs match, select based on estimated query cost. + +5. **Staleness Tracking**: Track source table changes to identify stale materialized views. + +6. **INFORMATION_SCHEMA Integration**: Expose materialized views in INFORMATION_SCHEMA tables. 
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java index ac999e928c7..c9927571f32 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/MaterializedViewRewriter.java @@ -22,10 +22,14 @@ import java.util.List; import java.util.Set; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.SubstitutionVisitor; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; import org.apache.calcite.schema.SchemaPlus; import org.apache.drill.exec.dotdrill.MaterializedView; import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.planner.sql.conversion.SqlConverter; import org.apache.drill.exec.store.AbstractSchema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,58 +40,152 @@ * When enabled via planner.enable_materialized_view_rewrite, this class attempts * to rewrite queries to use materialized views when beneficial. *

- * Current implementation provides the infrastructure for MV rewriting. - * Future enhancements can add: - *

    - *
  • Structural matching using Calcite's SubstitutionVisitor
  • - *
  • Partial query matching (query is subset of MV)
  • - *
  • Aggregate rollup rewriting
  • - *
  • Cost-based selection among multiple candidate MVs
  • - *
+ * The rewriting uses Calcite's SubstitutionVisitor to perform structural matching + * between the user's query and available materialized view definitions. If the + * query matches (or is a subset of) an MV's definition, the query is rewritten + * to scan from the pre-computed MV data instead. */ public class MaterializedViewRewriter { private static final Logger logger = LoggerFactory.getLogger(MaterializedViewRewriter.class); private final QueryContext context; private final SchemaPlus defaultSchema; + private final SqlConverter sqlConverter; - public MaterializedViewRewriter(QueryContext context, SchemaPlus defaultSchema) { + public MaterializedViewRewriter(QueryContext context, SchemaPlus defaultSchema, SqlConverter sqlConverter) { this.context = context; this.defaultSchema = defaultSchema; + this.sqlConverter = sqlConverter; } /** * Attempts to rewrite the given RelNode to use a materialized view. * - * @param relNode the query plan to potentially rewrite + * @param queryRel the query plan to potentially rewrite * @return the rewritten plan using an MV, or the original plan if no rewrite is possible */ - public RelNode rewrite(RelNode relNode) { + public RelNode rewrite(RelNode queryRel) { if (!context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { - return relNode; + return queryRel; } - // Find all available materialized views + // Find all available materialized views that have been refreshed List candidates = findCandidateMaterializedViews(); if (candidates.isEmpty()) { - logger.debug("No materialized views available for rewriting"); - return relNode; + logger.debug("No refreshed materialized views available for rewriting"); + return queryRel; } logger.debug("Found {} materialized view candidates for potential rewriting", candidates.size()); - // Future: Implement structural matching here - // For now, log that rewriting is enabled but not yet implemented + // Try each candidate MV for structural matching for (MaterializedViewCandidate 
candidate : candidates) { - logger.debug("MV candidate: {} in schema {} (refreshed: {})", - candidate.getName(), - candidate.getSchemaPath(), - candidate.isRefreshed()); + if (!candidate.isRefreshed()) { + logger.debug("Skipping MV {} - not refreshed", candidate.getName()); + continue; + } + + try { + RelNode rewritten = tryRewriteWithMV(queryRel, candidate); + if (rewritten != null) { + logger.info("Query rewritten to use materialized view: {}", candidate.getName()); + return rewritten; + } + } catch (Exception e) { + logger.debug("Failed to rewrite with MV {}: {}", candidate.getName(), e.getMessage()); + } + } + + logger.debug("No materialized view matched the query"); + return queryRel; + } + + /** + * Attempts to rewrite the query using a specific materialized view. + * + * @param queryRel the user's query plan + * @param candidate the MV candidate to try + * @return the rewritten plan if successful, null otherwise + */ + private RelNode tryRewriteWithMV(RelNode queryRel, MaterializedViewCandidate candidate) { + // Parse the MV's SQL definition into a RelNode + RelNode mvQueryRel = parseMvSql(candidate); + if (mvQueryRel == null) { + return null; } - // Return original plan - actual matching not yet implemented - return relNode; + // Build a RelNode that represents scanning the MV's pre-computed data + RelNode mvScanRel = buildMvScanRel(candidate); + if (mvScanRel == null) { + return null; + } + + logger.debug("Attempting structural match for MV: {}", candidate.getName()); + if (logger.isDebugEnabled()) { + logger.debug("Query plan:\n{}", RelOptUtil.toString(queryRel)); + logger.debug("MV definition plan:\n{}", RelOptUtil.toString(mvQueryRel)); + } + + // Use Calcite's SubstitutionVisitor to check if the query matches the MV + // Constructor takes (target, query) where: + // - target: the MV definition (the pattern to look for inside the query) + // - query: the user's query plan, which is searched for the target + // Then go(replacement) returns the query with each match replaced by the MV scan +
SubstitutionVisitor visitor = new SubstitutionVisitor(mvQueryRel, queryRel); + List<RelNode> substitutions = visitor.go(mvScanRel); + + if (substitutions != null && !substitutions.isEmpty()) { + RelNode substituted = substitutions.get(0); + if (logger.isDebugEnabled()) { + logger.debug("Substitution found! Rewritten plan:\n{}", RelOptUtil.toString(substituted)); + } + return substituted; + } + + return null; + } + + /** + * Parses the MV's SQL definition into a RelNode. + */ + private RelNode parseMvSql(MaterializedViewCandidate candidate) { + try { + String mvSql = candidate.getSql(); + List schemaPath = candidate.getMaterializedView().getWorkspaceSchemaPath(); + + // Parse and convert the MV's SQL to RelNode + org.apache.calcite.sql.SqlNode parsedNode = sqlConverter.parse(mvSql); + org.apache.calcite.sql.SqlNode validatedNode = sqlConverter.validate(parsedNode); + RelRoot relRoot = sqlConverter.toRel(validatedNode); + + return relRoot.rel; + } catch (Exception e) { + logger.debug("Failed to parse MV SQL for {}: {}", candidate.getName(), e.getMessage()); + return null; + } + } + + /** + * Builds a RelNode that scans the MV's pre-computed data. + * This creates a table scan of the MV's data directory.
+ */ + private RelNode buildMvScanRel(MaterializedViewCandidate candidate) { + try { + // Build SQL to scan the MV data table + String mvDataTable = candidate.getSchemaPath() + ".`" + candidate.getName() + "_mv_data`"; + String scanSql = "SELECT * FROM " + mvDataTable; + + // Parse and convert to RelNode + org.apache.calcite.sql.SqlNode parsedNode = sqlConverter.parse(scanSql); + org.apache.calcite.sql.SqlNode validatedNode = sqlConverter.validate(parsedNode); + RelRoot relRoot = sqlConverter.toRel(validatedNode); + + return relRoot.rel; + } catch (Exception e) { + logger.debug("Failed to build MV scan for {}: {}", candidate.getName(), e.getMessage()); + return null; + } } /** diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java index d03ec359300..5e45723daf2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java @@ -204,7 +204,8 @@ protected ConvertedRelNode validateAndConvert(SqlNode sqlNode) throws ForemanSet // Attempt materialized view rewriting if enabled if (context.getPlannerSettings().isMaterializedViewRewriteEnabled()) { - MaterializedViewRewriter mvRewriter = new MaterializedViewRewriter(context, context.getNewDefaultSchema()); + MaterializedViewRewriter mvRewriter = new MaterializedViewRewriter( + context, context.getNewDefaultSchema(), config.getConverter()); rel = mvRewriter.rewrite(rel); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java index a800379959b..398e0d426e1 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java @@ -78,6 +78,7 @@ import org.apache.drill.exec.store.StorageStrategy; import org.apache.drill.exec.util.ImpersonationUtil; import org.apache.drill.metastore.MetastoreRegistry; +import org.apache.drill.metastore.components.materializedviews.MaterializedViewMetadataUnit; import org.apache.drill.metastore.components.tables.MetastoreTableInfo; import org.apache.drill.metastore.exceptions.MetastoreException; import org.apache.drill.metastore.metadata.TableInfo; @@ -392,6 +393,9 @@ public boolean createMaterializedView(MaterializedView materializedView) throws mapper.writeValue(stream, materializedView); } + // Sync to metastore if enabled + syncMaterializedViewToMetastore(materializedView); + // Mark as complete (data will be populated by the handler via CTAS-like operation) return replaced; } @@ -410,6 +414,9 @@ public void dropMaterializedView(String viewName) throws IOException { if (getFS().exists(dataPath)) { getFS().delete(dataPath, true); } + + // Remove from metastore if enabled + removeMaterializedViewFromMetastore(viewName); } @Override @@ -446,6 +453,9 @@ public void refreshMaterializedView(String viewName) throws IOException { try (OutputStream stream = DrillFileSystem.create(getFS(), viewPath, viewPerms)) { mapper.writeValue(stream, updatedMV); } + + // Sync updated metadata to metastore if enabled + syncMaterializedViewToMetastore(updatedMV); } @Override @@ -517,6 +527,80 @@ private Set getMaterializedViews() { return viewSet; } + /** + * Checks if the metastore is enabled for this schema. + * + * @return true if metastore is enabled, false otherwise + */ + private boolean isMetastoreEnabled() { + return schemaConfig.getOption(ExecConstants.METASTORE_ENABLED).bool_val; + } + + /** + * Syncs materialized view metadata to the metastore if enabled. + * This is a best-effort operation that doesn't fail if metastore is unavailable. 
+ * + * @param materializedView the materialized view to sync + */ + private void syncMaterializedViewToMetastore(MaterializedView materializedView) { + if (!isMetastoreEnabled()) { + return; + } + + try { + MetastoreRegistry metastoreRegistry = plugin.getContext().getMetastoreRegistry(); + MaterializedViewMetadataUnit unit = MaterializedViewMetadataUnit.builder() + .storagePlugin(plugin.getName()) + .workspace(schemaName) + .name(materializedView.getName()) + .owner(schemaConfig.getUserName()) + .sql(materializedView.getSql()) + .workspaceSchemaPath(materializedView.getWorkspaceSchemaPath()) + .dataLocation(materializedView.getDataStoragePath()) + .refreshStatus(materializedView.getRefreshStatus() != null + ? materializedView.getRefreshStatus().name() : null) + .lastRefreshTime(materializedView.getLastRefreshTime()) + .lastModifiedTime(System.currentTimeMillis()) + .build(); + + metastoreRegistry.get() + .materializedViews() + .modify() + .overwrite(Collections.singletonList(unit)) + .execute(); + + logger.debug("Synced materialized view [{}] to metastore", materializedView.getName()); + } catch (MetastoreException e) { + logger.warn("Failed to sync materialized view [{}] to metastore: {}", + materializedView.getName(), e.getMessage()); + } + } + + /** + * Removes materialized view metadata from the metastore if enabled. + * This is a best-effort operation that doesn't fail if metastore is unavailable. 
+ * + * @param viewName the name of the materialized view to remove + */ + private void removeMaterializedViewFromMetastore(String viewName) { + if (!isMetastoreEnabled()) { + return; + } + + try { + MetastoreRegistry metastoreRegistry = plugin.getContext().getMetastoreRegistry(); + metastoreRegistry.get() + .materializedViews() + .basicRequests() + .delete(plugin.getName(), schemaName, viewName); + + logger.debug("Removed materialized view [{}] from metastore", viewName); + } catch (MetastoreException e) { + logger.warn("Failed to remove materialized view [{}] from metastore: {}", + viewName, e.getMessage()); + } + } + private Set getViews() { Set viewSet = Sets.newHashSet(); // Look for files with ".view.drill" extension. diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java index 0b638a82911..14df93db983 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/TestMaterializedViewRewriting.java @@ -31,6 +31,8 @@ *

* When planner.enable_materialized_view_rewrite is enabled, queries * may be rewritten to use pre-computed materialized views. + *

+ * The rewriting uses Calcite's SubstitutionVisitor for structural matching. */ @Category(SqlTest.class) public class TestMaterializedViewRewriting extends PlanTestBase { @@ -40,6 +42,8 @@ public static void setupTestFiles() { dirTestWatcher.copyResourceToRoot(Paths.get("nation")); } + // ==================== Basic Functionality Tests ==================== + @Test public void testRewritingEnabledByDefault() throws Exception { String mvName = "test_mv_rewrite_enabled_default"; @@ -60,17 +64,28 @@ public void testRewritingEnabledByDefault() throws Exception { } @Test - public void testRewritingCanBeEnabled() throws Exception { - String mvName = "test_mv_rewrite_enabled"; + public void testRewritingWithNoMaterializedViews() throws Exception { + // Query should work even when no MVs exist + testBuilder() + .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } + + @Test + public void testRewritingWithDisabledOption() throws Exception { + String mvName = "test_mv_disabled"; try { - // Create materialized view - test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + // Create and refresh MV + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json`", mvName); test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); - // Enable rewriting - test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + // Disable rewriting + test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); - // Query should still work (even though actual matching isn't implemented yet) + // Query should still work, just not use the MV testBuilder() .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") .unOrdered() @@ -78,70 +93,381 @@ public void 
testRewritingCanBeEnabled() throws Exception { .baselineValues(0L, "None") .go(); } finally { - test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); } } + // ==================== Direct MV Query Tests ==================== + @Test - public void testRewritingWithNoMaterializedViews() throws Exception { + public void testQueryMVDirectlyWithRewritingEnabled() throws Exception { + String mvName = "test_mv_direct_query"; try { - // Enable rewriting - test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + // Create and refresh materialized view + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); - // Query should work even when no MVs exist + // Query the MV directly - should still work testBuilder() - .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) .unOrdered() .baselineColumns("region_id", "sales_city") .baselineValues(0L, "None") .go(); } finally { - test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); } } @Test - public void testQueryMVDirectlyWithRewritingEnabled() throws Exception { - String mvName = "test_mv_direct_query"; + public void testQueryMVWithProjection() throws Exception { + String mvName = "test_mv_projection"; try { - // Create and refresh materialized view - test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + // Create MV with multiple columns + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city, sales_country FROM 
cp.`region.json` WHERE region_id < 3", mvName); test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); - // Enable rewriting - test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + // Query with subset of columns - verify count matches + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(3L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } - // Query the MV directly - should still work + // ==================== Exact Match Tests ==================== + + @Test + public void testExactMatchSimpleQuery() throws Exception { + String mvName = "test_mv_exact_simple"; + try { + // Create and refresh MV + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json`", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query - verify results exist testBuilder() - .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .sqlQuery("SELECT COUNT(*) AS cnt FROM cp.`region.json`") + .unOrdered() + .baselineColumns("cnt") + .baselineValues(110L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testExactMatchWithFilter() throws Exception { + String mvName = "test_mv_exact_filter"; + try { + // Create MV with filter + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query with same filter + testBuilder() + .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") .unOrdered() .baselineColumns("region_id", "sales_city") .baselineValues(0L, "None") .go(); } finally { - test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); } } + // 
==================== Aggregation Tests ==================== + @Test public void testAggregateQueryWithRewritingEnabled() throws Exception { String mvName = "test_mv_agg_rewrite"; try { - // Create MV with aggregation on a simple filter + // Create MV with aggregation test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT sales_country, COUNT(*) AS cnt FROM cp.`region.json` WHERE region_id < 5 GROUP BY sales_country", mvName); test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); - // Enable rewriting - test("ALTER SESSION SET `%s` = true", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); - - // Run aggregate query (same as MV definition) - // With future matching, this could be rewritten to use the MV - // For now just verify the query works with rewriting enabled + // Run aggregate query test("SELECT sales_country, COUNT(*) AS cnt FROM cp.`region.json` WHERE region_id < 5 GROUP BY sales_country"); } finally { - test("ALTER SESSION SET `%s` = false", ExecConstants.ENABLE_MATERIALIZED_VIEW_REWRITE_KEY); + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testAggregateCountStar() throws Exception { + String mvName = "test_mv_count_star"; + try { + // Create MV with COUNT(*) + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT COUNT(*) AS total FROM cp.`region.json`", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV directly + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("total") + .baselineValues(110L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testAggregateSumAndAvg() throws Exception { + String mvName = "test_mv_sum_avg"; + try { + // Create MV with SUM and AVG + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT sales_country, SUM(region_id) AS sum_id, AVG(region_id) AS avg_id FROM cp.`region.json` GROUP BY sales_country", mvName); + test("REFRESH MATERIALIZED 
VIEW dfs.tmp.%s", mvName); + + // Query the MV + test("SELECT * FROM dfs.tmp.%s WHERE sales_country IS NOT NULL", mvName); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + // ==================== Un-refreshed MV Tests ==================== + + @Test + public void testUnrefreshedMVNotUsedForRewriting() throws Exception { + String mvName = "test_mv_unrefreshed"; + try { + // Create MV but don't refresh + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json`", mvName); + + // Query should work (uses original data, not MV) + testBuilder() + .sqlQuery("SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0") + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testQueryUnrefreshedMVDirectly() throws Exception { + String mvName = "test_mv_query_unrefreshed"; + try { + // Create MV but don't refresh + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = 0", mvName); + + // Query the MV directly - should expand SQL definition + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("region_id", "sales_city") + .baselineValues(0L, "None") + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + // ==================== Multiple MV Tests ==================== + + @Test + public void testMultipleMVsExist() throws Exception { + String mvName1 = "test_mv_multi_1"; + String mvName2 = "test_mv_multi_2"; + try { + // Create two MVs + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id FROM cp.`region.json`", mvName1); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName1); + + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT sales_city FROM cp.`region.json`", mvName2); + 
test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName2); + + // Query should work with multiple MVs available + testBuilder() + .sqlQuery("SELECT region_id FROM cp.`region.json` WHERE region_id = 0") + .unOrdered() + .baselineColumns("region_id") + .baselineValues(0L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName1); + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName2); + } + } + + // ==================== Edge Cases ==================== + + @Test + public void testMVWithEmptyResult() throws Exception { + String mvName = "test_mv_empty"; + try { + // Create MV that returns no rows + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id = -999", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV + testBuilder() + .sqlQuery("SELECT * FROM dfs.tmp.%s", mvName) + .unOrdered() + .expectsEmptyResultSet() + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMVWithOrderBy() throws Exception { + String mvName = "test_mv_orderby"; + try { + // Create MV + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` WHERE region_id < 5", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV (no ORDER BY is issued here) - just verify count is correct + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(5L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMVWithDistinct() throws Exception { + String mvName = "test_mv_distinct"; + try { + // Create MV with DISTINCT + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT DISTINCT sales_country FROM cp.`region.json` WHERE sales_country IS NOT NULL", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV -
verify the exact number of distinct non-null countries + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(4L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMVWithLimit() throws Exception { + String mvName = "test_mv_limit"; + try { + // Create MV with LIMIT + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_city FROM cp.`region.json` ORDER BY region_id LIMIT 5", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(5L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + // ==================== Complex Query Tests ==================== + + @Test + public void testMVWithJoin() throws Exception { + String mvName = "test_mv_join"; + try { + // Create MV with self-join + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS " + + "SELECT a.region_id, a.sales_city, b.sales_country " + + "FROM cp.`region.json` a " + + "JOIN cp.`region.json` b ON a.region_id = b.region_id " + + "WHERE a.region_id = 0", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV - verify it works (don't assume specific data values) + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(1L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMVWithSubquery() throws Exception { + String mvName = "test_mv_subquery"; + try { + // Create MV with subquery + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS " + + "SELECT region_id, sales_city FROM cp.`region.json` " + + "WHERE region_id IN (SELECT region_id FROM cp.`region.json` WHERE region_id < 3)",
mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(3L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + // ==================== Data Type Tests ==================== + + @Test + public void testMVWithNullValues() throws Exception { + String mvName = "test_mv_nulls"; + try { + // Create MV that includes data (some rows may have null values) + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, sales_country FROM cp.`region.json` WHERE region_id < 3", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query should work correctly - just verify count + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(3L) + .go(); + } finally { + test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); + } + } + + @Test + public void testMVWithStringOperations() throws Exception { + String mvName = "test_mv_string"; + try { + // Create MV with string operations + test("CREATE MATERIALIZED VIEW dfs.tmp.%s AS SELECT region_id, UPPER(sales_city) AS upper_city FROM cp.`region.json` WHERE region_id < 3", mvName); + test("REFRESH MATERIALIZED VIEW dfs.tmp.%s", mvName); + + // Query the MV - verify the row count only (the UPPER output itself is not inspected) + testBuilder() + .sqlQuery("SELECT COUNT(*) AS cnt FROM dfs.tmp.%s", mvName) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(3L) + .go(); + } finally { test("DROP MATERIALIZED VIEW IF EXISTS dfs.tmp.%s", mvName); } } diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/IcebergMetastore.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/IcebergMetastore.java index b94e7eea037..3cb3c53dcd1 100644 ---
a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/IcebergMetastore.java +++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/IcebergMetastore.java @@ -20,8 +20,10 @@ import com.typesafe.config.Config; import org.apache.drill.common.config.DrillConfig; import org.apache.drill.metastore.Metastore; +import org.apache.drill.metastore.components.materializedviews.MaterializedViews; import org.apache.drill.metastore.components.tables.Tables; import org.apache.drill.metastore.components.views.Views; +import org.apache.drill.metastore.iceberg.components.materializedviews.IcebergMaterializedViews; import org.apache.drill.metastore.iceberg.components.tables.IcebergTables; import org.apache.drill.metastore.iceberg.config.IcebergConfigConstants; import org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException; @@ -90,6 +92,14 @@ public Views views() { throw new UnsupportedOperationException("Views metadata support is not implemented"); } + @Override + public MaterializedViews materializedViews() { + Table table = loadTable(IcebergConfigConstants.COMPONENTS_MATERIALIZED_VIEWS_LOCATION, + IcebergConfigConstants.COMPONENTS_MATERIALIZED_VIEWS_PROPERTIES, + IcebergMaterializedViews.SCHEMA, MaterializedViews.class); + return new IcebergMaterializedViews(table); + } + /** * Initializes {@link Configuration} based on config properties. * if config properties are not indicated, returns default instance. 
diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/IcebergMaterializedViews.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/IcebergMaterializedViews.java new file mode 100644 index 00000000000..82ec5d5f390 --- /dev/null +++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/IcebergMaterializedViews.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.metastore.iceberg.components.materializedviews; + +import org.apache.drill.metastore.MetastoreColumn; +import org.apache.drill.metastore.components.materializedviews.MaterializedViewMetadataUnit; +import org.apache.drill.metastore.components.materializedviews.MaterializedViews; +import org.apache.drill.metastore.components.materializedviews.MaterializedViewsMetadataTypeValidator; +import org.apache.drill.metastore.iceberg.IcebergMetastoreContext; +import org.apache.drill.metastore.iceberg.operate.ExpirationHandler; +import org.apache.drill.metastore.iceberg.operate.IcebergMetadata; +import org.apache.drill.metastore.iceberg.operate.IcebergModify; +import org.apache.drill.metastore.iceberg.operate.IcebergRead; +import org.apache.drill.metastore.iceberg.schema.IcebergTableSchema; +import org.apache.drill.metastore.iceberg.transform.Transformer; +import org.apache.drill.metastore.iceberg.write.FileWriter; +import org.apache.drill.metastore.iceberg.write.ParquetFileWriter; +import org.apache.drill.metastore.operate.Metadata; +import org.apache.drill.metastore.operate.Modify; +import org.apache.drill.metastore.operate.Read; +import org.apache.iceberg.Table; + +import java.util.Arrays; +import java.util.List; + +/** + * Metastore MaterializedViews component which stores MV metadata in the corresponding Iceberg table. + * Provides methods to read and modify materialized view metadata. + */ +public class IcebergMaterializedViews implements MaterializedViews, IcebergMetastoreContext<MaterializedViewMetadataUnit> { + + /** + * Metastore MaterializedViews component partition keys. + * MVs are partitioned by storage plugin, workspace, and name for efficient lookups.
+ */ + private static final List<MetastoreColumn> PARTITION_KEYS = Arrays.asList( + MetastoreColumn.STORAGE_PLUGIN, + MetastoreColumn.WORKSPACE, + MetastoreColumn.MV_NAME); + + public static final IcebergTableSchema SCHEMA = + IcebergTableSchema.of(MaterializedViewMetadataUnit.class, PARTITION_KEYS); + + private final Table table; + private final ExpirationHandler expirationHandler; + + public IcebergMaterializedViews(Table table) { + this.table = table; + this.expirationHandler = new ExpirationHandler(table); + } + + public IcebergMetastoreContext<MaterializedViewMetadataUnit> context() { + return this; + } + + @Override + public Metadata metadata() { + return new IcebergMetadata(table); + } + + @Override + public Read<MaterializedViewMetadataUnit> read() { + return new IcebergRead<>(MaterializedViewsMetadataTypeValidator.INSTANCE, context()); + } + + @Override + public Modify<MaterializedViewMetadataUnit> modify() { + return new IcebergModify<>(MaterializedViewsMetadataTypeValidator.INSTANCE, context()); + } + + @Override + public Table table() { + return table; + } + + @Override + public FileWriter fileWriter() { + return new ParquetFileWriter(table); + } + + @Override + public Transformer<MaterializedViewMetadataUnit> transformer() { + return new MaterializedViewsTransformer(context()); + } + + @Override + public ExpirationHandler expirationHandler() { + return expirationHandler; + } +} diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewKey.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewKey.java new file mode 100644 index 00000000000..c2a784ad5eb --- /dev/null +++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewKey.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.iceberg.components.materializedviews; + +import org.apache.drill.metastore.MetastoreColumn; +import org.apache.drill.metastore.components.materializedviews.MaterializedViewMetadataUnit; +import org.apache.hadoop.fs.Path; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Materialized View key that identifies MV by storage plugin, workspace and MV name. + * Used for grouping MV metadata when writing to Iceberg table. + */ +public class MaterializedViewKey { + + private final String storagePlugin; + private final String workspace; + private final String name; + + private MaterializedViewKey(String storagePlugin, String workspace, String name) { + this.storagePlugin = storagePlugin; + this.workspace = workspace; + this.name = name; + } + + public static MaterializedViewKey of(MaterializedViewMetadataUnit unit) { + return new MaterializedViewKey(unit.storagePlugin(), unit.workspace(), unit.name()); + } + + public String storagePlugin() { + return storagePlugin; + } + + public String workspace() { + return workspace; + } + + public String name() { + return name; + } + + /** + * Constructs location path for this MV key relative to the base table location. 
+ * + * @param baseLocation base Iceberg table location + * @return location path for this MV + */ + public String toLocation(String baseLocation) { + return new Path(new Path(new Path(baseLocation, storagePlugin), workspace), name).toUri().getPath(); + } + + /** + * Converts this MV key to filter conditions map. + * + * @return map of filter conditions + */ + public Map<MetastoreColumn, Object> toFilterConditions() { + Map<MetastoreColumn, Object> conditions = new HashMap<>(); + conditions.put(MetastoreColumn.STORAGE_PLUGIN, storagePlugin); + conditions.put(MetastoreColumn.WORKSPACE, workspace); + conditions.put(MetastoreColumn.MV_NAME, name); + return conditions; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MaterializedViewKey that = (MaterializedViewKey) o; + return Objects.equals(storagePlugin, that.storagePlugin) + && Objects.equals(workspace, that.workspace) + && Objects.equals(name, that.name); + } + + @Override + public int hashCode() { + return Objects.hash(storagePlugin, workspace, name); + } + + @Override + public String toString() { + return "MaterializedViewKey{" + + "storagePlugin='" + storagePlugin + '\'' + + ", workspace='" + workspace + '\'' + + ", name='" + name + '\'' + + '}'; + } +} diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOperationTransformer.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOperationTransformer.java new file mode 100644 index 00000000000..82c1a4373e9 --- /dev/null +++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOperationTransformer.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.iceberg.components.materializedviews; + +import org.apache.drill.metastore.MetastoreColumn; +import org.apache.drill.metastore.components.materializedviews.MaterializedViewMetadataUnit; +import org.apache.drill.metastore.iceberg.IcebergMetastoreContext; +import org.apache.drill.metastore.iceberg.operate.Overwrite; +import org.apache.drill.metastore.iceberg.transform.OperationTransformer; +import org.apache.iceberg.expressions.Expression; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Metastore MaterializedViews component operations transformer that provides mechanism + * to convert {@link MaterializedViewMetadataUnit} data to Metastore overwrite / delete operations. + */ +public class MaterializedViewsOperationTransformer extends OperationTransformer<MaterializedViewMetadataUnit> { + + public MaterializedViewsOperationTransformer(IcebergMetastoreContext<MaterializedViewMetadataUnit> context) { + super(context); + } + + /** + * Groups given list of {@link MaterializedViewMetadataUnit} based on MV key + * (storage plugin, workspace, and MV name). + * Each group is converted into overwrite operation.
+ * + * @param units Metastore component units + * @return list of overwrite operations + */ + @Override + public List<Overwrite> toOverwrite(List<MaterializedViewMetadataUnit> units) { + Map<MaterializedViewKey, List<MaterializedViewMetadataUnit>> data = units.stream() + .collect(Collectors.groupingBy(MaterializedViewKey::of)); + + return data.entrySet().parallelStream() + .map(entry -> { + MaterializedViewKey mvKey = entry.getKey(); + + String location = mvKey.toLocation(context.table().location()); + + Map<MetastoreColumn, Object> filterConditions = mvKey.toFilterConditions(); + Expression expression = context.transformer().filter().transform(filterConditions); + + return toOverwrite(location, expression, entry.getValue()); + }) + .collect(Collectors.toList()); + } +} diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOutputDataTransformer.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOutputDataTransformer.java new file mode 100644 index 00000000000..7aacc827730 --- /dev/null +++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsOutputDataTransformer.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.iceberg.components.materializedviews; + +import org.apache.drill.metastore.components.materializedviews.MaterializedViewMetadataUnit; +import org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException; +import org.apache.drill.metastore.iceberg.transform.OutputDataTransformer; + +import java.lang.invoke.MethodHandle; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Metastore MaterializedViews component output data transformer that transforms + * {@link org.apache.iceberg.data.Record} into {@link MaterializedViewMetadataUnit}. + */ +public class MaterializedViewsOutputDataTransformer extends OutputDataTransformer<MaterializedViewMetadataUnit> { + + public MaterializedViewsOutputDataTransformer(Map<String, MethodHandle> unitSetters) { + super(unitSetters); + } + + @Override + public List<MaterializedViewMetadataUnit> execute() { + List<MaterializedViewMetadataUnit> results = new ArrayList<>(); + for (Map<MethodHandle, Object> valueToSet : valuesToSet()) { + MaterializedViewMetadataUnit.Builder builder = MaterializedViewMetadataUnit.builder(); + for (Map.Entry<MethodHandle, Object> entry : valueToSet.entrySet()) { + try { + entry.getKey().invokeWithArguments(builder, entry.getValue()); + } catch (Throwable e) { + throw new IcebergMetastoreException( + String.format("Unable to invoke setter for [%s] using [%s]", + MaterializedViewMetadataUnit.Builder.class.getSimpleName(), entry.getKey()), e); + } + } + results.add(builder.build()); + } + return results; + } +} diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsTransformer.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsTransformer.java new file mode 100644 index 00000000000..7f8e33d8b07 --- /dev/null +++
b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/components/materializedviews/MaterializedViewsTransformer.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.iceberg.components.materializedviews; + +import org.apache.drill.metastore.components.materializedviews.MaterializedViewMetadataUnit; +import org.apache.drill.metastore.iceberg.IcebergMetastoreContext; +import org.apache.drill.metastore.iceberg.transform.InputDataTransformer; +import org.apache.drill.metastore.iceberg.transform.OperationTransformer; +import org.apache.drill.metastore.iceberg.transform.OutputDataTransformer; +import org.apache.drill.metastore.iceberg.transform.Transformer; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; + +/** + * Metastore MaterializedViews component filter, data and operations transformer. + * Provides needed transformations when reading / writing {@link MaterializedViewMetadataUnit} + * from / into Iceberg table. 
+ */ +public class MaterializedViewsTransformer implements Transformer<MaterializedViewMetadataUnit> { + + private final IcebergMetastoreContext<MaterializedViewMetadataUnit> context; + + public MaterializedViewsTransformer(IcebergMetastoreContext<MaterializedViewMetadataUnit> context) { + this.context = context; + } + + @Override + public InputDataTransformer<MaterializedViewMetadataUnit> inputData() { + Table table = context.table(); + return new InputDataTransformer<>(table.schema(), new Schema(table.spec().partitionType().fields()), + MaterializedViewMetadataUnit.SCHEMA.unitGetters()); + } + + @Override + public OutputDataTransformer<MaterializedViewMetadataUnit> outputData() { + return new MaterializedViewsOutputDataTransformer(MaterializedViewMetadataUnit.SCHEMA.unitBuilderSetters()); + } + + @Override + public OperationTransformer<MaterializedViewMetadataUnit> operation() { + return new MaterializedViewsOperationTransformer(context); + } +} diff --git a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/config/IcebergConfigConstants.java b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/config/IcebergConfigConstants.java index 6f326ac0232..c5f273f784c 100644 --- a/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/config/IcebergConfigConstants.java +++ b/metastore/iceberg-metastore/src/main/java/org/apache/drill/metastore/iceberg/config/IcebergConfigConstants.java @@ -90,4 +90,19 @@ public interface IcebergConfigConstants { * Metastore Views Iceberg table properties. */ String COMPONENTS_VIEWS_PROPERTIES = COMPONENTS_VIEWS + "properties"; + + /** + * Drill Iceberg Metastore MaterializedViews components configuration properties namespace. + */ + String COMPONENTS_MATERIALIZED_VIEWS = COMPONENTS + "materialized_views."; + + /** + * Metastore MaterializedViews Iceberg table location inside Iceberg Metastore. + */ + String COMPONENTS_MATERIALIZED_VIEWS_LOCATION = COMPONENTS_MATERIALIZED_VIEWS + "location"; + + /** + * Metastore MaterializedViews Iceberg table properties.
+ */ + String COMPONENTS_MATERIALIZED_VIEWS_PROPERTIES = COMPONENTS_MATERIALIZED_VIEWS + "properties"; } diff --git a/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf b/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf index c2c466e41c1..c188d6aa184 100644 --- a/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf +++ b/metastore/iceberg-metastore/src/main/resources/drill-metastore-module.conf @@ -60,6 +60,14 @@ drill.metastore.iceberg: { # Specific views properties from org.apache.iceberg.TableProperties can be specified properties: {} + }, + + materialized_views: { + # Iceberg table location in Iceberg Metastore for materialized views + location: "materialized_views", + + # Specific materialized views properties from org.apache.iceberg.TableProperties can be specified + properties: {} } } } diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/Metastore.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/Metastore.java index 2bbe46116ba..9aca7c7205c 100644 --- a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/Metastore.java +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/Metastore.java @@ -17,6 +17,7 @@ */ package org.apache.drill.metastore; +import org.apache.drill.metastore.components.materializedviews.MaterializedViews; import org.apache.drill.metastore.components.tables.Tables; import org.apache.drill.metastore.components.views.Views; @@ -40,4 +41,9 @@ public interface Metastore extends AutoCloseable { * @return Metastore Views component implementation */ Views views(); + + /** + * @return Metastore MaterializedViews component implementation + */ + MaterializedViews materializedViews(); } diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/MetastoreColumn.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/MetastoreColumn.java index d5f045cdbe9..bbdb58d45ad 
100644 --- a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/MetastoreColumn.java +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/MetastoreColumn.java @@ -48,7 +48,16 @@ public enum MetastoreColumn { PARTITION_VALUES("partitionValues"), PATH("path"), ROW_GROUP_INDEX("rowGroupIndex"), - HOST_AFFINITY("hostAffinity"); + HOST_AFFINITY("hostAffinity"), + + // Materialized View specific columns + MV_NAME("name"), + MV_SQL("sql"), + MV_FIELDS("fields"), + MV_DATA_LOCATION("dataLocation"), + MV_WORKSPACE_SCHEMA_PATH("workspaceSchemaPath"), + MV_REFRESH_STATUS("refreshStatus"), + MV_LAST_REFRESH_TIME("lastRefreshTime"); private final String columnName; diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/BasicMaterializedViewsRequests.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/BasicMaterializedViewsRequests.java new file mode 100644 index 00000000000..ca06e508322 --- /dev/null +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/BasicMaterializedViewsRequests.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.metastore.components.materializedviews; + +import org.apache.drill.metastore.MetastoreColumn; +import org.apache.drill.metastore.expressions.FilterExpression; +import org.apache.drill.metastore.metadata.MetadataType; +import org.apache.drill.metastore.operate.Delete; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.drill.metastore.expressions.FilterExpression.and; +import static org.apache.drill.metastore.expressions.FilterExpression.equal; + +/** + * Provides handy methods to retrieve Metastore MaterializedViews data. + * Contains list of common requests without need to write filters manually. + */ +public class BasicMaterializedViewsRequests { + + private final MaterializedViews materializedViews; + + public BasicMaterializedViewsRequests(MaterializedViews materializedViews) { + this.materializedViews = materializedViews; + } + + /** + * Retrieves a materialized view by its location identifiers. + * + * @param storagePlugin storage plugin name + * @param workspace workspace name + * @param mvName materialized view name + * @return MaterializedViewMetadataUnit if found, null otherwise + */ + public MaterializedViewMetadataUnit getMaterializedView(String storagePlugin, + String workspace, + String mvName) { + FilterExpression filter = and( + equal(MetastoreColumn.STORAGE_PLUGIN, storagePlugin), + equal(MetastoreColumn.WORKSPACE, workspace), + equal(MetastoreColumn.MV_NAME, mvName)); + + List<MaterializedViewMetadataUnit> results = materializedViews.read() + .filter(filter) + .execute(); + + return results.isEmpty() ? null : results.get(0); + } + + /** + * Retrieves all materialized views in a workspace.
+ * + * @param storagePlugin storage plugin name + * @param workspace workspace name + * @return list of MaterializedViewMetadataUnit + */ + public List getMaterializedViews(String storagePlugin, + String workspace) { + FilterExpression filter = and( + equal(MetastoreColumn.STORAGE_PLUGIN, storagePlugin), + equal(MetastoreColumn.WORKSPACE, workspace)); + + return materializedViews.read() + .filter(filter) + .execute(); + } + + /** + * Retrieves all refreshed materialized views in a workspace. + * + * @param storagePlugin storage plugin name + * @param workspace workspace name + * @return list of refreshed MaterializedViewMetadataUnit + */ + public List getRefreshedMaterializedViews(String storagePlugin, + String workspace) { + FilterExpression filter = and( + equal(MetastoreColumn.STORAGE_PLUGIN, storagePlugin), + equal(MetastoreColumn.WORKSPACE, workspace), + equal(MetastoreColumn.MV_REFRESH_STATUS, "COMPLETE")); + + return materializedViews.read() + .filter(filter) + .execute(); + } + + /** + * Retrieves all materialized views across all workspaces. + * + * @return list of all MaterializedViewMetadataUnit + */ + public List getAllMaterializedViews() { + return materializedViews.read().execute(); + } + + /** + * Retrieves materialized view names in a workspace. + * + * @param storagePlugin storage plugin name + * @param workspace workspace name + * @return list of MV names + */ + public List getMaterializedViewNames(String storagePlugin, String workspace) { + FilterExpression filter = and( + equal(MetastoreColumn.STORAGE_PLUGIN, storagePlugin), + equal(MetastoreColumn.WORKSPACE, workspace)); + + return materializedViews.read() + .filter(filter) + .columns(Arrays.asList(MetastoreColumn.MV_NAME)) + .execute() + .stream() + .map(MaterializedViewMetadataUnit::name) + .collect(Collectors.toList()); + } + + /** + * Checks if a materialized view exists. 
+ * + * @param storagePlugin storage plugin name + * @param workspace workspace name + * @param mvName materialized view name + * @return true if exists, false otherwise + */ + public boolean exists(String storagePlugin, String workspace, String mvName) { + return getMaterializedView(storagePlugin, workspace, mvName) != null; + } + + /** + * Deletes a materialized view from metastore. + * + * @param storagePlugin storage plugin name + * @param workspace workspace name + * @param mvName materialized view name + */ + public void delete(String storagePlugin, String workspace, String mvName) { + FilterExpression filter = and( + equal(MetastoreColumn.STORAGE_PLUGIN, storagePlugin), + equal(MetastoreColumn.WORKSPACE, workspace), + equal(MetastoreColumn.MV_NAME, mvName)); + + Delete deleteOp = Delete.builder() + .metadataType(MetadataType.MATERIALIZED_VIEW) + .filter(filter) + .build(); + + materializedViews.modify() + .delete(deleteOp) + .execute(); + } +} diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewMetadataUnit.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewMetadataUnit.java new file mode 100644 index 00000000000..9b12315ed0a --- /dev/null +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewMetadataUnit.java @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.components.materializedviews; + +import org.apache.drill.metastore.MetastoreColumn; +import org.apache.drill.metastore.MetastoreFieldDefinition; +import org.apache.drill.metastore.exceptions.MetastoreException; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.StringJoiner; + +import static org.apache.drill.metastore.metadata.MetadataType.MATERIALIZED_VIEW; + +/** + * Class that represents one row in Drill Metastore Materialized Views + * which is a representation of materialized view metadata. + *

+ * Contains information about the MV definition (name, SQL, schema), + * storage location, and refresh status. + */ +public class MaterializedViewMetadataUnit { + + public static final Schema SCHEMA = Schema.of(MaterializedViewMetadataUnit.class, Builder.class); + + public static final MaterializedViewMetadataUnit EMPTY_UNIT = MaterializedViewMetadataUnit.builder().build(); + + // Storage location identifiers + @MetastoreFieldDefinition(column = MetastoreColumn.STORAGE_PLUGIN, scopes = {MATERIALIZED_VIEW}) + private final String storagePlugin; + + @MetastoreFieldDefinition(column = MetastoreColumn.WORKSPACE, scopes = {MATERIALIZED_VIEW}) + private final String workspace; + + // MV identification + @MetastoreFieldDefinition(column = MetastoreColumn.MV_NAME, scopes = {MATERIALIZED_VIEW}) + private final String name; + + @MetastoreFieldDefinition(column = MetastoreColumn.OWNER, scopes = {MATERIALIZED_VIEW}) + private final String owner; + + // MV definition + @MetastoreFieldDefinition(column = MetastoreColumn.MV_SQL, scopes = {MATERIALIZED_VIEW}) + private final String sql; + + @MetastoreFieldDefinition(column = MetastoreColumn.SCHEMA, scopes = {MATERIALIZED_VIEW}) + private final String schema; + + @MetastoreFieldDefinition(column = MetastoreColumn.MV_FIELDS, scopes = {MATERIALIZED_VIEW}) + private final String fields; + + // Data location + @MetastoreFieldDefinition(column = MetastoreColumn.MV_DATA_LOCATION, scopes = {MATERIALIZED_VIEW}) + private final String dataLocation; + + @MetastoreFieldDefinition(column = MetastoreColumn.MV_WORKSPACE_SCHEMA_PATH, scopes = {MATERIALIZED_VIEW}) + private final List<String> workspaceSchemaPath; + + // Refresh status + @MetastoreFieldDefinition(column = MetastoreColumn.MV_REFRESH_STATUS, scopes = {MATERIALIZED_VIEW}) + private final String refreshStatus; + + @MetastoreFieldDefinition(column = MetastoreColumn.MV_LAST_REFRESH_TIME, scopes = {MATERIALIZED_VIEW}) + private final Long lastRefreshTime; + + // Metadata +
@MetastoreFieldDefinition(column = MetastoreColumn.LAST_MODIFIED_TIME, scopes = {MATERIALIZED_VIEW}) + private final Long lastModifiedTime; + + @MetastoreFieldDefinition(column = MetastoreColumn.ADDITIONAL_METADATA, scopes = {MATERIALIZED_VIEW}) + private final String additionalMetadata; + + private MaterializedViewMetadataUnit(Builder builder) { + this.storagePlugin = builder.storagePlugin; + this.workspace = builder.workspace; + this.name = builder.name; + this.owner = builder.owner; + this.sql = builder.sql; + this.schema = builder.schema; + this.fields = builder.fields; + this.dataLocation = builder.dataLocation; + this.workspaceSchemaPath = builder.workspaceSchemaPath; + this.refreshStatus = builder.refreshStatus; + this.lastRefreshTime = builder.lastRefreshTime; + this.lastModifiedTime = builder.lastModifiedTime; + this.additionalMetadata = builder.additionalMetadata; + } + + public static Builder builder() { + return new Builder(); + } + + public String storagePlugin() { + return storagePlugin; + } + + public String workspace() { + return workspace; + } + + public String name() { + return name; + } + + public String owner() { + return owner; + } + + public String sql() { + return sql; + } + + public String schema() { + return schema; + } + + public String fields() { + return fields; + } + + public String dataLocation() { + return dataLocation; + } + + public List<String> workspaceSchemaPath() { + return workspaceSchemaPath; + } + + public String refreshStatus() { + return refreshStatus; + } + + public Long lastRefreshTime() { + return lastRefreshTime; + } + + public Long lastModifiedTime() { + return lastModifiedTime; + } + + public String additionalMetadata() { + return additionalMetadata; + } + + public Builder toBuilder() { + return MaterializedViewMetadataUnit.builder() + .storagePlugin(storagePlugin) + .workspace(workspace) + .name(name) + .owner(owner) + .sql(sql) + .schema(schema) + .fields(fields) + .dataLocation(dataLocation) +
.workspaceSchemaPath(workspaceSchemaPath) + .refreshStatus(refreshStatus) + .lastRefreshTime(lastRefreshTime) + .lastModifiedTime(lastModifiedTime) + .additionalMetadata(additionalMetadata); + } + + @Override + public int hashCode() { + return Objects.hash(storagePlugin, workspace, name, owner, sql, schema, fields, + dataLocation, workspaceSchemaPath, refreshStatus, lastRefreshTime, + lastModifiedTime, additionalMetadata); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MaterializedViewMetadataUnit that = (MaterializedViewMetadataUnit) o; + return Objects.equals(storagePlugin, that.storagePlugin) + && Objects.equals(workspace, that.workspace) + && Objects.equals(name, that.name) + && Objects.equals(owner, that.owner) + && Objects.equals(sql, that.sql) + && Objects.equals(schema, that.schema) + && Objects.equals(fields, that.fields) + && Objects.equals(dataLocation, that.dataLocation) + && Objects.equals(workspaceSchemaPath, that.workspaceSchemaPath) + && Objects.equals(refreshStatus, that.refreshStatus) + && Objects.equals(lastRefreshTime, that.lastRefreshTime) + && Objects.equals(lastModifiedTime, that.lastModifiedTime) + && Objects.equals(additionalMetadata, that.additionalMetadata); + } + + @Override + public String toString() { + return new StringJoiner(",\n", MaterializedViewMetadataUnit.class.getSimpleName() + "[", "]") + .add("storagePlugin=" + storagePlugin) + .add("workspace=" + workspace) + .add("name=" + name) + .add("owner=" + owner) + .add("sql=" + sql) + .add("schema=" + schema) + .add("fields=" + fields) + .add("dataLocation=" + dataLocation) + .add("workspaceSchemaPath=" + workspaceSchemaPath) + .add("refreshStatus=" + refreshStatus) + .add("lastRefreshTime=" + lastRefreshTime) + .add("lastModifiedTime=" + lastModifiedTime) + .add("additionalMetadata=" + additionalMetadata) + .toString(); + } + + public static class Builder { + private 
String storagePlugin; + private String workspace; + private String name; + private String owner; + private String sql; + private String schema; + private String fields; + private String dataLocation; + private List<String> workspaceSchemaPath; + private String refreshStatus; + private Long lastRefreshTime; + private Long lastModifiedTime; + private String additionalMetadata; + + public Builder storagePlugin(String storagePlugin) { + this.storagePlugin = storagePlugin; + return this; + } + + public Builder workspace(String workspace) { + this.workspace = workspace; + return this; + } + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder owner(String owner) { + this.owner = owner; + return this; + } + + public Builder sql(String sql) { + this.sql = sql; + return this; + } + + public Builder schema(String schema) { + this.schema = schema; + return this; + } + + public Builder fields(String fields) { + this.fields = fields; + return this; + } + + public Builder dataLocation(String dataLocation) { + this.dataLocation = dataLocation; + return this; + } + + public Builder workspaceSchemaPath(List<String> workspaceSchemaPath) { + this.workspaceSchemaPath = workspaceSchemaPath; + return this; + } + + public Builder refreshStatus(String refreshStatus) { + this.refreshStatus = refreshStatus; + return this; + } + + public Builder lastRefreshTime(Long lastRefreshTime) { + this.lastRefreshTime = lastRefreshTime; + return this; + } + + public Builder lastModifiedTime(Long lastModifiedTime) { + this.lastModifiedTime = lastModifiedTime; + return this; + } + + public Builder additionalMetadata(String additionalMetadata) { + this.additionalMetadata = additionalMetadata; + return this; + } + + public MaterializedViewMetadataUnit build() { + return new MaterializedViewMetadataUnit(this); + } + } + + /** + * Contains schema metadata for MaterializedViewMetadataUnit. + * Provides method handles for reflection-based field access.
+ */ + public static class Schema { + + private final List<MetastoreColumn> columns; + private final Map<String, MethodHandle> unitGetters; + private final Map<String, MethodHandle> unitBuilderSetters; + + private Schema(List<MetastoreColumn> columns, + Map<String, MethodHandle> unitGetters, + Map<String, MethodHandle> unitBuilderSetters) { + this.columns = columns; + this.unitGetters = unitGetters; + this.unitBuilderSetters = unitBuilderSetters; + } + + public static Schema of(Class<?> unitClass, Class<?> builderClass) { + List<MetastoreColumn> columns = new ArrayList<>(); + Map<String, MethodHandle> unitGetters = new HashMap<>(); + Map<String, MethodHandle> unitBuilderSetters = new HashMap<>(); + + MethodHandles.Lookup gettersLookup = MethodHandles.publicLookup().in(unitClass); + MethodHandles.Lookup settersLookup = MethodHandles.publicLookup().in(builderClass); + + for (Field field : unitClass.getDeclaredFields()) { + MetastoreFieldDefinition definition = field.getAnnotation(MetastoreFieldDefinition.class); + if (definition == null) { + continue; + } + + MetastoreColumn column = definition.column(); + columns.add(column); + + Class<?> type = field.getType(); + try { + String fieldName = field.getName(); + String columnName = column.columnName(); + MethodHandle getter = gettersLookup.findVirtual(unitClass, fieldName, MethodType.methodType(type)); + unitGetters.put(columnName, getter); + MethodHandle setter = settersLookup.findVirtual(builderClass, fieldName, MethodType.methodType(builderClass, type)); + unitBuilderSetters.put(columnName, setter); + } catch (ReflectiveOperationException e) { + throw new MetastoreException(String.format("Unable to init unit setter / getter method handlers " + + "for unit [%s] and its builder [%s] classes", unitClass.getSimpleName(), builderClass.getSimpleName()), e); + } + } + + return new Schema(columns, unitGetters, unitBuilderSetters); + } + + public List<MetastoreColumn> columns() { + return columns; + } + + public Map<String, MethodHandle> unitGetters() { + return unitGetters; + } + + public Map<String, MethodHandle> unitBuilderSetters() { + return unitBuilderSetters; + } + } +} diff --git
a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViews.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViews.java new file mode 100644 index 00000000000..49b9194a450 --- /dev/null +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViews.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.metastore.components.materializedviews; + +import org.apache.drill.metastore.operate.Operations; + +/** + * Metastore Materialized Views component implementation which allows + * reading and writing materialized view metadata. + *

+ * Materialized views store pre-computed query results for performance optimization. + * This component tracks MV definitions, refresh status, and data locations. + */ +public interface MaterializedViews extends Operations<MaterializedViewMetadataUnit> { + + /** + * @return new basic materialized views requests instance that provides methods + * to make common calls to Metastore MaterializedViews + */ + default BasicMaterializedViewsRequests basicRequests() { + return new BasicMaterializedViewsRequests(this); + } +} diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewsMetadataTypeValidator.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewsMetadataTypeValidator.java new file mode 100644 index 00000000000..a398ebf2d63 --- /dev/null +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/components/materializedviews/MaterializedViewsMetadataTypeValidator.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.drill.metastore.components.materializedviews; + +import org.apache.drill.metastore.metadata.MetadataType; +import org.apache.drill.metastore.operate.MetadataTypeValidator; + +import java.util.Collections; +import java.util.Set; + +/** + * Provides metadata type and columns validation for Materialized Views component. + */ +public class MaterializedViewsMetadataTypeValidator implements MetadataTypeValidator { + + public static final MaterializedViewsMetadataTypeValidator INSTANCE = + new MaterializedViewsMetadataTypeValidator(); + + @Override + public Set<MetadataType> supportedMetadataTypes() { + return Collections.singleton(MetadataType.MATERIALIZED_VIEW); + } +} diff --git a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/metadata/MetadataType.java b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/metadata/MetadataType.java index 5ee6b9221c7..430234aa0d1 100644 --- a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/metadata/MetadataType.java +++ b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/metadata/MetadataType.java @@ -64,7 +64,12 @@ public enum MetadataType { /** * Metadata type which belongs to views. */ - VIEW(-1); + VIEW(-1), + + /** + * Metadata type which belongs to materialized views. + */ + MATERIALIZED_VIEW(-2); /** * Level of this metadata type compared to other metadata types.
diff --git a/metastore/mongo-metastore/src/main/java/org/apache/drill/metastore/mongo/MongoMetastore.java b/metastore/mongo-metastore/src/main/java/org/apache/drill/metastore/mongo/MongoMetastore.java index 955ac85b123..08a2cb42d4f 100644 --- a/metastore/mongo-metastore/src/main/java/org/apache/drill/metastore/mongo/MongoMetastore.java +++ b/metastore/mongo-metastore/src/main/java/org/apache/drill/metastore/mongo/MongoMetastore.java @@ -22,6 +22,7 @@ import com.mongodb.client.MongoClients; import org.apache.drill.common.config.DrillConfig; import org.apache.drill.metastore.Metastore; +import org.apache.drill.metastore.components.materializedviews.MaterializedViews; import org.apache.drill.metastore.components.tables.Tables; import org.apache.drill.metastore.components.views.Views; import org.apache.drill.metastore.mongo.components.tables.MongoTables; @@ -62,6 +63,11 @@ public Views views() { throw new UnsupportedOperationException("Views metadata support is not implemented"); } + @Override + public MaterializedViews materializedViews() { + throw new UnsupportedOperationException("Materialized views metadata support is not implemented for Mongo metastore"); + } + @Override public void close() { if (this.client != null) { diff --git a/metastore/rdbms-metastore/src/main/java/org/apache/drill/metastore/rdbms/RdbmsMetastore.java b/metastore/rdbms-metastore/src/main/java/org/apache/drill/metastore/rdbms/RdbmsMetastore.java index a8f13552823..4b5ae15e26f 100644 --- a/metastore/rdbms-metastore/src/main/java/org/apache/drill/metastore/rdbms/RdbmsMetastore.java +++ b/metastore/rdbms-metastore/src/main/java/org/apache/drill/metastore/rdbms/RdbmsMetastore.java @@ -27,6 +27,7 @@ import liquibase.resource.ClassLoaderResourceAccessor; import org.apache.drill.common.config.DrillConfig; import org.apache.drill.metastore.Metastore; +import org.apache.drill.metastore.components.materializedviews.MaterializedViews; import org.apache.drill.metastore.components.tables.Tables; import 
org.apache.drill.metastore.components.views.Views; import org.apache.drill.metastore.rdbms.components.tables.RdbmsTables; @@ -68,6 +69,11 @@ public Views views() { throw new UnsupportedOperationException("Views metadata support is not implemented"); } + @Override + public MaterializedViews materializedViews() { + throw new UnsupportedOperationException("Materialized views metadata support is not implemented for RDBMS metastore"); + } + @Override public void close() { executorProvider.close(); From f049518a4a68e5fb5f7867ed1d105d040e11dcc2 Mon Sep 17 00:00:00 2001 From: cgivre Date: Mon, 2 Feb 2026 12:46:02 -0500 Subject: [PATCH 4/8] Add to INFO schema --- docs/dev/MaterializedViews.md | 46 ++++++++++++++++++- .../store/ischema/InfoSchemaConstants.java | 5 ++ .../ischema/InfoSchemaRecordGenerator.java | 17 +++++++ .../exec/store/ischema/InfoSchemaTable.java | 27 +++++++++++ .../store/ischema/InfoSchemaTableType.java | 2 + .../exec/store/ischema/RecordCollector.java | 43 +++++++++++++++++ .../drill/exec/store/ischema/Records.java | 25 ++++++++++ 7 files changed, 163 insertions(+), 2 deletions(-) diff --git a/docs/dev/MaterializedViews.md b/docs/dev/MaterializedViews.md index c98fb68908b..b9bd2b9b73d 100644 --- a/docs/dev/MaterializedViews.md +++ b/docs/dev/MaterializedViews.md @@ -260,6 +260,50 @@ The MaterializedViewMetadataUnit stored in the metastore contains: 2. Delete the data directory and all contents 3. Remove metadata from metastore (if enabled) +## INFORMATION_SCHEMA Integration + +Materialized views are exposed in the `INFORMATION_SCHEMA.MATERIALIZED_VIEWS` table. This allows users to query metadata about all materialized views in the system. 
+ +### Querying Materialized Views Metadata + +```sql +-- List all materialized views +SELECT * FROM INFORMATION_SCHEMA.MATERIALIZED_VIEWS; + +-- List materialized views in a specific schema +SELECT TABLE_NAME, REFRESH_STATUS, LAST_REFRESH_TIME +FROM INFORMATION_SCHEMA.MATERIALIZED_VIEWS +WHERE TABLE_SCHEMA = 'dfs.tmp'; + +-- Find materialized views that need refresh +SELECT TABLE_SCHEMA, TABLE_NAME +FROM INFORMATION_SCHEMA.MATERIALIZED_VIEWS +WHERE REFRESH_STATUS = 'PENDING' OR REFRESH_STATUS IS NULL; +``` + +### MATERIALIZED_VIEWS Table Columns + +| Column | Type | Description | +|--------|------|-------------| +| TABLE_CATALOG | VARCHAR | Catalog name (always "DRILL") | +| TABLE_SCHEMA | VARCHAR | Schema name (e.g., "dfs.tmp") | +| TABLE_NAME | VARCHAR | Materialized view name | +| VIEW_DEFINITION | VARCHAR | SQL statement that defines the materialized view | +| REFRESH_STATUS | VARCHAR | Current status: "PENDING" or "COMPLETE" | +| LAST_REFRESH_TIME | TIMESTAMP | When the materialized view was last refreshed | +| DATA_LOCATION | VARCHAR | File system path to the stored data | + +### TABLES Integration + +Materialized views also appear in `INFORMATION_SCHEMA.TABLES` with `TABLE_TYPE = 'MATERIALIZED VIEW'`: + +```sql +-- List all materialized views via TABLES +SELECT TABLE_SCHEMA, TABLE_NAME +FROM INFORMATION_SCHEMA.TABLES +WHERE TABLE_TYPE = 'MATERIALIZED VIEW'; +``` + ## Limitations Current limitations of the materialized view implementation: @@ -342,5 +386,3 @@ Planned improvements for future releases: 4. **Cost-Based Selection**: When multiple MVs match, select based on estimated query cost. 5. **Staleness Tracking**: Track source table changes to identify stale materialized views. - -6. **INFORMATION_SCHEMA Integration**: Expose materialized views in INFORMATION_SCHEMA tables. 
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaConstants.java index f4370f98651..ee7cd98115e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaConstants.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaConstants.java @@ -66,6 +66,11 @@ public interface InfoSchemaConstants { // Remaining VIEWS column names: String VIEWS_COL_VIEW_DEFINITION = "VIEW_DEFINITION"; + // MATERIALIZED_VIEWS column names: + String MVIEWS_COL_REFRESH_STATUS = "REFRESH_STATUS"; + String MVIEWS_COL_LAST_REFRESH_TIME = "LAST_REFRESH_TIME"; + String MVIEWS_COL_DATA_LOCATION = "DATA_LOCATION"; + // Remaining COLUMNS column names: String COLS_COL_COLUMN_NAME = "COLUMN_NAME"; String COLS_COL_ORDINAL_POSITION = "ORDINAL_POSITION"; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java index 28336370078..2bc551406f3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java @@ -176,6 +176,23 @@ protected List<Records.View> collect(RecordCollector recordCollector, String sch } } + public static class MaterializedViews extends InfoSchemaRecordGenerator<Records.MaterializedView> { + + public MaterializedViews(FilterEvaluator filterEvaluator) { + super(filterEvaluator); + } + + @Override + public PojoRecordReader<Records.MaterializedView> getRecordReader() { + return new PojoRecordReader<>(Records.MaterializedView.class, records); + } + + @Override + protected List<Records.MaterializedView> collect(RecordCollector recordCollector, String schemaPath, SchemaPlus schema) { + return recordCollector.materializedViews(schemaPath, schema); + } + } + public static class Columns extends
InfoSchemaRecordGenerator<Records.Column> { public Columns(FilterEvaluator filterEvaluator) { diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTable.java index f2228ed066c..23ea73e1fef 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTable.java @@ -86,6 +86,9 @@ import static org.apache.drill.exec.store.ischema.InfoSchemaConstants.TBLS_COL_TABLE_SOURCE; import static org.apache.drill.exec.store.ischema.InfoSchemaConstants.TBLS_COL_TABLE_TYPE; import static org.apache.drill.exec.store.ischema.InfoSchemaConstants.VIEWS_COL_VIEW_DEFINITION; +import static org.apache.drill.exec.store.ischema.InfoSchemaConstants.MVIEWS_COL_REFRESH_STATUS; +import static org.apache.drill.exec.store.ischema.InfoSchemaConstants.MVIEWS_COL_LAST_REFRESH_TIME; +import static org.apache.drill.exec.store.ischema.InfoSchemaConstants.MVIEWS_COL_DATA_LOCATION; /** * Base class for tables in INFORMATION_SCHEMA. Defines the table (fields and types). @@ -228,6 +231,30 @@ public InfoSchemaRecordGenerator<Records.View> getRecordGenerator(FilterEvaluato } } + /** + * Layout for the MATERIALIZED_VIEWS table.
+ */ + public static class MaterializedViews extends InfoSchemaTable<Records.MaterializedView> { + + private static final List<Field> fields = Arrays.asList( + Field.create(SHRD_COL_TABLE_CATALOG, VARCHAR), + Field.create(SHRD_COL_TABLE_SCHEMA, VARCHAR), + Field.create(SHRD_COL_TABLE_NAME, VARCHAR), + Field.create(VIEWS_COL_VIEW_DEFINITION, VARCHAR), + Field.create(MVIEWS_COL_REFRESH_STATUS, VARCHAR), + Field.create(MVIEWS_COL_LAST_REFRESH_TIME, TIMESTAMP), + Field.create(MVIEWS_COL_DATA_LOCATION, VARCHAR)); + + public MaterializedViews() { + super(fields); + } + + @Override + public InfoSchemaRecordGenerator<Records.MaterializedView> getRecordGenerator(FilterEvaluator filterEvaluator) { + return new InfoSchemaRecordGenerator.MaterializedViews(filterEvaluator); + } + } + /** * Layout for the COLUMNS table. */ diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTableType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTableType.java index f08e380e5b4..a9cf568eedf 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTableType.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaTableType.java @@ -26,6 +26,7 @@ import org.apache.drill.exec.store.ischema.InfoSchemaTable.Catalogs; import org.apache.drill.exec.store.ischema.InfoSchemaTable.Columns; import org.apache.drill.exec.store.ischema.InfoSchemaTable.Files; +import org.apache.drill.exec.store.ischema.InfoSchemaTable.MaterializedViews; import org.apache.drill.exec.store.ischema.InfoSchemaTable.Partitions; import org.apache.drill.exec.store.ischema.InfoSchemaTable.Schemata; import org.apache.drill.exec.store.ischema.InfoSchemaTable.Tables; @@ -46,6 +47,7 @@ public enum InfoSchemaTableType implements DrillTableSelection { CATALOGS(new Catalogs()), SCHEMATA(new Schemata()), VIEWS(new Views()), + MATERIALIZED_VIEWS(new MaterializedViews()), COLUMNS(new Columns()), TABLES(new Tables()), PARTITIONS(new Partitions()), diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java index d1e09eebf73..27c69270125 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java @@ -27,6 +27,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.planner.logical.DrillMaterializedViewTable; import org.apache.drill.exec.planner.logical.DrillViewInfoProvider; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.server.options.OptionManager; @@ -103,6 +104,15 @@ public interface RecordCollector { */ List<Records.View> views(String schemaPath, SchemaPlus schema); + /** + * Collects materialized views data for information_schema. + * + * @param schemaPath schema name + * @param schema schema instance + * @return list of materialized view records + */ + List<Records.MaterializedView> materializedViews(String schemaPath, SchemaPlus schema); + /** * Collects columns data for information_schema. * @@ -181,6 +191,33 @@ public List<Records.View> views(String schemaPath, SchemaPlus schema) { .collect(Collectors.toList()); } + @Override + public List<Records.MaterializedView> materializedViews(String schemaPath, SchemaPlus schema) { + AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class); + + return drillSchema.getTablesByNames(schema.getTableNames()).stream() + .filter(pair -> pair.getValue().getJdbcTableType() == Schema.TableType.MATERIALIZED_VIEW) + .filter(pair -> filterEvaluator.shouldVisitTable(schemaPath, pair.getKey(), pair.getValue().getJdbcTableType())) + .map(pair -> { + Table table = pair.getValue(); + String viewSql = table instanceof DrillViewInfoProvider + ?
((DrillViewInfoProvider) table).getViewSql() : ""; + String refreshStatus = null; + Long lastRefreshTime = null; + String dataLocation = null; + if (table instanceof DrillMaterializedViewTable) { + DrillMaterializedViewTable mvTable = (DrillMaterializedViewTable) table; + org.apache.drill.exec.dotdrill.MaterializedView mv = mvTable.getMaterializedView(); + refreshStatus = mv.getRefreshStatus() != null ? mv.getRefreshStatus().name() : null; + lastRefreshTime = mv.getLastRefreshTime(); + dataLocation = mvTable.getDataStoragePath(); + } + return new Records.MaterializedView(IS_CATALOG_NAME, schemaPath, pair.getKey(), + viewSql, refreshStatus, lastRefreshTime, dataLocation); + }) + .collect(Collectors.toList()); + } + @Override public List<Records.Column> columns(String schemaPath, SchemaPlus schema) { AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class); @@ -295,6 +332,12 @@ public List<Records.View> views(String schemaPath, SchemaPlus schema) { return Collections.emptyList(); } + @Override + public List<Records.MaterializedView> materializedViews(String schemaPath, SchemaPlus schema) { + // TODO: Add metastore-based materialized views retrieval when metastore MV support is complete + return Collections.emptyList(); + } + @Override public List<Records.Column> columns(String schemaPath, SchemaPlus schema) { AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java index 064f66b7256..0980f0822e9 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java @@ -672,6 +672,31 @@ public View(String catalog, String schema, String name, String definition) { } } + /** + * Pojo object for a record in INFORMATION_SCHEMA.MATERIALIZED_VIEWS + */ + public static class MaterializedView { + + public final String TABLE_CATALOG; + public final String
TABLE_SCHEMA; + public final String TABLE_NAME; + public final String VIEW_DEFINITION; + public final String REFRESH_STATUS; + public final Timestamp LAST_REFRESH_TIME; + public final String DATA_LOCATION; + + public MaterializedView(String catalog, String schema, String name, String definition, + String refreshStatus, Long lastRefreshTime, String dataLocation) { + this.TABLE_CATALOG = catalog; + this.TABLE_SCHEMA = schema; + this.TABLE_NAME = name; + this.VIEW_DEFINITION = definition; + this.REFRESH_STATUS = refreshStatus; + this.LAST_REFRESH_TIME = lastRefreshTime != null ? convertToTimestamp(lastRefreshTime) : null; + this.DATA_LOCATION = dataLocation; + } + } + /** * Pojo object for a record in INFORMATION_SCHEMA.CATALOGS */ From 55dc68647f40a7fdc2c2c00f84b04cd0916bc5fa Mon Sep 17 00:00:00 2001 From: cgivre Date: Mon, 2 Feb 2026 13:33:49 -0500 Subject: [PATCH 5/8] Fix Unit Tests --- .../apache/drill/exec/dotdrill/TestMaterializedView.java | 3 ++- .../planner/sql/parser/TestMaterializedViewSqlParser.java | 3 ++- .../java/org/apache/drill/exec/sql/TestInfoSchema.java | 1 + .../drill/exec/work/metadata/TestMetadataProvider.java | 8 +++++--- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java index c82c2b8fbdb..9b4ef5f4b71 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestMaterializedView.java @@ -29,6 +29,7 @@ import org.apache.calcite.sql.type.SqlTypeName; import org.apache.drill.categories.SqlTest; +import org.apache.drill.test.BaseTest; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -38,7 +39,7 @@ * Tests for MaterializedView data model serialization and deserialization. 
*/ @Category(SqlTest.class) -public class TestMaterializedView { +public class TestMaterializedView extends BaseTest { private static final ObjectMapper mapper = new ObjectMapper(); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java index a57254d4bf9..c197fa90769 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/sql/parser/TestMaterializedViewSqlParser.java @@ -28,6 +28,7 @@ import org.apache.drill.categories.SqlTest; import org.apache.drill.exec.planner.physical.PlannerSettings; import org.apache.drill.exec.planner.sql.parser.impl.DrillParserImpl; +import org.apache.drill.test.BaseTest; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -35,7 +36,7 @@ * Tests for parsing materialized view SQL statements. 
*/ @Category(SqlTest.class) -public class TestMaterializedViewSqlParser { +public class TestMaterializedViewSqlParser extends BaseTest { private SqlParser.Config parserConfig() { return SqlParser.config() diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java index ceda39eaeeb..ede48218b0a 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java @@ -99,6 +99,7 @@ public void catalogs() throws Exception { public void showTablesFromDb() throws Exception { List expected = Arrays.asList( new String[]{"information_schema", "VIEWS"}, + new String[]{"information_schema", "MATERIALIZED_VIEWS"}, new String[]{"information_schema", "COLUMNS"}, new String[]{"information_schema", "TABLES"}, new String[]{"information_schema", "CATALOGS"}, diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/work/metadata/TestMetadataProvider.java b/exec/java-exec/src/test/java/org/apache/drill/exec/work/metadata/TestMetadataProvider.java index c0c742afe20..771be52a68c 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/work/metadata/TestMetadataProvider.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/work/metadata/TestMetadataProvider.java @@ -149,13 +149,14 @@ public void tables() throws Exception { assertEquals(RequestStatus.OK, resp.getStatus()); List tables = resp.getTablesList(); - assertEquals(22, tables.size()); + assertEquals(23, tables.size()); verifyTable("information_schema", "CATALOGS", tables); verifyTable("information_schema", "COLUMNS", tables); verifyTable("information_schema", "SCHEMATA", tables); verifyTable("information_schema", "TABLES", tables); verifyTable("information_schema", "VIEWS", tables); + verifyTable("information_schema", "MATERIALIZED_VIEWS", tables); verifyTable("information_schema", "PARTITIONS", tables); 
verifyTable("information_schema", "FILES", tables); //Verify System Tables @@ -183,13 +184,14 @@ public void tablesWithSystemTableFilter() throws Exception { assertEquals(RequestStatus.OK, resp.getStatus()); List tables = resp.getTablesList(); - assertEquals(22, tables.size()); + assertEquals(23, tables.size()); verifyTable("information_schema", "CATALOGS", tables); verifyTable("information_schema", "COLUMNS", tables); verifyTable("information_schema", "SCHEMATA", tables); verifyTable("information_schema", "TABLES", tables); verifyTable("information_schema", "VIEWS", tables); + verifyTable("information_schema", "MATERIALIZED_VIEWS", tables); verifyTable("information_schema", "PARTITIONS", tables); verifyTable("information_schema", "FILES", tables); //Verify System Tables @@ -242,7 +244,7 @@ public void columns() throws Exception { assertEquals(RequestStatus.OK, resp.getStatus()); List columns = resp.getColumnsList(); - assertEquals(172, columns.size()); + assertEquals(179, columns.size()); // too many records to verify the output. 
} From cea0ca4888df53f9bbaf4a36839f370d3d3dddd3 Mon Sep 17 00:00:00 2001 From: cgivre Date: Mon, 2 Feb 2026 18:00:47 -0500 Subject: [PATCH 6/8] Fixed JDBC tests --- .../src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java index 75704a746a1..415ea61bdd8 100644 --- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java +++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java @@ -129,6 +129,7 @@ public void testLikeNotLike() throws Exception{ .sql("SELECT TABLE_NAME, COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS " + "WHERE TABLE_NAME NOT LIKE 'C%' AND COLUMN_NAME LIKE 'TABLE_%E'") .returns( + "TABLE_NAME=MATERIALIZED_VIEWS; COLUMN_NAME=TABLE_NAME\n" + "TABLE_NAME=PARTITIONS; COLUMN_NAME=TABLE_NAME\n" + "TABLE_NAME=TABLES; COLUMN_NAME=TABLE_NAME\n" + "TABLE_NAME=TABLES; COLUMN_NAME=TABLE_TYPE\n" + @@ -143,6 +144,7 @@ public void testSimilarNotSimilar() throws Exception{ .sql("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.`TABLES` "+ "WHERE TABLE_NAME SIMILAR TO '%(H|I)E%' AND TABLE_NAME NOT SIMILAR TO 'C%' ORDER BY TABLE_NAME") .returns( + "TABLE_NAME=MATERIALIZED_VIEWS\n" + "TABLE_NAME=SCHEMATA\n" + "TABLE_NAME=VIEWS\n" ); From 78c290a1302738262d4d4d41fca740ce8e5f70f9 Mon Sep 17 00:00:00 2001 From: cgivre Date: Mon, 2 Feb 2026 20:32:13 -0500 Subject: [PATCH 7/8] Fixed final unit test --- .../drill/exec/store/drill/plugin/DrillPluginQueriesTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/storage-drill/src/test/java/org/apache/drill/exec/store/drill/plugin/DrillPluginQueriesTest.java b/contrib/storage-drill/src/test/java/org/apache/drill/exec/store/drill/plugin/DrillPluginQueriesTest.java index 2bef81998dc..d8f586e7a2c 100644 --- 
a/contrib/storage-drill/src/test/java/org/apache/drill/exec/store/drill/plugin/DrillPluginQueriesTest.java +++ b/contrib/storage-drill/src/test/java/org/apache/drill/exec/store/drill/plugin/DrillPluginQueriesTest.java @@ -99,6 +99,7 @@ public void testShowTables() throws Exception { .baselineValues("drill.information_schema", "FILES") .baselineValues("drill.information_schema", "SCHEMATA") .baselineValues("drill.information_schema", "TABLES") + .baselineValues("drill.information_schema", "MATERIALIZED_VIEWS") .go(); } From b00984513a397b5884f8cf54673703c0359bde28 Mon Sep 17 00:00:00 2001 From: cgivre Date: Sat, 7 Feb 2026 22:23:03 -0500 Subject: [PATCH 8/8] Addressed Review Comments --- docs/dev/MaterializedViews.md | 8 ++--- .../sql/handlers/MaterializedViewHandler.java | 21 +++++++++++-- .../drill/exec/store/AbstractSchema.java | 18 +++++++++-- .../store/dfs/WorkspaceSchemaFactory.java | 31 +++++++++++++++++-- 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/docs/dev/MaterializedViews.md b/docs/dev/MaterializedViews.md index b9bd2b9b73d..ee262642af2 100644 --- a/docs/dev/MaterializedViews.md +++ b/docs/dev/MaterializedViews.md @@ -154,7 +154,7 @@ The definition file contains: - Workspace schema path - Data storage path - Last refresh timestamp -- Refresh status (PENDING or COMPLETE) +- Refresh status (INCOMPLETE or COMPLETE) Example definition file structure: @@ -225,7 +225,7 @@ The MaterializedViewMetadataUnit stored in the metastore contains: | sql | String | Defining SQL statement | | workspaceSchemaPath | List | Schema path components | | dataLocation | String | Path to data directory | -| refreshStatus | String | PENDING or COMPLETE | +| refreshStatus | String | INCOMPLETE or COMPLETE | | lastRefreshTime | Long | Timestamp of last refresh | | lastModifiedTime | Long | Timestamp of last modification | @@ -278,7 +278,7 @@ WHERE TABLE_SCHEMA = 'dfs.tmp'; -- Find materialized views that need refresh SELECT TABLE_SCHEMA, TABLE_NAME FROM 
INFORMATION_SCHEMA.MATERIALIZED_VIEWS -WHERE REFRESH_STATUS = 'PENDING' OR REFRESH_STATUS IS NULL; +WHERE REFRESH_STATUS = 'INCOMPLETE' OR REFRESH_STATUS IS NULL; ``` ### MATERIALIZED_VIEWS Table Columns @@ -289,7 +289,7 @@ WHERE REFRESH_STATUS = 'PENDING' OR REFRESH_STATUS IS NULL; | TABLE_SCHEMA | VARCHAR | Schema name (e.g., "dfs.tmp") | | TABLE_NAME | VARCHAR | Materialized view name | | VIEW_DEFINITION | VARCHAR | SQL statement that defines the materialized view | -| REFRESH_STATUS | VARCHAR | Current status: "PENDING" or "COMPLETE" | +| REFRESH_STATUS | VARCHAR | Current status: "INCOMPLETE" or "COMPLETE" | | LAST_REFRESH_TIME | TIMESTAMP | When the materialized view was last refreshed | | DATA_LOCATION | VARCHAR | File system path to the stored data | diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java index 911921a13d3..613a93ab9e3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MaterializedViewHandler.java @@ -208,13 +208,22 @@ public PhysicalPlan getPlan(SqlNode sqlNode) throws IOException, ForemanSetupExc final Table viewToDrop = SqlHandlerUtil.getTableFromSchema(drillSchema, viewName); + // Check if the table exists and is actually a materialized view + final boolean isMaterializedView = viewToDrop != null && + viewToDrop.getJdbcTableType() == Schema.TableType.MATERIALIZED_VIEW; + if (dropMV.checkViewExistence()) { - if (viewToDrop == null) { + if (viewToDrop == null || !isMaterializedView) { return DirectPlan.createDirectPlan(context, false, String.format("Materialized view [%s] not found in schema [%s].", viewName, schemaPath)); } } else { - if (viewToDrop == null) { + if (viewToDrop == null || !isMaterializedView) { + if (viewToDrop != null) { + 
throw UserException.validationError() + .message("[%s] is not a materialized view in schema [%s].", viewName, schemaPath) + .build(logger); + } throw UserException.validationError() .message("Unknown materialized view [%s] in schema [%s].", viewName, schemaPath) .build(logger); @@ -259,7 +268,7 @@ public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConv .build(logger); } - // Clear existing data directory + // Clear existing data directory and mark INCOMPLETE while refresh is in progress drillSchema.refreshMaterializedView(viewName); // Parse and validate the MV's SQL definition @@ -282,6 +291,12 @@ public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConv PhysicalOperator pop = convertToPop(prel); PhysicalPlan plan = convertToPlan(pop, queryRelNode); + // Mark COMPLETE after the plan is successfully created. + // TODO: Ideally this should be called after plan execution completes + // via a post-execution callback, so that the status is only COMPLETE + // once data files are fully written. + drillSchema.completeMaterializedViewRefresh(viewName); + logger.info("Refreshing materialized view [{}] in schema [{}]", viewName, schemaPath); return plan; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java index ea871f5ecad..72dc44656a1 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java @@ -202,8 +202,9 @@ public void dropMaterializedView(String viewName) throws IOException { } /** - * Refresh the materialized view with given name by re-executing its query - * and replacing the stored data. + * Prepare the materialized view for refresh by clearing existing data + * and marking it as INCOMPLETE. After data is written, call + * {@link #completeMaterializedViewRefresh(String)} to mark COMPLETE. 
* * @param viewName materialized view name * @throws IOException in case of error refreshing the materialized view @@ -214,6 +215,19 @@ public void refreshMaterializedView(String viewName) throws IOException { .build(logger); } + /** + * Mark the materialized view as COMPLETE after data has been fully written. + * This should be called after the refresh data write has finished successfully. + * + * @param viewName materialized view name + * @throws IOException in case of error updating the materialized view + */ + public void completeMaterializedViewRefresh(String viewName) throws IOException { + throw UserException.unsupportedError() + .message("Completing materialized view refresh is not supported in schema [%s]", getSchemaPath()) + .build(logger); + } + /** * Get the materialized view with the given name. * diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java index 398e0d426e1..78cb4bdef42 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java @@ -441,10 +441,11 @@ public void refreshMaterializedView(String viewName) throws IOException { schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val); getFS().mkdirs(dataPath, dirPerms); - // Update the materialized view with new refresh time + // Mark as INCOMPLETE while data is being refreshed. + // completeMaterializedViewRefresh() should be called after data is fully written. 
MaterializedView updatedMV = mv.withRefreshInfo( - System.currentTimeMillis(), - MaterializedView.RefreshStatus.COMPLETE); + mv.getLastRefreshTime(), + MaterializedView.RefreshStatus.INCOMPLETE); // Write the updated definition file Path viewPath = getMaterializedViewPath(viewName); @@ -458,6 +459,30 @@ public void refreshMaterializedView(String viewName) throws IOException { syncMaterializedViewToMetastore(updatedMV); } + @Override + public void completeMaterializedViewRefresh(String viewName) throws IOException { + MaterializedView mv = getMaterializedView(viewName); + if (mv == null) { + throw UserException.validationError() + .message("Materialized view [%s] not found in schema [%s]", viewName, getFullSchemaName()) + .build(logger); + } + + // Mark as COMPLETE with current timestamp now that data is fully written + MaterializedView updatedMV = mv.withRefreshInfo( + System.currentTimeMillis(), + MaterializedView.RefreshStatus.COMPLETE); + + Path viewPath = getMaterializedViewPath(viewName); + final FsPermission viewPerms = new FsPermission( + schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val); + try (OutputStream stream = DrillFileSystem.create(getFS(), viewPath, viewPerms)) { + mapper.writeValue(stream, updatedMV); + } + + syncMaterializedViewToMetastore(updatedMV); + } + @Override public CreateTableEntry createMaterializedViewDataWriter(String viewName) { // Use Parquet format for storing materialized view data