diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdate.java index d663b831dc2..73d7746d31f 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdate.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdate.java @@ -18,10 +18,10 @@ */ package org.apache.jackrabbit.oak.plugins.index; -import static org.apache.jackrabbit.oak.commons.conditions.Validate.checkArgument; import static java.util.Objects.requireNonNull; import static org.apache.jackrabbit.oak.api.jmx.IndexStatsMBean.STATUS_DONE; import static org.apache.jackrabbit.oak.commons.PathUtils.elements; +import static org.apache.jackrabbit.oak.commons.conditions.Validate.checkArgument; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ASYNC_PROPERTY_NAME; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME; import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.MISSING_NODE; @@ -30,6 +30,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -50,7 +51,6 @@ import javax.management.openmbean.SimpleType; import javax.management.openmbean.TabularData; -import com.codahale.metrics.MetricRegistry; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.jackrabbit.api.stats.TimeSeries; import org.apache.jackrabbit.oak.api.CommitFailedException; @@ -65,10 +65,13 @@ import org.apache.jackrabbit.oak.plugins.commit.ConflictValidatorProvider; import org.apache.jackrabbit.oak.plugins.index.IndexUpdate.MissingIndexProviderStrategy; import org.apache.jackrabbit.oak.plugins.index.TrackingCorruptIndexHandler.CorruptIndexInfo; +import org.apache.jackrabbit.oak.plugins.index.optimizer.DiffIndexUpdater; +import 
org.apache.jackrabbit.oak.plugins.index.optimizer.IndexDefinitionGenerator; import org.apache.jackrabbit.oak.plugins.index.progress.MetricRateEstimator; import org.apache.jackrabbit.oak.plugins.index.progress.NodeCounterMBeanEstimator; import org.apache.jackrabbit.oak.plugins.memory.PropertyStates; import org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider; +import org.apache.jackrabbit.oak.query.stats.QueryStatsMBean; import org.apache.jackrabbit.oak.spi.commit.CommitContext; import org.apache.jackrabbit.oak.spi.commit.CommitHook; import org.apache.jackrabbit.oak.spi.commit.CommitInfo; @@ -77,6 +80,7 @@ import org.apache.jackrabbit.oak.spi.commit.EditorDiff; import org.apache.jackrabbit.oak.spi.commit.EditorHook; import org.apache.jackrabbit.oak.spi.commit.EditorProvider; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; import org.apache.jackrabbit.oak.spi.commit.ResetCommitAttributeHook; import org.apache.jackrabbit.oak.spi.commit.SimpleCommitContext; import org.apache.jackrabbit.oak.spi.commit.ValidatorProvider; @@ -86,6 +90,7 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStateDiff; import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.apache.jackrabbit.oak.spi.whiteboard.Tracker; import org.apache.jackrabbit.oak.stats.CounterStats; import org.apache.jackrabbit.oak.stats.Counting; import org.apache.jackrabbit.oak.stats.HistogramStats; @@ -100,6 +105,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.MetricRegistry; + public class AsyncIndexUpdate implements Runnable, Closeable { /** * Name of service property which determines the name of Async task @@ -214,13 +221,22 @@ public class AsyncIndexUpdate implements Runnable, Closeable { private final StatisticsProvider statisticsProvider; + private final Tracker statsTracker; + public AsyncIndexUpdate(@NotNull String name, @NotNull NodeStore store, @NotNull IndexEditorProvider provider, 
boolean switchOnSync) { - this(name, store, provider, StatisticsProvider.NOOP, switchOnSync); + this(name, store, provider, StatisticsProvider.NOOP, switchOnSync, null); } public AsyncIndexUpdate(@NotNull String name, @NotNull NodeStore store, - @NotNull IndexEditorProvider provider, StatisticsProvider statsProvider, boolean switchOnSync) { + @NotNull IndexEditorProvider provider, StatisticsProvider statsProvider, + boolean switchOnSync) { + this(name, store, provider, statsProvider, switchOnSync, null); + } + + public AsyncIndexUpdate(@NotNull String name, @NotNull NodeStore store, + @NotNull IndexEditorProvider provider, StatisticsProvider statsProvider, + boolean switchOnSync, @Nullable Tracker statsTracker) { this.name = checkValidName(name); this.lastIndexedTo = lastIndexedTo(name); this.store = requireNonNull(store); @@ -230,6 +246,7 @@ public AsyncIndexUpdate(@NotNull String name, @NotNull NodeStore store, this.statisticsProvider = statsProvider; this.indexStats = new AsyncIndexStats(name, statsProvider); this.corruptIndexHandler.setMeterStats(statsProvider.getMeter(TrackingCorruptIndexHandler.CORRUPT_INDEX_METER_NAME, StatsOptions.METRICS_ONLY)); + this.statsTracker = statsTracker; } public AsyncIndexUpdate(@NotNull String name, @NotNull NodeStore store, @@ -515,6 +532,10 @@ private void runWhenPermitted() { } } + if (name.equals("async")) { + improveIndexes(store); + } + // start collecting runtime statistics preAsyncRunStatsStats(indexStats); @@ -632,6 +653,80 @@ private void runWhenPermitted() { } } + private void improveIndexes(NodeStore store) { + NodeState rootState = store.getRoot(); + NodeBuilder builder = rootState.builder(); + if (statsTracker == null) { + return; + } + if (!rootState.hasChildNode("oak:index")) { + return; + } + if (!rootState.getChildNode("oak:index").hasChildNode("diff.index")) { + return; + } + List list = statsTracker.getServices(); + if (list.isEmpty()) { + return; + } + QueryStatsMBean stats = list.get(0); + if (stats == 
null) { + return; + } + TabularData slow = stats.getSlowQueries(); + + @SuppressWarnings("unchecked") + Collection coll = new ArrayList<>((Collection) slow.values()); + + // Find inefficient queries and add to collection for index diff generation + coll.addAll(findInefficientQueries(stats)); + + if (coll.isEmpty()) { + return; + } + boolean changed = false; + for (CompositeData cd : coll) { + String language = (String) cd.get("language"); + String statement = (String) cd.get("statement"); + if (statement.startsWith("explain") || statement.indexOf("/* oak-internal */") >= 0) { + continue; + } + log.info("language {} statement {}", language, statement); + String indexDef = IndexDefinitionGenerator.generateIndexDefinition(language, statement); + changed |= DiffIndexUpdater.applyIndexDefinition(store, rootState, builder, indexDef, statement); + } + if (changed) { + try { + store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + } catch (CommitFailedException e) { + log.warn("Can not store indexes", e); + } + } + } + + @SuppressWarnings("unchecked") + private List findInefficientQueries(final QueryStatsMBean stats) { + final TabularData popularQueries = stats.getPopularQueries(); + final List inefficientQueries = new ArrayList<>(); + + for (CompositeData queryData : (Collection) popularQueries.values()) { + final Long rowsRead = (Long) queryData.get("rowsRead"); + final Long rowsScanned = (Long) queryData.get("rowsScanned"); + + int readEfficiency = 100; + + if (rowsScanned > 0) { + readEfficiency = (int) ((rowsRead * 100f) / rowsScanned); + } + + if (readEfficiency <= stats.getIndexOptimizerLimit()) { + inefficientQueries.add(queryData); + } + } + + return inefficientQueries; + } + private void clearLease() throws CommitFailedException { NodeState root = store.getRoot(); NodeState async = root.getChildNode(ASYNC); @@ -807,7 +902,7 @@ protected boolean updateIndex(NodeState before, String beforeCheckpoint, CommitInfo info = new CommitInfo(CommitInfo.OAK_UNKNOWN, 
CommitInfo.OAK_UNKNOWN, Map.of(IndexConstants.CHECKPOINT_CREATION_TIME, afterTime)); indexUpdate = - new IndexUpdate(provider, name, after, builder, callback, callback, info, corruptIndexHandler) + new IndexUpdate(provider, name, after, builder, callback, callback, info, corruptIndexHandler, store) .withMissingProviderStrategy(missingStrategy); configureRateEstimator(indexUpdate); CommitFailedException exception = diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexerService.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexerService.java index b5b36e2c6db..c1d6dc979b3 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexerService.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexerService.java @@ -18,6 +18,9 @@ */ package org.apache.jackrabbit.oak.plugins.index; +import static org.apache.jackrabbit.oak.commons.conditions.Validate.checkArgument; +import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -31,11 +34,13 @@ import org.apache.jackrabbit.oak.plugins.index.property.jmx.PropertyIndexAsyncReindex; import org.apache.jackrabbit.oak.plugins.index.property.jmx.PropertyIndexAsyncReindexMBean; import org.apache.jackrabbit.oak.plugins.observation.ChangeCollectorProvider; +import org.apache.jackrabbit.oak.query.stats.QueryStatsMBean; import org.apache.jackrabbit.oak.spi.commit.ValidatorProvider; import org.apache.jackrabbit.oak.spi.state.Clusterable; import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.apache.jackrabbit.oak.spi.whiteboard.CompositeRegistration; import org.apache.jackrabbit.oak.spi.whiteboard.Registration; +import org.apache.jackrabbit.oak.spi.whiteboard.Tracker; import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard; import org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardExecutor; import 
org.apache.jackrabbit.oak.stats.StatisticsProvider; @@ -52,9 +57,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.jackrabbit.oak.commons.conditions.Validate.checkArgument; -import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean; - @Component( configurationPolicy = ConfigurationPolicy.REQUIRE, service = {}) @@ -116,6 +118,8 @@ public class AsyncIndexerService { private WhiteboardExecutor executor; + private Tracker statsTracker; + @Activate public void activate(BundleContext bundleContext, Configuration config) { List asyncIndexerConfig = getAsyncConfig(config.asyncConfigs()); @@ -124,12 +128,13 @@ public void activate(BundleContext bundleContext, Configuration config) { indexEditorProvider.start(whiteboard); executor = new WhiteboardExecutor(); executor.start(whiteboard); + statsTracker = whiteboard.track(QueryStatsMBean.class); TrackingCorruptIndexHandler corruptIndexHandler = createCorruptIndexHandler(config); for (AsyncConfig c : asyncIndexerConfig) { AsyncIndexUpdate task = new AsyncIndexUpdate(c.name, nodeStore, indexEditorProvider, - statisticsProvider, false); + statisticsProvider, false, statsTracker); task.setCorruptIndexHandler(corruptIndexHandler); task.setValidatorProviders(Collections.singletonList(validatorProvider)); @@ -158,7 +163,7 @@ public void activate(BundleContext bundleContext, Configuration config) { private void registerAsyncReindexSupport(Whiteboard whiteboard) { // async reindex String name = IndexConstants.ASYNC_REINDEX_VALUE; - AsyncIndexUpdate task = new AsyncIndexUpdate(name, nodeStore, indexEditorProvider, statisticsProvider, true); + AsyncIndexUpdate task = new AsyncIndexUpdate(name, nodeStore, indexEditorProvider, statisticsProvider, true, null); PropertyIndexAsyncReindex asyncPI = new PropertyIndexAsyncReindex(task, executor); final Registration reg = new CompositeRegistration( @@ -177,6 +182,9 @@ public void deactivate() throws IOException { executor.stop(); 
executor = null; } + if (statsTracker != null) { + statsTracker.stop(); + } //Close the task *after* unregistering the jobs closer.close(); diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexInfoServiceImpl.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexInfoServiceImpl.java index 5721f3a6d31..63a1f3edb15 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexInfoServiceImpl.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexInfoServiceImpl.java @@ -66,7 +66,7 @@ public Iterable getAllIndexInfo() { if (indexPathService.getMountInfoProvider().hasNonDefaultMounts()) { activeIndexes.addAll(IndexName.filterReplacedIndexes(allIndexes, nodeStore.getRoot(), true)); } else { - activeIndexes.addAll(allIndexes); + activeIndexes.addAll(IndexName.filterNewestIndexes(allIndexes, nodeStore.getRoot())); } return IterableUtils.filter(IterableUtils.transform(indexPathService.getIndexPaths(), indexPath -> { try { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java index 3597079d28b..7d8313c8e22 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java @@ -234,6 +234,23 @@ public static Collection filterReplacedIndexes(Collection indexP return result; } + public static Collection filterNewestIndexes(Collection indexPaths, NodeState rootState) { + HashMap latestVersions = new HashMap<>(); + for (String p : indexPaths) { + IndexName indexName = IndexName.parse(p); + IndexName stored = latestVersions.get(indexName.baseName); + if (stored == null || stored.compareTo(indexName) < 0) { + // no old version, or old version is smaller: use + latestVersions.put(indexName.baseName, indexName); + } + } + ArrayList result = new 
ArrayList<>(latestVersions.size()); + for (IndexName n : latestVersions.values()) { + result.add(n.nodeName); + } + return result; + } + public String nextCustomizedName() { return baseName + "-" + productVersion + "-custom-" + (customerVersion + 1); } diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java index e33bfe9eff7..cacf99e103c 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java @@ -48,6 +48,8 @@ import org.apache.jackrabbit.oak.commons.collections.SetUtils; import org.apache.jackrabbit.oak.plugins.index.IndexCommitCallback.IndexProgress; import org.apache.jackrabbit.oak.plugins.index.NodeTraversalCallback.PathSource; +import org.apache.jackrabbit.oak.plugins.index.diff.DiffIndex; +import org.apache.jackrabbit.oak.plugins.index.diff.DiffIndexMerger; import org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter; import org.apache.jackrabbit.oak.plugins.index.progress.NodeCountEstimator; import org.apache.jackrabbit.oak.plugins.index.progress.TraversalRateEstimator; @@ -60,6 +62,7 @@ import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; +import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.apache.jackrabbit.oak.spi.state.ReadOnlyBuilder; import org.apache.jackrabbit.util.ISO8601; import org.jetbrains.annotations.NotNull; @@ -108,6 +111,8 @@ public class IndexUpdate implements Editor, PathSource { } } + private final NodeStore store; + private final IndexUpdateRootState rootState; private final NodeBuilder builder; @@ -150,6 +155,16 @@ public IndexUpdate( NodeState root, NodeBuilder builder, IndexUpdateCallback updateCallback, NodeTraversalCallback traversalCallback, 
CommitInfo commitInfo, CorruptIndexHandler corruptIndexHandler) { + this(provider, async, root, builder, updateCallback, traversalCallback, commitInfo, corruptIndexHandler, null); + } + + public IndexUpdate( + IndexEditorProvider provider, String async, + NodeState root, NodeBuilder builder, + IndexUpdateCallback updateCallback, NodeTraversalCallback traversalCallback, + CommitInfo commitInfo, CorruptIndexHandler corruptIndexHandler, + @Nullable NodeStore store) { + this.store = store; this.parent = null; this.name = null; this.path = "/"; @@ -158,6 +173,7 @@ public IndexUpdate( } private IndexUpdate(IndexUpdate parent, String name) { + this.store = parent.store; this.parent = requireNonNull(parent); this.name = name; this.rootState = parent.rootState; @@ -279,6 +295,12 @@ private static boolean hasAnyHiddenNodes(NodeBuilder builder) { } private void collectIndexEditors(NodeBuilder definitions, NodeState before) throws CommitFailedException { + if (definitions.hasChildNode(DiffIndexMerger.DIFF_INDEX) + && "disabled".equals(definitions.child(DiffIndexMerger.DIFF_INDEX).getString("type"))) { + if (rootState.async == null || rootState.async.equals("async")) { + DiffIndex.createNewIndexesIfNeeded(store, definitions); + } + } for (String name : definitions.getChildNodeNames()) { NodeBuilder definition = definitions.getChildNode(name); if (isIncluded(rootState.async, definition)) { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java new file mode 100644 index 00000000000..dbcb294d53c --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Comparator; + +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.IndexName; +import org.apache.jackrabbit.oak.plugins.tree.TreeConstants; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DiffIndex { + + private static final Logger LOG = LoggerFactory.getLogger(DiffIndex.class); + + public static void createNewIndexesIfNeeded(NodeStore store, NodeBuilder indexDefinitions) { + JsonObject newImageLuceneDefinitions = null; + for (String diffIndex : new String[] { DiffIndexMerger.DIFF_INDEX, DiffIndexMerger.DIFF_INDEX_OPTIMIZER }) { + if (!indexDefinitions.hasChildNode(diffIndex)) { + continue; + } + NodeBuilder diffIndexDefinition = indexDefinitions.child(diffIndex); + NodeBuilder diffJson = diffIndexDefinition.getChildNode("diff.json"); + if 
(!diffJson.exists()) { + continue; + } + NodeBuilder jcrContent = diffJson.getChildNode("jcr:content"); + if (!jcrContent.exists()) { + continue; + } + PropertyState lastMod = jcrContent.getProperty("jcr:lastModified"); + if (lastMod == null) { + continue; + } + String modified = lastMod.getValue(Type.DATE); + PropertyState lastProcessed = jcrContent.getProperty(":lastProcessed"); + if (lastProcessed != null) { + if (modified.equals(lastProcessed.getValue(Type.STRING))) { + // already processed + continue; + } + } + // store now, so a change is only processed once + jcrContent.setProperty(":lastProcessed", modified); + PropertyState jcrData = jcrContent.getProperty("jcr:data"); + String diff = readString(jcrData); + if (diff == null) { + continue; + } + try { + JsonObject diffObj = JsonObject.fromJson("{\"diff\": " + diff + "}", true); + diffIndexDefinition.removeProperty("error"); + if (newImageLuceneDefinitions == null) { + newImageLuceneDefinitions = new JsonObject(); + } + newImageLuceneDefinitions.getChildren().put("/oak:index/" + diffIndex, diffObj); + } catch (Exception e) { + String message = "Error parsing diff.index"; + LOG.warn(message + ": {}", e.getMessage(), e); + diffIndexDefinition.setProperty("error", message + ": " + e.getMessage()); + } + } + if (newImageLuceneDefinitions == null) { + // not a valid diff index, or already processed + return; + } + LOG.info("Processing a new diff.index with node store {}", store); + JsonObject repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(indexDefinitions); + LOG.debug("Index list {}", repositoryDefinitions.toString()); + try { + DiffIndexMerger.merge(newImageLuceneDefinitions, repositoryDefinitions, store); + for (String m : newImageLuceneDefinitions.getChildren().keySet()) { + if (m.startsWith("/oak:index/" + DiffIndexMerger.DIFF_INDEX)) { + continue; + } + JsonObject newDef = newImageLuceneDefinitions.getChildren().get(m); + String indexNodeName = PathUtils.getName(m); + 
JsonNodeBuilder.addOrReplace(indexDefinitions, store, indexNodeName, IndexConstants.INDEX_DEFINITIONS_NODE_TYPE, newDef.toString()); + updateNodetypeIndexForPath(indexDefinitions, indexNodeName, true); + disableOrRemoveOldVersions(indexDefinitions, m, indexNodeName); + } + removeDisabledMergedIndexes(indexDefinitions); + sortIndexes(indexDefinitions); + } catch (Exception e) { + LOG.warn("Error merging diff.index: {}", e.getMessage(), e); + NodeBuilder diffIndexDefinition = indexDefinitions.child(DiffIndexMerger.DIFF_INDEX); + diffIndexDefinition.setProperty("error", e.getMessage()); + } + } + + public static String readString(PropertyState jcrData) { + InputStream in = jcrData.getValue(Type.BINARY).getNewStream(); + try { + return new String(in.readAllBytes(), StandardCharsets.UTF_8); + } catch (IOException e) { + return null; + } + } + + private static void sortIndexes(NodeBuilder builder) { + ArrayList list = new ArrayList<>(); + for (String child : builder.getChildNodeNames()) { + list.add(child); + } + list.sort(Comparator.naturalOrder()); + builder.setProperty(TreeConstants.OAK_CHILD_ORDER, list, Type.NAMES); + } + + private static void removeDisabledMergedIndexes(NodeBuilder definitions) { + ArrayList toRemove = new ArrayList<>(); + for (String child : definitions.getChildNodeNames()) { + if (!definitions.getChildNode(child).hasProperty("mergeChecksum")) { + continue; + } + if ("disabled".equals(definitions.getChildNode(child).getString("type"))) { + toRemove.add(child); + } + } + for (String r : toRemove) { + LOG.info("Removing disabled index " + r); + definitions.child(r).remove(); + updateNodetypeIndexForPath(definitions, r, false); + } + } + + private static void disableOrRemoveOldVersions(NodeBuilder definitions, String m, String except) { + if (m.startsWith("/oak:index/")) { + m = m.substring("/oak:index/".length()); + } + IndexName name = IndexName.parse(m); + ArrayList toRemove = new ArrayList<>(); + for (String child : 
definitions.getChildNodeNames()) { + if (child.indexOf("-custom-") < 0) { + // not a customized or custom index + continue; + } + if (child.equals(except)) { + continue; + } + IndexName n2 = IndexName.parse(child); + if (name.getBaseName().equals(n2.getBaseName())) { + if (m.equals(child)) { + if (!"disabled".equals(definitions.getChildNode(m).getString("type"))) { + continue; + } + } + toRemove.add(child); + } + } + for (String r : toRemove) { + LOG.info("Removing old index " + r); + definitions.child(r).remove(); + updateNodetypeIndexForPath(definitions, r, false); + } + } + + private static void updateNodetypeIndexForPath(NodeBuilder indexDefinitions, + String indexName, boolean add) { + LOG.info("nodetype index update add={} name={}", add, indexName); + if (!indexDefinitions.hasChildNode("nodetype")) { + return; + } + NodeBuilder nodetypeIndex = indexDefinitions.getChildNode("nodetype"); + NodeBuilder indexContent = nodetypeIndex.child(":index"); + String key = URLEncoder.encode("oak:QueryIndexDefinition", StandardCharsets.UTF_8); + String path = "/oak:index/" + indexName; + if (add) { + // insert entry + NodeBuilder builder = indexContent.child(key); + for (String name : PathUtils.elements(path)) { + builder = builder.child(name); + } + LOG.info("nodetype index match"); + builder.setProperty("match", true); + } else { + // remove entry (for deleted indexes) + NodeBuilder builder = indexContent.getChildNode(key); + for (String name : PathUtils.elements(path)) { + builder = builder.getChildNode(name); + } + if (builder.exists()) { + LOG.info("nodetype index remove"); + builder.removeProperty("match"); + } + } + } + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java new file mode 100644 index 00000000000..1b14c242863 --- /dev/null +++ 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java @@ -0,0 +1,757 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +import org.apache.jackrabbit.oak.commons.StringUtils; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.commons.json.JsopTokenizer; +import org.apache.jackrabbit.oak.json.Base64BlobSerializer; +import org.apache.jackrabbit.oak.json.JsonSerializer; +import org.apache.jackrabbit.oak.plugins.index.IndexName; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** 
+ * Index definition merge utility that uses the "diff" mode. + */ +public class DiffIndexMerger { + + final static Logger LOG = LoggerFactory.getLogger(DiffIndexMerger.class); + + public final static String DIFF_INDEX = "diff.index"; + public final static String DIFF_INDEX_OPTIMIZER = "diff.index.optimizer"; + + private final static String MERGE_INFO = "This index was auto-merged. See also https://thomasmueller.github.io/oakTools/simplified.html"; + + // the list of unsupported included paths, e.g. "/apps,/libs" + // by default all paths are supported + private final static String[] UNSUPPORTED_INCLUDED_PATHS = System.getProperty("oak.diffIndex.unsupportedPaths", "").split(","); + + // in case a custom index is removed, whether a dummy index is created + private final static boolean DELETE_CREATES_DUMMY = Boolean.getBoolean("oak.diffIndex.deleteCreatesDummy"); + + // in case a customization was removed, create a copy of the OOTB index + private final static boolean DELETE_COPIES_OOTB = Boolean.getBoolean("oak.diffIndex.deleteCopiesOOTB"); + + /** + * If there is a diff index, that is an index with prefix "diff.", then try to merge it. 
+ * + * @param newImageLuceneDefinitions + * the new indexes + * (input and output) + * @param repositoryDefinitions + * the indexes in the writable repository + * (input) + * @param repositoryNodeStore + */ + public static void merge(JsonObject newImageLuceneDefinitions, JsonObject repositoryDefinitions, NodeStore repositoryNodeStore) { + // combine all definitions into one object + JsonObject combined = new JsonObject(); + + // index definitions in the repository + combined.getChildren().putAll(repositoryDefinitions.getChildren()); + + // read the diff.index explicitly, + // because it's a not a regular index definition, + // and so in the repositoryDefinitions + if (repositoryNodeStore != null) { + Map diffInRepo = readDiffIndex(repositoryNodeStore, DIFF_INDEX_OPTIMIZER); + combined.getChildren().putAll(diffInRepo); + } + + // overwrite with the provided definitions (if any) + combined.getChildren().putAll(newImageLuceneDefinitions.getChildren()); + + // check if there "diff.index" or "diff.index.optimizer" + boolean found = combined.getChildren().containsKey("/oak:index/" + DIFF_INDEX) + || combined.getChildren().containsKey("/oak:index/" + DIFF_INDEX_OPTIMIZER); + if (!found) { + // early exit, so that the risk of merging the PR + // is very small for customers that do not use this + LOG.debug("No 'diff.index' definition"); + return; + } + mergeDiff(newImageLuceneDefinitions, combined); + } + + /** + * If there is a diff index (hardcoded node "/oak:index/diff.index" or + * "/oak:index/diff.index.optimizer"), then iterate over all entries and create new + * (merged) versions if needed. 
+ * + * @param newImageLuceneDefinitions + * the new Lucene definitions + * (input + output) + * @param combined + * the definitions in the repository, + * including the one in the customer repo and new ones + * (input) + * @return whether a new version of an index was added + */ + static boolean mergeDiff(JsonObject newImageLuceneDefinitions, JsonObject combined) { + // iterate again, this time process + + // collect the diff index(es) + HashMap toProcess = new HashMap<>(); + tryExtractDiffIndex(combined, "/oak:index/" + DIFF_INDEX, toProcess); + tryExtractDiffIndex(combined, "/oak:index/" + DIFF_INDEX_OPTIMIZER, toProcess); + // if the diff index exists, but doesn't contain some of the previous indexes + // (indexes with mergeInfo), then we need to disable those (using /dummy includedPath) + extractExistingMergedIndexes(combined, toProcess); + if (toProcess.isEmpty()) { + LOG.debug("No diff index definitions found."); + return false; + } + boolean hasChanges = false; + for (Entry e : toProcess.entrySet()) { + String key = e.getKey(); + JsonObject value = e.getValue(); + if (key.startsWith("/oak:index/")) { + LOG.warn("The key should contains just the index name, without the '/oak:index' prefix for key {}", key); + key = key.substring("/oak:index/".length()); + } + LOG.debug("Processing {}", key); + hasChanges |= processMerge(key, value, newImageLuceneDefinitions, combined); + } + return hasChanges; + } + + /** + * Extract a "diff.index" from the set of index definitions (if found), and if + * found, store the nested entries in the target map, merging them with previous + * entries if found. + * + * The diff.index may either have a file (a "jcr:content" child node with a + * "jcr:data" property), or a "diff" JSON object. For customers (in the git + * repository), the file is much easier to construct, but when running the + * indexing job, the nested JSON is much easier. 
+ * + * @param indexDefs the set of index definitions (may be empty) + * @param name the name of the diff.index (either diff.index or + * diff.index.optimizer) + * @param target the target map of diff.index definitions + * @return the error message trying to parse the JSON file, or null + */ + static String tryExtractDiffIndex(JsonObject indexDefs, String name, HashMap target) { + JsonObject diffIndex = indexDefs.getChildren().get(name); + if (diffIndex == null) { + return null; + } + // extract either the file, or the nested json + JsonObject file = diffIndex.getChildren().get("diff.json"); + JsonObject diff; + if (file != null) { + // file + JsonObject jcrContent = file.getChildren().get("jcr:content"); + if (jcrContent == null) { + String message = "jcr:content child node is missing in diff.json"; + LOG.warn(message); + return message; + } + String jcrData = JsonNodeBuilder.oakStringValue(jcrContent, "jcr:data"); + try { + diff = JsonObject.fromJson(jcrData, true); + } catch (Exception e) { + LOG.warn("Illegal Json, ignoring: {}", jcrData, e); + String message = "Illegal Json, ignoring: " + e.getMessage(); + return message; + } + } else { + // nested json + diff = diffIndex.getChildren().get("diff"); + } + // store, if not empty + if (diff != null) { + for (Entry e : diff.getChildren().entrySet()) { + String key = e.getKey(); + target.put(key, mergeDiffs(target.get(key), e.getValue())); + } + } + return null; + } + + /** + * Extract the indexes with a "mergeInfo" property and store them in the target + * object. This is needed so that indexes that were removed from the index.diff + * are detected (a new version is needed in this case with includedPaths + * "/dummy"). + * + * @param indexDefs the index definitions in the repository + * @param target the target map of "diff.index" definitions. 
for each entry + * found, an empty object is added + */ + private static void extractExistingMergedIndexes(JsonObject indexDefs, HashMap target) { + for (Entry e : indexDefs.getChildren().entrySet()) { + String key = e.getKey(); + JsonObject value = e.getValue(); + if (key.indexOf("-custom-") < 0 || !value.getProperties().containsKey("mergeInfo")) { + continue; + } + String baseName = IndexName.parse(key.substring("/oak:index/".length())).getBaseName(); + if (!target.containsKey(baseName)) { + // if there is no entry yet for this key, + // add a new empty object + target.put(baseName, new JsonObject()); + } + } + } + + /** + * Merge diff from "diff.index" and "diff.index.optimizer". + * The customer can define a diff (stored in "diff.index") + * and someone else (or the optimizer) can define one (stored in "diff.index.optimizer"). + * + * @param a the first diff + * @param b the second diff (overwrites entries in a) + * @return the merged entry + */ + private static JsonObject mergeDiffs(JsonObject a, JsonObject b) { + if (a == null) { + return b; + } else if (b == null) { + return a; + } + JsonObject result = JsonObject.fromJson(a.toString(), true); + result.getProperties().putAll(b.getProperties()); + HashSet both = new HashSet<>(a.getChildren().keySet()); + both.addAll(b.getChildren().keySet()); + for (String k : both) { + result.getChildren().put(k, mergeDiffs(a.getChildren().get(k), b.getChildren().get(k))); + } + return result; + } + + /** + * Merge using the diff definition. + * + * If the latest customized index already matches, then + * newImageLuceneDefinitions will remain as is. Otherwise, a new customized + * index is added, with a "mergeInfo" property. + * + * Existing properties are never changed; only new properties/children are + * added. + * + * @param indexName the name, eg. 
"damAssetLucene" + * @param indexDiff the diff with the new properties + * @param newImageLuceneDefinitions the new Lucene definitions (input + output) + * @param combined the definitions in the repository, including + * the one in the customer repo and new ones + * (input) + * @return whether a new version of an index was added + */ + public static boolean processMerge(String indexName, JsonObject indexDiff, JsonObject newImageLuceneDefinitions, JsonObject combined) { + // extract the latest product index (eg. damAssetLucene-12) + // and customized index (eg. damAssetLucene-12-custom-3) - if any + IndexName latestProduct = null; + String latestProductKey = null; + IndexName latestCustomized = null; + String latestCustomizedKey = null; + String prefix = "/oak:index/"; + for (String key : combined.getChildren().keySet()) { + IndexName name = IndexName.parse(key.substring(prefix.length())); + if (!name.isVersioned()) { + LOG.debug("Ignoring unversioned index {}", name); + continue; + } + if (!name.getBaseName().equals(indexName)) { + continue; + } + boolean isCustom = key.indexOf("-custom-") >= 0; + if (isCustom) { + if (latestCustomized == null || + name.compareTo(latestCustomized) > 0) { + latestCustomized = name; + latestCustomizedKey = key; + } + } else { + if (latestProduct == null || + name.compareTo(latestProduct) > 0) { + latestProduct = name; + latestProductKey = key; + } + } + } + LOG.debug("Latest product: {}", latestProductKey); + LOG.debug("Latest customized: {}", latestCustomizedKey); + if (latestProduct == null) { + if (indexName.indexOf('.') >= 0) { + // a fully custom index needs to contains a dot + LOG.debug("Fully custom index {}", indexName); + } else { + LOG.debug("No product version for {}", indexName); + return false; + } + } + JsonObject latestProductIndex = combined.getChildren().get(latestProductKey); + String[] includedPaths; + if (latestProductIndex == null) { + if (indexDiff.getProperties().isEmpty() && indexDiff.getChildren().isEmpty()) 
{ + // there is no customization (any more), which means a dummy index may be needed + LOG.debug("No customization for {}", indexName); + } else { + includedPaths = JsonNodeBuilder.oakStringArrayValue(indexDiff, "includedPaths"); + if (includesUnsupportedPaths(includedPaths)) { + LOG.warn("New custom index {} is not supported because it contains an unsupported path ({})", + indexName, Arrays.toString(UNSUPPORTED_INCLUDED_PATHS)); + return false; + } + } + } else { + includedPaths = JsonNodeBuilder.oakStringArrayValue(latestProductIndex, "includedPaths"); + if (includesUnsupportedPaths(includedPaths)) { + LOG.warn("Customizing index {} is not supported because it contains an unsupported path ({})", + latestProductKey, Arrays.toString(UNSUPPORTED_INCLUDED_PATHS)); + return false; + } + } + + // merge + JsonObject merged = null; + if (indexDiff == null) { + // no diff definition: use to the OOTB index + if (latestCustomized == null) { + LOG.debug("Only a product index found, nothing to do"); + return false; + } + merged = latestProductIndex; + } else { + merged = processMerge(latestProductIndex, indexDiff); + } + + // compare to the latest version of the this index + JsonObject latestIndexVersion = new JsonObject(); + if (latestCustomized == null) { + latestIndexVersion = latestProductIndex; + } else { + latestIndexVersion = combined.getChildren().get(latestCustomizedKey); + } + JsonObject mergedDef = cleanedAndNormalized(switchToLucene(merged)); + // compute merge checksum for later, but do not yet add + String mergeChecksum = computeMergeChecksum(mergedDef); + // get the merge checksum before cleaning (cleaning removes it) - if available + String key; + if (latestIndexVersion == null) { + // new index + key = prefix + indexName + "-1-custom-1"; + } else { + String latestMergeChecksum = JsonNodeBuilder.oakStringValue(latestIndexVersion, "mergeChecksum"); + JsonObject latestDef = cleanedAndNormalized(switchToLucene(latestIndexVersion)); + if 
(isSameIgnorePropertyOrder(mergedDef, latestDef)) { + // normal case: no change + // (even if checksums do not match: checksums might be missing or manipulated) + LOG.debug("Latest index matches"); + if (latestMergeChecksum != null && !latestMergeChecksum.equals(mergeChecksum)) { + LOG.warn("Indexes do match, but checksums do not. Possibly checksum was changed: {} vs {}", latestMergeChecksum, mergeChecksum); + LOG.warn("latest: {}\nmerged: {}", latestDef, mergedDef); + } + return false; + } + if (latestMergeChecksum != null && latestMergeChecksum.equals(mergeChecksum)) { + // checksum matches, but data does not match + // could be eg. due to numbers formatting issues (-0.0 vs 0.0, 0.001 vs 1e-3) + // but unexpected because we do not normally have such cases + LOG.warn("Indexes do not match, but checksums match. Possible normalization issue."); + LOG.warn("Index: {}, latest: {}\nmerged: {}", indexName, latestDef, mergedDef); + // if checksums match, we consider it a match + return false; + } + LOG.info("Indexes do not match, with"); + LOG.info("Index: {}, latest: {}\nmerged: {}", indexName, latestDef, mergedDef); + // a new merged index definition + if (latestProduct == null) { + // fully custom index: increment version + key = prefix + indexName + + "-" + latestCustomized.getProductVersion() + + "-custom-" + (latestCustomized.getCustomerVersion() + 1); + } else { + // customized OOTB index: use the latest product as the base + key = prefix + indexName + + "-" + latestProduct.getProductVersion() + + "-custom-"; + if (latestCustomized != null) { + key += (latestCustomized.getCustomerVersion() + 1); + } else { + key += "1"; + } + } + } + merged.getProperties().put("mergeInfo", JsopBuilder.encode(MERGE_INFO)); + merged.getProperties().put("mergeChecksum", JsopBuilder.encode(mergeChecksum)); + merged.getProperties().put("merges", "[" + JsopBuilder.encode("/oak:index/" + indexName) + "]"); + merged.getProperties().remove("reindexCount"); + 
merged.getProperties().remove("reindex"); + if (!DELETE_COPIES_OOTB && indexDiff.toString().equals("{}")) { + merged.getProperties().put("type", "\"disabled\""); + merged.getProperties().put("mergeComment", "\"This index is superseeded and can be removed\""); + } + newImageLuceneDefinitions.getChildren().put(key, merged); + return true; + } + + /** + * Check whether the includedPaths covers unsupported paths, + * if there are any unsupported path (eg. "/apps" or "/libs"). + * + * @param includedPaths the includedPaths list + * @return true if any unsupported path is included + */ + public static boolean includesUnsupportedPaths(String[] includedPaths) { + if (UNSUPPORTED_INCLUDED_PATHS.length == 1 && "".equals(UNSUPPORTED_INCLUDED_PATHS[0])) { + // set to an empty string + return false; + } + if (includedPaths == null) { + // not set means all entries + return true; + } + for (String path : includedPaths) { + if ("/".equals(path)) { + // all + return true; + } + for (String unsupported : UNSUPPORTED_INCLUDED_PATHS) { + if (unsupported.isEmpty()) { + continue; + } + if (path.equals(unsupported) || path.startsWith(unsupported + "/")) { + // includedPaths matches, or starts with an unsupported path + return true; + } + } + } + return false; + } + + /** + * Compute the SHA-256 checksum of the JSON object. This is useful to detect + * that the JSON object was not "significantly" changed, even if stored + * somewhere and later read again. Insignificant changes include: rounding of + * floating point numbers, re-ordering properties, things like that. Without the + * checksum, we would risk creating a new version of a customized index each + * time the indexing job is run, even thought the customer didn't change + * anything. 
+ * + * @param json the input + * @return the SHA-256 checksum + */ + private static String computeMergeChecksum(JsonObject json) { + byte[] bytes = json.toString().getBytes(StandardCharsets.UTF_8); + try { + MessageDigest md = MessageDigest.getInstance("SHA-256"); + return StringUtils.convertBytesToHex(md.digest(bytes)); + } catch (NoSuchAlgorithmException e) { + // SHA-256 is guaranteed to be available in standard Java platforms + throw new RuntimeException("SHA-256 algorithm not available", e); + } + } + + /** + * Switch the index from type "elasticsearch" to "lucene", if needed. This will + * also replace all properties that have an "...@lucene" version. + * + * This is needed because we want to merge only the "lucene" version, to + * simplify the merging logic. (The switch to the "elasticsearch" version + * happens later). + * + * @param indexDef the index definition (is not changed by this method) + * @return the lucene version (a new JSON object) + */ + public static JsonObject switchToLucene(JsonObject indexDef) { + JsonObject obj = JsonObject.fromJson(indexDef.toString(), true); + String type = JsonNodeBuilder.oakStringValue(obj, "type"); + if (type == null || !"elasticsearch".equals(type) ) { + return obj; + } + switchToLuceneChildren(obj); + return obj; + } + + private static void switchToLuceneChildren(JsonObject indexDef) { + // clone the keys to avoid ConcurrentModificationException + for (String p : new ArrayList<>(indexDef.getProperties().keySet())) { + if (!p.endsWith("@lucene")) { + continue; + } + String v = indexDef.getProperties().remove(p); + indexDef.getProperties().put(p.substring(0, p.length() - "@lucene".length()), v); + } + for (String c : indexDef.getChildren().keySet()) { + JsonObject co = indexDef.getChildren().get(c); + switchToLuceneChildren(co); + } + } + + /** + * Convert the JSON object to a new object, where index definition + * properties that are unimportant for comparison are removed. 
+ * Example of important properties are "reindex", "refresh", "seed" etc. + * + * @param obj the input (is not changed by the method) + * @return a new JSON object + */ + public static JsonObject cleanedAndNormalized(JsonObject obj) { + obj = JsonObject.fromJson(obj.toString(), true); + obj.getProperties().remove(":version"); + obj.getProperties().remove(":nameSeed"); + obj.getProperties().remove(":mappingVersion"); + obj.getProperties().remove("refresh"); + obj.getProperties().remove("reindexCount"); + obj.getProperties().remove("reindex"); + obj.getProperties().remove("seed"); + obj.getProperties().remove("merges"); + obj.getProperties().remove("mergeInfo"); + obj.getProperties().remove("mergeChecksum"); + for (String p : new ArrayList<>(obj.getProperties().keySet())) { + if (p.endsWith("@lucene")) { + obj.getProperties().remove(p); + } else if (p.endsWith("@elasticsearch")) { + obj.getProperties().remove(p); + } else { + // remove "str:", "nam:", etc if needed + String v = obj.getProperties().get(p); + String v2 = normalizeOakString(v); + if (!v2.equals(v)) { + obj.getProperties().put(p, v2); + } + } + } + removeUUIDs(obj); + for (Entry e : obj.getChildren().entrySet()) { + obj.getChildren().put(e.getKey(), cleanedAndNormalized(e.getValue())); + } + // re-build the properties in alphabetical order + // (sorting the child nodes would be incorrect however, as order is significant here) + TreeMap props = new TreeMap<>(obj.getProperties()); + obj.getProperties().clear(); + for (Entry e : props.entrySet()) { + obj.getProperties().put(e.getKey(), e.getValue()); + } + return obj; + } + + /** + * "Normalize" a JSON string value. Remove any "nam:" and "dat:" and "str:" + * prefix in the value, because customers won't use them normally. (We want the + * diff to be as simple as possible). + * + * @param value the value (including double quotes; eg. 
"str:value") + * @return the normalized value (including double quotes) + */ + private static String normalizeOakString(String value) { + if (value == null || !value.startsWith("\"")) { + // ignore numbers + return value; + } + value = JsopTokenizer.decodeQuoted(value); + if (value.startsWith("str:") || value.startsWith("nam:") || value.startsWith("dat:")) { + value = value.substring("str:".length()); + } + return JsopBuilder.encode(value); + } + + /** + * Remove all "jcr:uuid" properties (including those in children), because the + * values might conflict. (new uuids are added later when needed). + * + * @param obj the JSON object where uuids will be removed. + */ + private static void removeUUIDs(JsonObject obj) { + obj.getProperties().remove("jcr:uuid"); + for (JsonObject c : obj.getChildren().values()) { + removeUUIDs(c); + } + } + + /** + * Merge a product index with a diff. If the product index is null, then the + * diff needs to contain a complete custom index definition. + * + * @param productIndex the product index definition, or null if none + * @param diff the diff (from the diff.index definition) + * @return the index definition of the merged index + */ + private static JsonObject processMerge(JsonObject productIndex, JsonObject diff) { + JsonObject result; + if (productIndex == null) { + // fully custom index + result = new JsonObject(true); + } else { + result = JsonObject.fromJson(productIndex.toString(), true); + } + mergeInto("", diff, result); + addPrimaryType("", result); + return result; + } + + /** + * Add primary type properties where needed. For the top-level index definition, + * this is "oak:QueryIndexDefinition", and "nt:unstructured" elsewhere. 
+ * + * @param path the path (so we can call the method recursively) + * @param json the JSON object (is changed if needed) + */ + private static void addPrimaryType(String path, JsonObject json) { + // all nodes need to have a node type; + // the index definition itself (at root level) is "oak:QueryIndexDefinition", + // and all other nodes are "nt:unstructured" + if (!json.getProperties().containsKey("jcr:primaryType")) { + // all nodes need to have a primary type, + // otherwise index import will fail + String nodeType; + if (path.isEmpty()) { + nodeType = "oak:QueryIndexDefinition"; + } else { + nodeType = "nt:unstructured"; + } + String nodeTypeValue = "nam:" + nodeType; + json.getProperties().put("jcr:primaryType", JsopBuilder.encode(nodeTypeValue)); + } + for (Entry e : json.getChildren().entrySet()) { + addPrimaryType(path + "/" + e.getKey(), e.getValue()); + } + } + + /** + * Merge a JSON diff into a target index definition. + * + * @param path the path + * @param diff the diff (what to merge) + * @param target where to merge into + */ + private static void mergeInto(String path, JsonObject diff, JsonObject target) { + for (String p : diff.getProperties().keySet()) { + if (path.isEmpty()) { + if ("jcr:primaryType".equals(p)) { + continue; + } + } + if (target.getProperties().containsKey(p)) { + // we do not currently allow to overwrite existing properties + LOG.warn("Ignoring existing property {} at {}", p, path); + } else { + target.getProperties().put(p, diff.getProperties().get(p)); + } + } + for (String c : diff.getChildren().keySet()) { + if (!target.getChildren().containsKey(c)) { + // only create the child (properties are added below) + target.getChildren().put(c, new JsonObject()); + } + mergeInto(path + "/" + c, diff.getChildren().get(c), target.getChildren().get(c)); + } + if (target.getProperties().isEmpty() && target.getChildren().isEmpty()) { + if (DELETE_CREATES_DUMMY) { + // dummy index + target.getProperties().put("async", "\"async\""); + 
target.getProperties().put("includedPaths", "\"/dummy\""); + target.getProperties().put("queryPaths", "\"/dummy\""); + target.getProperties().put("type", "\"lucene\""); + JsopBuilder buff = new JsopBuilder(); + buff.object(). + key("properties").object(). + key("dummy").object(). + key("name").value("dummy"). + key("propertyIndex").value(true). + endObject(). + endObject(). + endObject(); + JsonObject indexRules = JsonObject.fromJson(buff.toString(), true); + target.getChildren().put("indexRules", indexRules); + } else { + target.getProperties().put("type", "\"disabled\""); + } + } + } + + /** + * Compare two JSON object, ignoring the order of properties. (The order of + * children is however significant). + * + * This is done in addition to the checksum comparison, because the in theory + * the customer might change the checksum (it is not read-only as read-only + * values are not supported). We do not rely on the comparison, but if comparison + * and checksum comparison do not match, we log a warning. + * + * @param a the first object + * @param b the second object + * @return true if the keys and values are equal + */ + public static boolean isSameIgnorePropertyOrder(JsonObject a, JsonObject b) { + if (!a.getChildren().keySet().equals(b.getChildren().keySet())) { + LOG.debug("Child (order) difference: {} vs {}", + a.getChildren().keySet(), b.getChildren().keySet()); + return false; + } + for (String k : a.getChildren().keySet()) { + if (!isSameIgnorePropertyOrder( + a.getChildren().get(k), b.getChildren().get(k))) { + return false; + } + } + TreeMap pa = new TreeMap<>(a.getProperties()); + TreeMap pb = new TreeMap<>(b.getProperties()); + if (!pa.toString().equals(pb.toString())) { + LOG.debug("Property value difference: {} vs {}", pa.toString(), pb.toString()); + } + return pa.toString().equals(pb.toString()); + } + + /** + * Read the "diff.index" from the repository, if it exists. 
+ * This is needed because the build-transform job doesn't have this + * data: it is only available in the writeable repository. + * + * @param repositoryNodeStore the node store + * @return a map, possibly with a single entry with key /oak:index/diff.index + */ + static Map readDiffIndex(NodeStore repositoryNodeStore, String name) { + HashMap map = new HashMap<>(); + NodeState root = repositoryNodeStore.getRoot(); + String indexPath = "/oak:index/" + name; + NodeState idxState = NodeStateUtils.getNode(root, indexPath); + LOG.debug("Searching index {}: found={}", indexPath, idxState.exists()); + if (!idxState.exists()) { + return map; + } + JsopBuilder builder = new JsopBuilder(); + String filter = "{\"properties\":[\"*\", \"-:childOrder\"],\"nodes\":[\"*\", \"-:*\"]}"; + JsonSerializer serializer = new JsonSerializer(builder, filter, new Base64BlobSerializer()); + serializer.serialize(idxState); + JsonObject jsonObj = JsonObject.fromJson(builder.toString(), true); + jsonObj = cleanedAndNormalized(jsonObj); + LOG.debug("Found {}", jsonObj.toString()); + map.put(indexPath, jsonObj); + return map; + } + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilder.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilder.java new file mode 100644 index 00000000000..5011474cdab --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilder.java @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.util.Map.Entry; +import java.util.TreeSet; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.UUID; + +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.CommitFailedException; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopReader; +import org.apache.jackrabbit.oak.commons.json.JsopTokenizer; +import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; +import org.apache.jackrabbit.oak.plugins.tree.TreeConstants; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A utility class to persist a configuration that is in the form of JSON into + * the node store. + * + * This is used to persist a small set of configuration nodes, eg. index + * definitions, using a simple JSON format. + * + * The node type does not need to be set on a per-node basis. Where it is + * missing, the provided node type is used (e.g. "nt:unstructured") + * + * A "jcr:uuid" is automatically added for nodes of type "nt:resource". 
+ * + * String, string arrays, boolean, blob, long, and double values are supported. + * Values that start with ":blobId:...base64..." are stored as binaries. "str:", + * "nam:" and "dat:" prefixes are removed. + * + * "null" entries are not supported. + */ +public class JsonNodeBuilder { + + private static final Logger LOG = LoggerFactory.getLogger(JsonNodeBuilder.class); + + /** + * Add a replace a node, including all child nodes, in the node store. + * + * @param nodeStore the target node store + * @param targetPath the target path where the node(s) is/are replaced + * @param nodeType the node type of the new node (eg. "nt:unstructured") + * @param jsonString the json string with the node data + * @throws CommitFailedException if storing the nodes failed + * @throws IOException if storing a blob failed + */ + public static void addOrReplace(NodeBuilder builder, NodeStore nodeStore, String targetPath, String nodeType, String jsonString) throws CommitFailedException, IOException { + LOG.debug("Storing {}: {}", targetPath, jsonString); + if (nodeType.indexOf("/") >= 0) { + throw new IllegalStateException("Illegal node type: " + nodeType); + } + JsonObject json = JsonObject.fromJson(jsonString, true); + for (String name : PathUtils.elements(targetPath)) { + NodeBuilder child = builder.child(name); + if (!child.hasProperty("jcr:primaryType")) { + child.setProperty("jcr:primaryType", nodeType, Type.NAME); + } + builder = child; + } + storeConfigNode(nodeStore, builder, nodeType, json); + } + + private static void storeConfigNode(NodeStore nodeStore, NodeBuilder builder, String nodeType, JsonObject json) throws IOException { + ArrayList childOrder = new ArrayList<>(); + for (Entry e : json.getChildren().entrySet()) { + String k = e.getKey(); + childOrder.add(k); + JsonObject v = e.getValue(); + storeConfigNode(nodeStore, builder.child(k), nodeType, v); + } + for (String child : builder.getChildNodeNames()) { + if (!json.getChildren().containsKey(child)) { + 
builder.child(child).remove(); + } + } + for (Entry e : json.getProperties().entrySet()) { + String k = e.getKey(); + String v = e.getValue(); + storeConfigProperty(nodeStore, builder, k, v); + } + if (!json.getProperties().containsKey("jcr:primaryType")) { + builder.setProperty("jcr:primaryType", nodeType, Type.NAME); + } + for (PropertyState prop : builder.getProperties()) { + if ("jcr:primaryType".equals(prop.getName())) { + continue; + } + if (!json.getProperties().containsKey(prop.getName())) { + builder.removeProperty(prop.getName()); + } + } + builder.setProperty(TreeConstants.OAK_CHILD_ORDER, childOrder, Type.NAMES); + if ("nt:resource".equals(JsonNodeBuilder.oakStringValue(json, "jcr:primaryType"))) { + if (!json.getProperties().containsKey("jcr:uuid")) { + String uuid = UUID.randomUUID().toString(); + builder.setProperty("jcr:uuid", uuid); + } + } + } + + private static void storeConfigProperty(NodeStore nodeStore, NodeBuilder builder, String propertyName, String value) throws IOException { + if (value.startsWith("\"")) { + // string or blob + value = JsopTokenizer.decodeQuoted(value); + if (value.startsWith(":blobId:")) { + String base64 = value.substring(":blobId:".length()); + byte[] bytes = Base64.getDecoder().decode(base64.getBytes(StandardCharsets.UTF_8)); + if (nodeStore == null) { + MemoryNodeStore mns = new MemoryNodeStore(); + Blob blob = mns.createBlob(new ByteArrayInputStream(bytes)); + builder.setProperty(propertyName, blob); + } else { + Blob blob = nodeStore.createBlob(new ByteArrayInputStream(bytes)); + builder.setProperty(propertyName, blob); + } + } else { + if (value.startsWith("str:") || value.startsWith("nam:") || value.startsWith("dat:")) { + value = value.substring("str:".length()); + } + if ("jcr:primaryType".equals(propertyName)) { + builder.setProperty(propertyName, value, Type.NAME); + } else { + builder.setProperty(propertyName, value); + } + } + } else if ("null".equals(value)) { + throw new IllegalArgumentException("Removing 
entries is not supported for property " + propertyName); + } else if ("true".equals(value)) { + builder.setProperty(propertyName, true); + } else if ("false".equals(value)) { + builder.setProperty(propertyName, false); + } else if (value.startsWith("[")) { + JsopTokenizer tokenizer = new JsopTokenizer(value); + ArrayList result = new ArrayList<>(); + tokenizer.matches('['); + if (!tokenizer.matches(']')) { + do { + if (!tokenizer.matches(JsopReader.STRING)) { + throw new IllegalArgumentException("Could not process string array " + value + " for property " + propertyName); + } + result.add(tokenizer.getEscapedToken()); + } while (tokenizer.matches(',')); + tokenizer.read(']'); + } + tokenizer.read(JsopReader.END); + builder.setProperty(propertyName, result, Type.STRINGS); + } else if (value.indexOf('.') >= 0 || value.toLowerCase().indexOf("e") >= 0) { + // double + try { + Double d = Double.parseDouble(value); + builder.setProperty(propertyName, d); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Could not parse double " + value + " for property " + propertyName); + } + } else if (value.startsWith("-") || (!value.isEmpty() && Character.isDigit(value.charAt(0)))) { + // long + try { + Long x = Long.parseLong(value); + builder.setProperty(propertyName, x); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Could not parse long " + value + " for property " + propertyName); + } + } else { + throw new IllegalArgumentException("Unsupported value " + value + " for property " + propertyName); + } + } + + public static String oakStringValue(JsonObject json, String propertyName) { + String value = json.getProperties().get(propertyName); + if (value == null) { + return null; + } + return oakStringValue(value); + } + + static String oakStringValue(String value) { + if (!value.startsWith("\"")) { + // support numbers + return value; + } + value = JsopTokenizer.decodeQuoted(value); + if (value.startsWith(":blobId:")) { + value 
= value.substring(":blobId:".length()); + value = new String(Base64.getDecoder().decode(value.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8); + } else if (value.startsWith("str:") || value.startsWith("nam:") || value.startsWith("dat:")) { + value = value.substring("str:".length()); + } + return value; + } + + /** + * Read an Oak string array. There are 3 cases: + * + * - the property doesn't exist: return null + * - the value is stored as string: return an array with one entry + * - the value is stored in an array: return the sorted list of value + * + * The value is sorted, because the order is insignificant in our case, + * and want ["a", "b"] = ["b", "a"] when comparing index definitions. + * + * @param json the JSON object + * @param propertyName the property to extract + * @return a string array or null + */ + public static String[] oakStringArrayValue(JsonObject json, String propertyName) { + String value = json.getProperties().get(propertyName); + if (value == null) { + return null; + } else if (value.startsWith("\"")) { + return new String[] { oakStringValue(value) }; + } else if (value.startsWith("[")) { + return JsonNodeBuilder.getStringSet(value).toArray(new String[0]); + } else { + LOG.warn("Unsupported value type: {}", value); + return null; + } + } + + public static TreeSet getStringSet(String value) { + if (value == null) { + return null; + } + try { + JsopTokenizer tokenizer = new JsopTokenizer(value); + TreeSet result = new TreeSet<>(); + if (tokenizer.matches(JsopReader.STRING)) { + result.add(tokenizer.getEscapedToken()); + return result; + } + if (!tokenizer.matches('[')) { + return null; + } + if (!tokenizer.matches(']')) { + do { + if (!tokenizer.matches(JsopReader.STRING)) { + // not a string + return null; + } + result.add(tokenizer.getEscapedToken()); + } while (tokenizer.matches(',')); + tokenizer.read(']'); + } + tokenizer.read(JsopReader.END); + return result; + } catch (IllegalArgumentException e) { + return null; + } + } + 
+} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java new file mode 100644 index 00000000000..806278f1540 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; + +import org.apache.felix.inventory.Format; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.commons.json.JsopTokenizer; +import org.apache.jackrabbit.oak.json.Base64BlobSerializer; +import org.apache.jackrabbit.oak.json.JsonSerializer; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.IndexPathService; +import org.apache.jackrabbit.oak.plugins.index.inventory.IndexDefinitionPrinter; +import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.jetbrains.annotations.Nullable; + +public class RootIndexesListService implements IndexPathService { + + private final NodeStore nodeStore; + + private RootIndexesListService(NodeStore nodeStore) { + this.nodeStore = nodeStore; + } + + public static JsonObject getRootIndexDefinitions(NodeBuilder definitions) { + JsopBuilder json = new JsopBuilder(); + String filter = "{\"properties\":[\"*\", \"-:childOrder\"],\"nodes\":[\"*\", \"-:*\"]}"; + json.object(); + for (String indexPath : definitions.getChildNodeNames()) { + NodeState node = definitions.child(indexPath).getNodeState(); + json.key("/oak:index/" + indexPath); + JsonSerializer s = new JsonSerializer(json, filter, new Base64BlobSerializer()); + s.serialize(node); + } + json.endObject(); + return JsonObject.fromJson(json.toString(), true); + } + + /** + * Get the index definitions at /oak:index from a node store. 
+ * + * @param nodeStore the source node store (may not be null) + * @param typePattern the index types (may be null, meaning all) + * @return a JSON object with all index definitions + */ + public static JsonObject getRootIndexDefinitions(NodeStore nodeStore, @Nullable String typePattern) { + if (nodeStore == null) { + return new JsonObject(); + } + RootIndexesListService imageIndexPathService = new RootIndexesListService(nodeStore); + IndexDefinitionPrinter indexDefinitionPrinter = new IndexDefinitionPrinter(nodeStore, imageIndexPathService); + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + indexDefinitionPrinter.print(printWriter, Format.JSON, false); + printWriter.flush(); + writer.flush(); + String json = writer.toString(); + JsonObject result = JsonObject.fromJson(json, true); + if (typePattern != null) { + for (String c : new ArrayList<>(result.getChildren().keySet())) { + String type = result.getChildren().get(c).getProperties().get("type"); + if (type == null) { + continue; + } + type = JsopTokenizer.decodeQuoted(type); + if (type != null && !type.matches(typePattern)) { + result.getChildren().remove(c); + } + } + } + return result; + } + + @Override + public Iterable getIndexPaths() { + ArrayList list = new ArrayList<>(); + NodeState oakIndex = nodeStore.getRoot().getChildNode("oak:index"); + if (!oakIndex.exists()) { + return list; + } + for (ChildNodeEntry cn : oakIndex.getChildNodeEntries()) { + if (!IndexConstants.INDEX_DEFINITIONS_NODE_TYPE + .equals(cn.getNodeState().getName("jcr:primaryType"))) { + continue; + } + list.add("/oak:index/" + cn.getName()); + } + return list; + } + +} \ No newline at end of file diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/DiffIndexUpdater.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/DiffIndexUpdater.java new file mode 100644 index 00000000000..0d8536668ca --- /dev/null +++ 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/DiffIndexUpdater.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.jackrabbit.JcrConstants; +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.diff.DiffIndex; +import org.apache.jackrabbit.oak.plugins.index.diff.JsonNodeBuilder; +import org.apache.jackrabbit.oak.plugins.index.diff.RootIndexesListService; +import 
org.apache.jackrabbit.oak.query.stats.QueryRecorder; +import org.apache.jackrabbit.oak.spi.filter.PathFilter; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.apache.jackrabbit.util.ISO8601; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DiffIndexUpdater { + + private static final Logger LOG = LoggerFactory.getLogger(DiffIndexUpdater.class); + + public static boolean applyIndexDefinition(NodeStore store, NodeState rootState, NodeBuilder builder, String jsonString, String statement) { + String simplifiedStatement = QueryRecorder.simplifySafely(statement); + LOG.info("indexDef {}", jsonString); + if (!jsonString.trim().startsWith("{")) { + return false; + } + NodeBuilder optimizer = builder.child("oak:index").child("diff.index.optimizer"); + optimizer.setProperty("jcr:primaryType", "oak:QueryIndexDefinition", Type.NAME); + optimizer.setProperty("type", "disabled", Type.STRING); + + JsonObject json = JsonObject.fromJson(jsonString, true); + PropertyState jcrData = rootState.getChildNode("oak:index").getChildNode("diff.index.optimizer").getChildNode("diff.json").getChildNode("jcr:content").getProperty("jcr:data"); + String old = "{}"; + if (jcrData != null) { + old = DiffIndex.readString(jcrData); + LOG.info("Old diff.index {}", old); + } + JsonObject jsonContent = JsonObject.fromJson(old, true); + JsonObject index = json.getChildren().get("index"); + if (!index.getProperties().containsKey("includedPaths")) { + index.getProperties().put("warningNoIncludedPaths", "\"Warning: the query doesn't have a path restriction. This is not recommended. Consider adding a path restriction such as '/content'.\""); + } + if (!index.getProperties().containsKey("tags")) { + index.getProperties().put("warningNoTag", "\"Warning: the query doesn't use a tag. 
Consider adding a tag using 'option(index tag xyz)' where 'xyz' is the name of the component of the application.\""); + } else { + index.getProperties().put("selectionPolicy", "\"tag\""); + } + index.getProperties().put("statement", JsopBuilder.encode(simplifiedStatement)); + // search in old indexes if we already optimized for this query + for (JsonObject existing : jsonContent.getChildren().values()) { + String oldStatement = existing.getProperties().get("statement"); + if (oldStatement != null && oldStatement.equals("\"" + simplifiedStatement + "\"")) { + return false; + } + } + Optional bestIndexName = findMatchingIndexName(store, json.toString()); + String prefix; + if (bestIndexName.isEmpty()) { + prefix = "auto.indexOptimizer"; + } else { + prefix = bestIndexName.get(); + } + if (prefix.startsWith("/oak:index/")) { + prefix = prefix.substring("/oak:index/".length()); + } + int dash = prefix.indexOf('-'); + if (dash >= 0) { + prefix = prefix.substring(0, dash); + } + // there might be multiple; if so, append a number + // (alternatively, we could try to merge) + int indexNumber = 0; + for (String existing : jsonContent.getChildren().keySet()) { + if (existing.startsWith(prefix)) { + String n = existing.substring(prefix.length()); + if (n.isEmpty()) { + indexNumber = 1; + } else { + try { + indexNumber = Math.max(indexNumber, Integer.parseInt(n) + 1); + } catch (NumberFormatException e) { + // ignore + } + } + } + } + String newIndexName = prefix + (indexNumber == 0 ? 
"" : "" + indexNumber); + jsonContent.getChildren().put(newIndexName, index); + String newJsonContent = jsonContent.toString(); + InputStream inputStream = new ByteArrayInputStream(newJsonContent.getBytes(StandardCharsets.UTF_8)); + try { + Blob blob = store.createBlob(inputStream); + NodeBuilder diffJson = optimizer.child("diff.json"); + diffJson.setProperty("jcr:primaryType", "nt:file", Type.NAME); + NodeBuilder diffJsonContent = diffJson.child("jcr:content"); + diffJsonContent.setProperty("jcr:primaryType", "nt:resource", Type.NAME); + diffJsonContent.setProperty("jcr:mimeType", "application/json"); + diffJsonContent.setProperty("jcr:lastModifiedBy", "Optimizer Service"); + diffJsonContent.setProperty("jcr:lastModified", ISO8601.format(Calendar.getInstance()), Type.DATE); + diffJsonContent.setProperty("jcr:encoding", "utf-8"); + diffJsonContent.setProperty("jcr:data", blob); + } catch (IOException e) { + LOG.warn("Error writing blob", e); + } + return true; + } + + private static Set getIncludedPathsForIndex(JsonObject index) { + Set includedPaths; + if (index.getProperties().containsKey(PathFilter.PROP_INCLUDED_PATHS)) { + String[] includedPathsArray = JsonNodeBuilder.oakStringArrayValue(index, PathFilter.PROP_INCLUDED_PATHS); + includedPaths = Set.of(ArrayUtils.nullToEmpty(includedPathsArray)); + } else { + includedPaths = Set.of(); + } + return includedPaths; + } + + /** + * Try to find an existing index that matches the node type, tag, and included paths of the provided index JSON. 
+ * + * @param store node store + * @param jsonString index JSON + * @return name of matching index or Optional.empty() if not found + */ + public static Optional findMatchingIndexName(NodeStore store, String jsonString) { + Map indexes = RootIndexesListService.getRootIndexDefinitions(store, ".*").getChildren(); + JsonObject json = JsonObject.fromJson(jsonString, true); + JsonObject index = json.getChildren().get(FulltextIndexConstants.PROP_INDEX); + + Set nodeTypes = getNodeTypesForIndex(index); + Set includedPaths = getIncludedPathsForIndex(index); + Set tags = Set.of(ArrayUtils.nullToEmpty(JsonNodeBuilder.oakStringArrayValue(index, IndexConstants.INDEX_TAGS))); + LOG.info("nodeTypes: {}", nodeTypes); + LOG.info("includedPaths: {}", includedPaths); + LOG.info("tags: {}", tags); + + if (nodeTypes.contains("nt:base") && tags.isEmpty()) { + // do not recommend an index for nt:base, except if there is a tag + return Optional.empty(); + } + + List remaining = new ArrayList<>(); + for(Entry candidate : indexes.entrySet()) { + if (candidate.getKey().indexOf("-custom-") >= 0) { + // ignore custom indexes + continue; + } + JsonObject candidateIndex = candidate.getValue(); + + // check node types + if (!nodeTypes.isEmpty()) { + // check only one node type (most queries only have one) + String nodeType = nodeTypes.iterator().next(); + if (!getNodeTypesForIndex(candidateIndex).contains(nodeType)) { + // not a match + continue; + } + } + + // ignore indexes with excludedPaths + if (candidateIndex.getProperties().containsKey(PathFilter.PROP_EXCLUDED_PATHS)) { + continue; + } + + // check includedPaths + if (includedPaths.isEmpty()) { + if (!getIncludedPathsForIndex(candidateIndex).isEmpty()) { + // not a match + continue; + } + } else { + // check only one (the query can only have one path) + String firstIncludedPaths = includedPaths.iterator().next(); + boolean found = false; + // iterate over the includedPaths in the index + // if any of them is a prefix of this path, it's 
fine + for (String inc : getIncludedPathsForIndex(candidateIndex)) { + if (firstIncludedPaths.startsWith(inc)) { + found = true; + break; + } + } + if (!found) { + // not a match + continue; + } + } + + // check tag + if (tags.isEmpty()) { + // no tag: only consider without selection policy + String selectionPolicy = JsonNodeBuilder.oakStringValue(index, IndexConstants.INDEX_SELECTION_POLICY); + if (selectionPolicy != null) { + continue; + } + } else { + // a tag: check if the first one (there's almost always only one in the query) matches + String tag = tags.iterator().next(); + Set tags2 = Set.of(ArrayUtils.nullToEmpty(JsonNodeBuilder.oakStringArrayValue(index, IndexConstants.INDEX_TAGS))); + if (!tags2.contains(tag)) { + continue; + } + } + remaining.add(candidate.getKey()); + } + LOG.info("Candidate indexes: {}", remaining); + + + return remaining.stream().findFirst(); + } + + /** + * Get the node types defined in the index rules for the given index. + * + * @param index index JSON + * @return set of node types or empty set if no node types are defined in the index + */ + private static Set getNodeTypesForIndex(JsonObject index) { + Set nodeTypes; + if (index.getChildren().containsKey(FulltextIndexConstants.INDEX_RULES)) { + JsonObject indexRules = index.getChildren().get(FulltextIndexConstants.INDEX_RULES); + nodeTypes = indexRules.getChildren().keySet() + .stream() + .filter(name -> !name.equals(JcrConstants.JCR_PRIMARYTYPE)) + .collect(Collectors.toSet()); + } else { + if ("\"property\"".equals(index.getProperties().get("type"))) { + Set decl = JsonNodeBuilder.getStringSet(index.getProperties().get("declaringNodeTypes")); + LOG.info("Found property index with declaring node types: {}", decl); + return decl == null ? 
Set.of() : decl; + } + nodeTypes = Set.of(JcrConstants.NT_BASE); + } + return nodeTypes; + } + + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/FulltextIndexConstants.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/FulltextIndexConstants.java new file mode 100644 index 00000000000..c16da622688 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/FulltextIndexConstants.java @@ -0,0 +1,448 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +import java.util.Locale; +import java.util.Map; + +/** + * Internal constants used in index definition, and index implementations. 
/**
 * Internal constants used in index definition, and index implementations.
 */
public interface FulltextIndexConstants {

    /**
     * The indexing modes of an index.
     */
    enum IndexingMode {
        SYNC, NRT, ASYNC;

        /**
         * @return the lower case name of this mode
         */
        public String asyncValueName() {
            return name().toLowerCase(Locale.ENGLISH);
        }

        /**
         * Get the mode for the given name, ignoring case.
         *
         * @param indexingMode the name of the mode
         * @return the mode
         * @throws IllegalArgumentException if there is no mode with this name
         */
        public static IndexingMode from(String indexingMode) {
            return valueOf(indexingMode.toUpperCase(Locale.ENGLISH));
        }
    }

    String INDEX_DATA_CHILD_NAME = ":data";

    /**
     * include only certain property types in the index
     */
    String INCLUDE_PROPERTY_TYPES = "includePropertyTypes";

    /**
     * exclude certain properties by name
     */
    String EXCLUDE_PROPERTY_NAMES = "excludePropertyNames";

    String PERSISTENCE_NAME = "persistence";

    String PERSISTENCE_OAK = "repository";

    String TEST_MODE = "testMode";

    String PERSISTENCE_FILE = "file";

    String PERSISTENCE_PATH = "path";

    /**
     * Experimental flag to control storage behavior: 'null' or 'true' means the content is stored
     */
    String EXPERIMENTAL_STORAGE = "oak.experimental.storage";

    /**
     * Determines if full text indexing is enabled for this index definition.
     * Default is true
     */
    String FULL_TEXT_ENABLED = "fulltextEnabled";

    /**
     * Only include properties with name in this set. If this property is defined
     * then {@code excludePropertyNames} would be ignored
     */
    String INCLUDE_PROPERTY_NAMES = "includePropertyNames";

    /**
     * Type of the property being indexed defined as part of property definition
     * under the given index definition. Refer to {@code javax.jcr.PropertyType}
     * constants for the possible values
     */
    String PROP_TYPE = "type";

    /**
     * Defines properties which would be used for ordering. If range queries are to
     * be performed with same property then it must be part of include list also
     */
    String ORDERED_PROP_NAMES = "orderedProps";

    /**
     * Size in bytes used for splitting the index files when storing them in NodeStore
     */
    String BLOB_SIZE = "blobSize";

    /**
     * Native function name associated with this index definition. Any query can
     * use this as the function name to ensure that this index gets used for invoking
     * the index
     */
    String FUNC_NAME = "function";

    /**
     * Child node name under which property details are provided
     */
    String PROP_NODE = "properties";

    String INDEX_RULES = "indexRules";

    /**
     * Field boost factor
     */
    String FIELD_BOOST = "boost";

    /**
     * Property name defined explicitly. Mostly used in case of relative property names
     */
    String PROP_NAME = "name";

    String PROP_IS_REGEX = "isRegexp";

    String PROP_INDEX = "index";

    String PROP_USE_IN_EXCERPT = "useInExcerpt";

    String EXCERPT_NODE_FIELD_NAME = ".";

    String PROP_NODE_SCOPE_INDEX = "nodeScopeIndex";

    String PROP_PROPERTY_INDEX = "propertyIndex";

    String PROP_ANALYZED = "analyzed";

    String RULE_INHERITED = "inherited";

    String PROP_ORDERED = "ordered";

    String PROP_SCORER_PROVIDER = "scorerProviderName";

    String PROP_WEIGHT = "weight";

    String PROP_DYNAMIC_BOOST = "dynamicBoost";

    float DYNAMIC_BOOST_WEIGHT = 0.0001f;

    /**
     * Boolean property in property definition to mark sync properties
     */
    String PROP_SYNC = "sync";

    /**
     * Boolean property in property definition to mark unique properties
     */
    String PROP_UNIQUE = "unique";


    String EVALUATE_PATH_RESTRICTION = "evaluatePathRestrictions";

    /**
     * The property name to specify a regular expression for property value in index definition. If this property is present
     * in index definition, then only those properties would be added to index whose value matches the regex defined by
     * this property.
     */
    String PROP_VALUE_REGEX = "valueRegex";

    /**
     * The property name to specify a regular expression for query text. If this property is present in an index definition,
     * then those queries whose search text doesn't match this pattern but are still using the index will log a warning.
     * If this property is not specified, but {@link #PROP_VALUE_REGEX} is specified, that property is also used for the use
     * case specified here.
     */
    String PROP_QUERY_FILTER_REGEX = "queryFilterRegex";

    /**
     * Experimental config to restrict which property type gets indexed at
     * property definition level. Mostly index rule level {@link #INCLUDE_PROPERTY_TYPES}
     * should be sufficient
     */
    String PROP_INCLUDED_TYPE = "oak.experimental.includePropertyTypes";

    /**
     * Regex to allow inclusion of all immediate properties of the node
     */
    String REGEX_ALL_PROPS = "^[^\\/]*$";

    /**
     * Node name storing the aggregate rules
     */
    String AGGREGATES = "aggregates";

    String AGG_PRIMARY_TYPE = "primaryType";

    /**
     * Name of property which stores the aggregate include pattern like jcr:content/metadata
     */
    String AGG_PATH = "path";

    /**
     * Limit for maximum number of reaggregates allowed. For example if there is an aggregate of nt:folder
     * and it also includes nt:folder then aggregation would traverse down until this limit is hit
     */
    String AGG_RECURSIVE_LIMIT = "reaggregateLimit";

    /**
     * Boolean property indicating that separate fulltext field should be created for
     * node represented by this pattern
     */
    String AGG_RELATIVE_NODE = "relativeNode";

    String COST_PER_ENTRY = "costPerEntry";

    String COST_PER_EXECUTION = "costPerExecution";


    /**
     * Config node which includes Tika related configuration.
     * Its value should match {@code FieldNames#NODE_NAME}
     * (that class is not part of this package, so it is not linked).
     */
    String TIKA = "tika";

    /**
     * nt:file node under 'tika' node which refers to the config xml file
     */
    String TIKA_CONFIG = "config.xml";

    String TIKA_MAX_EXTRACT_LENGTH = "maxExtractLength";

    /**
     * Config node under tika which defines mime type mappings
     */
    String TIKA_MIME_TYPES = "mimeTypes";

    /**
     * Property name within the mime type structure which defines a mime type mapping
     */
    String TIKA_MAPPED_TYPE = "mappedType";

    /**
     * The maximum number of terms that will be indexed for a single field in a
     * document. This limits the amount of memory required for indexing, so that
     * collections with very large files will not crash the indexing process by
     * running out of memory.
     * <p>
     * Note that this effectively truncates large documents, excluding from the
     * index terms that occur further in the document. If you know your source
     * documents are large, be sure to set this value high enough to accommodate
     * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
     * is your memory, but you should anticipate an OutOfMemoryError.
     * <p>
     * By default, no more than 10,000 terms will be indexed for a field.
     */
    String MAX_FIELD_LENGTH = "maxFieldLength";

    /**
     * whether use this property values for suggestions
     */
    String PROP_USE_IN_SUGGEST = "useInSuggest";

    /**
     * subnode holding configuration for suggestions
     */
    String SUGGESTION_CONFIG = "suggestion";

    /**
     * update frequency of the suggester in minutes
     */
    String SUGGEST_UPDATE_FREQUENCY_MINUTES = "suggestUpdateFrequencyMinutes";

    /**
     * whether use this property values for spellchecking
     */
    String PROP_USE_IN_SPELLCHECK = "useInSpellcheck";

    /**
     * whether use this property values for similarity
     */
    String PROP_USE_IN_SIMILARITY = "useInSimilarity";

    /**
     * whether feature vector similarity search should rerank based on feature values
     */
    String PROP_SIMILARITY_RERANK = "similarityRerank";

    /**
     * whether property values should be indexed as tags to boost similarity search results
     */
    String PROP_SIMILARITY_TAGS = "similarityTags";

    /**
     * Property definition config indicating that null check support should be
     * enabled for this property
     */
    String PROP_NULL_CHECK_ENABLED = "nullCheckEnabled";

    /**
     * Property definition config indicating that this property would be used with
     * 'IS NOT NULL' constraint
     */
    String PROP_NOT_NULL_CHECK_ENABLED = "notNullCheckEnabled";

    /**
     * IndexRule level config to indicate that Node name should also be index
     * to support fn:name() queries
     */
    String INDEX_NODE_NAME = "indexNodeName";

    /**
     * Property definition name to indicate indexing node name
     */
    String PROPDEF_PROP_NODE_NAME = ":nodeName";


    /**
     * Optional subnode holding configuration for facets.
     */
    String FACETS = "facets";

    /**
     * Optional property to set the suggest field to be analyzed and therefore allow more fine
     * grained and flexible suggestions.
     */
    String SUGGEST_ANALYZED = "suggestAnalyzed";

    /**
     * Integer property indicating that the index should be
     * used in compat mode to specific version
     */
    String COMPAT_MODE = "compatVersion";

    /**
     * Name of the codec to be used for indexing
     */
    String CODEC_NAME = "codec";

    /**
     * Name of the merge policy to be used while indexing
     */
    String MERGE_POLICY_NAME = "mergePolicy";

    /**
     * Optional (index definition) property indicating whether facets should be ACL checked.
     * Default is true
     */
    String PROP_SECURE_FACETS = "secure";

    String PROP_SECURE_FACETS_VALUE_INSECURE = "insecure";
    String PROP_SECURE_FACETS_VALUE_STATISTICAL = "statistical";
    String PROP_SECURE_FACETS_VALUE_SECURE = "secure";
    String PROP_SECURE_FACETS_VALUE_JVM_PARAM = "oak.facets.secure";

    String STATISTICAL_FACET_SAMPLE_SIZE_JVM_PARAM = "oak.facet.statistical.sampleSize";
    String PROP_STATISTICAL_FACET_SAMPLE_SIZE = "sampleSize";
    int STATISTICAL_FACET_SAMPLE_SIZE_DEFAULT = 1000;

    /**
     * Property name to specify the size of vectors used for similarity search.
     */
    String PROP_SIMILARITY_SEARCH_DENSE_VECTOR_SIZE = "similaritySearchDenseVectorSize";

    /**
     * Optional (index definition) property indicating max number of facets that will be retrieved
     * in query.
     * Default is {@code IndexDefinition#DEFAULT_FACET_COUNT}
     * (that class is not part of this package, so it is not linked).
     */
    String PROP_FACETS_TOP_CHILDREN = "topChildren";

    /**
     * Optional (property definition) property indicating whether facets should be created
     * for this property
     */
    String PROP_FACETS = "facets";

    /**
     * Boolean property indicate that property should not be included in aggregation
     */
    String PROP_EXCLUDE_FROM_AGGREGATE = "excludeFromAggregation";

    /**
     * String property: the function to index, for function-based index
     */
    String PROP_FUNCTION = "function";

    /**
     * Boolean property which signal FulltextIndexEditor to refresh the stored index definition
     */
    String PROP_REFRESH_DEFN = "refresh";

    /**
     * Long property that keep seed for random number generation. One example usage of this is
     * to randomly sample query results to statistically check for ACLs to extrapolate facet
     * counts
     */
    String PROP_RANDOM_SEED = "seed";

    /**
     * Boolean property to indicate that nodes nodetype matching indexRule name
     * should be indexed
     */
    String PROP_INDEX_NODE_TYPE = "nodeTypeIndex";

    /**
     * The property of an index. If the given node or property exists, then the
     * index is used for queries; otherwise, it is not used (returns infinite
     * cost). The value is: nodes, the path. For properties, the path of the node, then '@' property.
     */
    String USE_IF_EXISTS = "useIfExists";

    /**
     * Boolean property to enable or disable indexing of binaries for similarity searches.
     * By default the value of this property is true.
     */
    String INDEX_SIMILARITY_BINARIES = "indexSimilarityBinaries";

    /**
     * Boolean property to enable or disable indexing of strings for similarity searches.
     * By default the value of this property is true.
     */
    String INDEX_SIMILARITY_STRINGS = "indexSimilarityStrings";

    /**
     * Node name under which various analyzers are configured
     */
    String ANALYZERS = "analyzers";

    /**
     * Name of the default analyzer definition node under 'analyzers' node
     */
    String ANL_DEFAULT = "default";
    String ANL_FILTERS = "filters";
    String ANL_STOPWORDS = "stopwords";
    String ANL_TOKENIZER = "tokenizer";
    String ANL_CHAR_FILTERS = "charFilters";
    String ANL_CLASS = "class";
    String ANL_NAME = "name";

    /**
     * Boolean property indicating if in-built analyzer should preserve original term
     */
    String INDEX_ORIGINAL_TERM = "indexOriginalTerm";

    /**
     * Internal version of the index definition for specific index type. Index version is an information that might be
     * needed from an outside process that does not have visibility to the specific index module.
     * The map is immutable; the key is the index type.
     */
    Map<String, String> INDEX_VERSION_BY_TYPE = Map.of(
            "elasticsearch", "1.4.0"
    );
}
/**
 * Derives camelCase node names (as used in index definitions) from function patterns written
 * in Polish (prefix) notation, for example {@code function*upper*@data}.
 */
public class FunctionNameConverter {

    // Map from function to node name (name of the node in the index definition).
    // Only consulted when the query was written in XPath.
    private static final Map<String, String> XPATH_NAMES = Map.of(
            "upper", "upperCase",
            "lower", "lowerCase",
            "coalesce", "coalesce",
            "first", "first",
            "length", "stringLength",
            "@:localname", "localname",
            "@:name", "name",
            "@:path", "path");

    /**
     * Converts a given function pattern in polish notation into a string in camelCase. This is
     * used to generate node names from the query. For example, the function pattern
     * "function*upper*@data" will be converted to "upperData" if the query was written in
     * JCR-SQL2 and to "upperCaseData" if the query was written in XPath.
     *
     * @param functionPattern The string pattern representing a function. It is split into tokens
     *                        based on the '*' character. A leading "function" token is ignored.
     * @param isXPath         whether the XPath spelling of the function names should be used.
     * @return A string combining the function name(s) and properties in camelCase; the empty
     *         string if the pattern contains nothing to convert.
     */
    public static String apply(String functionPattern, boolean isXPath) {
        Deque<String> tokens = new ArrayDeque<>(Arrays.asList(functionPattern.split("\\*")));
        if ("function".equals(tokens.peek())) {
            tokens.poll();
        }

        String converted = parse(tokens, isXPath);
        if (converted.isEmpty()) {
            // nothing to lowercase; avoids an out-of-bounds substring on e.g. "function"
            return converted;
        }

        // lowercase the first letter
        return converted.substring(0, 1).toLowerCase(Locale.ENGLISH) + converted.substring(1);
    }

    /**
     * Consumes one expression from the head of {@code tokens} and returns its name.
     *
     * All function names are capitalized as we want the node name to be camelCase. The only
     * exception is the starting function; that case is handled in {@link #apply} so this method
     * does not need to know whether it is looking at the first function.
     */
    private static String parse(Deque<String> tokens, boolean isXPath) {
        if (tokens.isEmpty()) {
            return "";
        }

        String token = tokens.poll();
        switch (token) {
            case "upper":
            case "lower":
            case "first":
            case "length":
            case "@:localname":
            case "@:name":
            case "@:path":
                // single-operand functions: the operand is whatever follows
                String name = isXPath ? XPATH_NAMES.get(token) : token;
                return capitalize(name) + parse(tokens, isXPath);
            case "coalesce":
                // two operands
                return capitalize(token) + parse(tokens, isXPath) + parse(tokens, isXPath);
            default:
                return capitalize(extractPropertyName(token));
        }
    }

    /**
     * Capitalizes the first letter of the given string. If the string starts with a special
     * prefix "@:", this prefix is removed before capitalization. If the string is null or empty,
     * it is returned as is.
     *
     * @param str The string to be capitalized.
     * @return The capitalized string, or as is if it is null or empty.
     */
    private static String capitalize(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        // Remove "@:" prefix if present
        if (str.startsWith("@:")) {
            str = str.substring(2);
            if (str.isEmpty()) {
                return str;
            }
        }

        return str.substring(0, 1).toUpperCase(Locale.ENGLISH) + str.substring(1);
    }

    /**
     * Extracts the local property name from a property token. A leading '@' is dropped, then
     * only the last path segment is kept, and finally a namespace prefix on that segment is
     * removed. For example:
     * <pre>
     * "@data"                  -> "data"
     * "@jcr:title"             -> "title"
     * "@jcr:content/foo2"      -> "foo2"
     * "@jcr:content/jcr:title" -> "title"
     * </pre>
     *
     * @param input The input string containing the property name.
     * @return The extracted property name; empty if the input ends with '/' or ':'.
     */
    private static String extractPropertyName(String input) {
        String name = input.startsWith("@") ? input.substring(1) : input;
        // lastIndexOf returns -1 when absent, so "+ 1" keeps the whole string in that case
        name = name.substring(name.lastIndexOf('/') + 1);
        name = name.substring(name.lastIndexOf(':') + 1);
        return name;
    }
}
+ */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; + +import java.text.ParseException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; + +import javax.jcr.PropertyType; + +import org.apache.jackrabbit.oak.api.QueryEngine; +import org.apache.jackrabbit.oak.api.Root; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.core.ImmutableRoot; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.optimizer.IndexDefinitionBuilder.IndexRule; +import org.apache.jackrabbit.oak.plugins.index.optimizer.IndexDefinitionBuilder.PropertyRule; +import org.apache.jackrabbit.oak.query.ExecutionContext; +import org.apache.jackrabbit.oak.query.QueryEngineImpl; +import org.apache.jackrabbit.oak.query.QueryEngineSettings; +import org.apache.jackrabbit.oak.query.ast.NodeTypeInfo; +import org.apache.jackrabbit.oak.query.ast.NodeTypeInfoProvider; +import org.apache.jackrabbit.oak.spi.query.Cursor; +import org.apache.jackrabbit.oak.spi.query.Filter; +import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction; +import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction; +import org.apache.jackrabbit.oak.spi.query.QueryConstants; +import org.apache.jackrabbit.oak.spi.query.QueryIndex; +import org.apache.jackrabbit.oak.spi.query.QueryIndex.OrderEntry; +import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider; +import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains; +import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression; +import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm; +import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor; +import org.apache.jackrabbit.oak.spi.state.NodeState; + +class IndexConfigGenerator { 
+ + private final QueryEngine queryEngine; + private final IndexDefinitionBuilder builder = new IndexDefinitionBuilder(); + private final Set propsWithFulltextConstraints = new HashSet<>(); + + public IndexConfigGenerator() { + final Root root = new ImmutableRoot(InitialContentHelper.INITIAL_CONTENT); + queryEngine = new QueryEngineImpl() { + @Override + protected ExecutionContext getExecutionContext() { + return new ExecutionContext( + InitialContentHelper.INITIAL_CONTENT, + root, + new QueryEngineSettings(), + new LuceneIndexGeneratingIndexProvider(), null, null) { + @Override + public NodeTypeInfoProvider getNodeTypeInfoProvider() { + return DummyNodeTypeInfoProvider.INSTANCE; + } + }; + } + }; + } + + public static boolean isXPath(String query) { + // the query is not, at least SQL is not + query = query.trim().toLowerCase(Locale.ENGLISH); + // explain queries + if (query.startsWith("explain")) { + query = query.substring("explain".length()).trim(); + if (query.startsWith("measure")) { + query = query.substring("measure".length()).trim(); + } + } + // union queries + while (query.startsWith("(")) { + query = query.substring("(".length()).trim(); + } + + return !query.startsWith("select"); + } + + public void process(String statement) throws ParseException { + String lang = isXPath(statement) ? 
"xpath" : "JCR-SQL2"; + process(statement, lang); + } + + public void process(String statement, String language) throws ParseException { + queryEngine.executeQuery(statement, language, null, null); + } + + public NodeState getIndexConfig() { + return builder.build(); + } + + private void processFilter(Filter filter, List sortOrder) { + boolean xpath = isOriginallyXPath(filter.getQueryStatement()); + addPathRestrictions(filter); + IndexRule rule = processNodeTypeConstraint(filter); + processTags(filter); + processFulltextConstraints(filter, rule); + processPropertyRestrictions(filter, rule); + processSortConditions(sortOrder, rule, xpath); + processPureNodeTypeConstraints(filter, rule); + } + + private void processTags(Filter filter) { + PropertyRestriction indexTag = filter.getPropertyRestriction(IndexConstants.INDEX_TAG_OPTION); + + if (indexTag != null && indexTag.first != null) { + builder.tags(indexTag.first.getValue(Type.STRING)); + } + } + + private void addPathRestrictions(Filter filter) { + if (!filter.getPath().isEmpty() && !"/".equals(filter.getPath())) { + String path = filter.getPath().replaceAll("\\s", ""); + builder.includedPaths(path); + builder.queryPaths(path); + } + } + + private void processPureNodeTypeConstraints(Filter filter, IndexRule rule) { + if (filter.getFullTextConstraint() == null + && filter.getPropertyRestrictions().isEmpty() + && !"nt:base".equals(filter.getNodeType())) { + rule.property("jcr:primaryType"); + } + } + + private void processFulltextConstraints(Filter filter, final IndexRule rule) { + FullTextExpression ft = filter.getFullTextConstraint(); + if (ft == null) { + return; + } + + ft.accept(new FullTextVisitor.FullTextVisitorBase() { + @Override + public boolean visit(FullTextContains contains) { + visitTerm(contains.getPropertyName()); + return true; + } + + @Override + public boolean visit(FullTextTerm term) { + visitTerm(term.getPropertyName()); + return false; + } + + private void visitTerm(String propertyName) { + 
String p = propertyName; + String propertyPath = null; + String nodePath = null; + if (p == null) { + return; + } + String parent = getParentPath(p); + if (isNodePath(p)) { + nodePath = parent; + } else { + propertyPath = p; + } + + if (nodePath != null) { + builder.aggregateRule(rule.getRuleName()).include(nodePath).relativeNode(); + } else if (propertyPath != null) { + rule.property(propertyPath).analyzed(); + propsWithFulltextConstraints.add(propertyPath); + } + } + }); + } + + /** + * In a fulltext term for jcr:contains(foo, 'bar') 'foo' is the property name. While in + * jcr:contains(foo/*, 'bar') 'foo' is node name + * + * @return true if the term is related to node + */ + private static boolean isNodePath(String fulltextTermPath) { + return fulltextTermPath.endsWith("/*"); + } + + private void processSortConditions(List sortOrder, IndexRule rule, boolean isXPath) { + if (sortOrder == null) { + return; + } + + for (OrderEntry o : sortOrder) { + if ("jcr:score".equals(o.getPropertyName())) { + continue; + } + + if (o.getPropertyType().isArray()) { + continue; + } + + String propertyName = o.getPropertyName(); + if (isFunction(propertyName)) { + String queryFunc = PolishToQueryConverter.apply(propertyName, isXPath); + propertyName = FunctionNameConverter.apply(propertyName, isXPath); + PropertyRule prop = rule.property(propertyName); + prop.function(queryFunc); + prop.ordered(); + continue; + } + + PropertyRule propRule = rule.property(o.getPropertyName()); + if (o.getPropertyType() != Type.UNDEFINED) { + propRule.ordered(PropertyType.nameFromValue(o.getPropertyType().tag())); + } else { + propRule.ordered(); + } + } + } + + /** + * Returns if the propertyName is a function. If it is, it will be in Polish notation. + * + * @param propertyName the propertyName in a propertyRestriction + * @return true if it is a function and false otherwise. 
+ */ + private boolean isFunction(String propertyName) { + return propertyName.startsWith("function*"); + } + + + /** + * Returns if the query originally was written in XPath. When the query engine creates the + * filter, the query statement is automatically SQL-2. But if it was originally XPath, it + * contains a comment with the original XPath query. + *

+ * Detecting it like this is only a heuristic. It is not 100% accurate as a JCR-SQL2 query might + * contain a condition with this String literal. But in most cases, this should correctly detect + * it. + * + * @param query the query statement + * @return true if the query was originally XPath and false otherwise. + */ + public static boolean isOriginallyXPath(String query) { + return query.contains("/* xpath: ") && query.endsWith(" */"); + } + + private void processPropertyRestrictions(Filter filter, IndexRule rule) { + System.out.println(filter.getQueryStatement()); + for (PropertyRestriction pr : filter.getPropertyRestrictions()) { + //Ignore special restrictions + if (isSpecialRestriction(pr)) { + continue; + } + + //QueryEngine adds a synthetic constraint for those properties + //which are used in fulltext constraint so as to ensure that given + //property is present. They need not be backed by index + if (propsWithFulltextConstraints.contains(pr.propertyName)) { + continue; + } + + if (isFunction(pr.propertyName)) { + boolean isXPath = isOriginallyXPath(filter.getQueryStatement()); + String queryFunc = PolishToQueryConverter.apply(pr.propertyName, isXPath); + String propertyName = FunctionNameConverter.apply(pr.propertyName, isXPath); + PropertyRule prop = rule.property(propertyName); + prop.function(queryFunc); + continue; + } + + PropertyRule propRule = rule.property(pr.propertyName); + if (pr.isNullRestriction()) { + propRule.nullCheckEnabled(); + } else if (pr.isNotNullRestriction()) { + propRule.notNullCheckEnabled(); + } + propRule.propertyIndex(); + } + + if (filter.getPropertyRestriction(QueryConstants.RESTRICTION_LOCAL_NAME) != null) { + rule.indexNodeName(); + } + } + + private boolean isSpecialRestriction(PropertyRestriction pr) { + String name = pr.propertyName; + if (name.startsWith(":")) { + return true; + } + if (name.startsWith("native*")) { + return true; + } + return false; + } + + private void processPathRestriction(Filter filter) { + if 
(filter.getPathRestriction() != PathRestriction.NO_RESTRICTION + || (filter.getPathRestriction() == PathRestriction.ALL_CHILDREN + && !PathUtils.denotesRoot(filter.getPath())) + ) { + builder.evaluatePathRestrictions(); + } + } + + private IndexRule processNodeTypeConstraint(Filter filter) { + return builder.indexRule(filter.getNodeType()); + } + + private class LuceneIndexGeneratingIndexProvider implements QueryIndexProvider { + + @Override + public List getQueryIndexes(NodeState nodeState) { + return List.of(new LuceneIndexGeneratingIndex()); + } + } + + private class LuceneIndexGeneratingIndex implements QueryIndex.AdvancedQueryIndex, QueryIndex { + + @Override + public double getMinimumCost() { + return 1.0; + } + + @Override + public double getCost(Filter filter, NodeState nodeState) { + return Double.MAX_VALUE; + } + + @Override + public Cursor query(Filter filter, NodeState nodeState) { + return null; + } + + @Override + public String getPlan(Filter filter, NodeState nodeState) { + return null; + } + + @Override + public String getIndexName() { + return "LuceneIndexGenerator"; + } + + @Override + public List getPlans(Filter filter, + List sortOrder, NodeState rootState) { + processFilter(filter, sortOrder); + return Collections.emptyList(); + } + + @Override + public String getPlanDescription(QueryIndex.IndexPlan plan, NodeState root) { + return null; + } + + @Override + public Cursor query(QueryIndex.IndexPlan plan, NodeState rootState) { + return null; + } + } + + private enum DummyNodeTypeInfoProvider implements NodeTypeInfoProvider { + INSTANCE; + + @Override + public NodeTypeInfo getNodeTypeInfo(String nodeTypeName) { + return new DummyNodeTypeInfo(nodeTypeName); + } + } + + private static class DummyNodeTypeInfo implements NodeTypeInfo { + + private final String nodeTypeName; + + private DummyNodeTypeInfo(String nodeTypeName) { + this.nodeTypeName = nodeTypeName; + } + + @Override + public boolean exists() { + return true; + } + + @Override + public 
String getNodeTypeName() { + return nodeTypeName; + } + + @Override + public Set getSuperTypes() { + return new HashSet<>(); + } + + @Override + public Set getPrimarySubTypes() { + return new HashSet<>(); + } + + @Override + public Set getMixinSubTypes() { + return new HashSet<>(); + } + + @Override + public boolean isMixin() { + return false; + } + + @Override + public Iterable getNamesSingleValuesProperties() { + return new HashSet<>(); + } + } +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionBuilder.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionBuilder.java new file mode 100644 index 00000000000..83b4d59ac41 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionBuilder.java @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import java.util.stream.Collectors; +import javax.jcr.PropertyType; + +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.spi.filter.PathFilter; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; + +import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; +import static org.apache.jackrabbit.JcrConstants.NT_UNSTRUCTURED; +import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE; +import static org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty; + +public class IndexDefinitionBuilder { + private final NodeBuilder builder = EMPTY_NODE.builder(); + private final Map rules = new HashMap<>(); + private final Map aggRules = new HashMap<>(); + private final NodeBuilder indexRule; + private NodeBuilder aggregateBuilder; + + public IndexDefinitionBuilder() { + builder.setProperty("compatVersion", 2); + builder.setProperty("async", "async"); + builder.setProperty("type", "lucene"); + builder.setProperty("evaluatePathRestrictions", true); + builder.setProperty(JCR_PRIMARYTYPE, "oak:QueryIndexDefinition", Type.NAME); + indexRule = createChild(builder, "indexRules"); + } + + public IndexDefinitionBuilder evaluatePathRestrictions(){ + builder.setProperty("evaluatePathRestrictions", true); + return this; + } + + public IndexDefinitionBuilder includedPaths(String ... paths){ + builder.setProperty(createProperty(PathFilter.PROP_INCLUDED_PATHS, Arrays.asList(paths), Type.STRINGS)); + return this; + } + + public IndexDefinitionBuilder queryPaths(String ... 
paths){ + builder.setProperty(createProperty(IndexConstants.QUERY_PATHS, Arrays.asList(paths), Type.STRINGS)); + return this; + } + + public IndexDefinitionBuilder excludedPaths(String ... paths){ + builder.setProperty(createProperty(PathFilter.PROP_EXCLUDED_PATHS, Arrays.asList(paths), Type.STRINGS)); + return this; + } + + public IndexDefinitionBuilder tags(String... tags) { + builder.setProperty(IndexConstants.INDEX_TAGS, Arrays.asList(tags), Type.STRINGS); + return this; + } + + public NodeState build(){ + return builder.getNodeState(); + } + + //~--------------------------------------< IndexRule > + + public IndexRule indexRule(String type){ + IndexRule rule = rules.get(type); + if (rule == null){ + rule = new IndexRule(createChild(indexRule, type), type); + rules.put(type, rule); + } + return rule; + } + + public static class IndexRule { + private final NodeBuilder builder; + private final NodeBuilder propertiesBuilder; + private final String ruleName; + private final Map props = new HashMap<>(); + private final Set propNodeNames = new HashSet<>(); + + private IndexRule(NodeBuilder builder, String type) { + this.builder = builder; + this.propertiesBuilder = createChild(builder, "properties"); + this.ruleName = type; + } + + public IndexRule indexNodeName(){ + builder.setProperty("indexNodeName", true); + return this; + } + + public PropertyRule property(String name){ + PropertyRule propRule = props.get(name); + if (propRule == null) { + if (name.equals("jcr:path")) { + propRule = new PropertyRule(this, createChild(propertiesBuilder, createPropNodeName(name)), name); + propRule.function("path()"); + } else { + propRule = new PropertyRule(this, createChild(propertiesBuilder, createPropNodeName(name)), name); + } + props.put(name, propRule); + } + return propRule; + } + + private String createPropNodeName(String name) { + name = getSafePropName(name); + if (name.isEmpty()) { + name = "prop"; + } + if (propNodeNames.contains(name)) { + name = name + "_" + 
propNodeNames.size(); + } + propNodeNames.add(name); + return name; + } + + public String getRuleName() { + return ruleName; + } + + @Override + public String toString() { + String propsString = props.isEmpty() + ? "None" + : props.entrySet().stream() + .map(entry -> entry.getKey() + "=" + entry.getValue()) + .collect(Collectors.joining(", ", "[", "]")); + + return String.format("IndexRule{ruleName='%s', properties=%s, indexNodeName=%s}", + ruleName, + propsString, + builder.getProperty("indexNodeName")); + } + + + } + + public static class PropertyRule { + private final IndexRule indexRule; + private final NodeBuilder builder; + + private PropertyRule(IndexRule indexRule, NodeBuilder builder, String name) { + this.indexRule = indexRule; + this.builder = builder; + builder.setProperty(LuceneIndexConstants.PROP_NAME, name); + } + + public PropertyRule useInExcerpt(){ + builder.setProperty(LuceneIndexConstants.PROP_USE_IN_EXCERPT, true); + return this; + } + + public PropertyRule analyzed(){ + builder.setProperty(LuceneIndexConstants.PROP_ANALYZED, true); + return this; + } + + public PropertyRule nodeScopeIndex(){ + builder.setProperty(LuceneIndexConstants.PROP_NODE_SCOPE_INDEX, true); + return this; + } + + public PropertyRule ordered(){ + builder.setProperty(LuceneIndexConstants.PROP_ORDERED, true); + return this; + } + + public PropertyRule ordered(String type){ + //This would throw an IAE if type is invalid + PropertyType.valueFromName(type); + builder.setProperty(LuceneIndexConstants.PROP_ORDERED, true); + builder.setProperty(LuceneIndexConstants.PROP_TYPE, type); + return this; + } + + public PropertyRule propertyIndex(){ + builder.setProperty(LuceneIndexConstants.PROP_PROPERTY_INDEX, true); + return this; + } + + public PropertyRule nullCheckEnabled(){ + builder.setProperty(LuceneIndexConstants.PROP_NULL_CHECK_ENABLED, true); + return this; + } + + public PropertyRule notNullCheckEnabled(){ + 
builder.setProperty(LuceneIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, true); + return this; + } + + public PropertyRule function(String fnName) { + builder.setProperty(LuceneIndexConstants.FUNC_NAME, fnName); + builder.removeProperty(LuceneIndexConstants.PROP_NAME); + return this; + } + + public IndexRule enclosingRule(){ + return indexRule; + } + } + + //~--------------------------------------< Aggregates > + + public AggregateRule aggregateRule(String type){ + if (aggregateBuilder == null){ + aggregateBuilder = createChild(builder, LuceneIndexConstants.AGGREGATES); + } + AggregateRule rule = aggRules.get(type); + if (rule == null){ + rule = new AggregateRule(createChild(aggregateBuilder, type)); + aggRules.put(type, rule); + } + return rule; + } + + public AggregateRule aggregateRule(String primaryType, String ... includes){ + AggregateRule rule = aggregateRule(primaryType); + for (String include : includes){ + rule.include(include); + } + return rule; + } + + public static class AggregateRule { + private final NodeBuilder builder; + private final Map includes = new HashMap<>(); + + private AggregateRule(NodeBuilder builder) { + this.builder = builder; + } + + public Include include(String includePath) { + Include include = includes.get(includePath); + if (include == null){ + include = new Include(createChild(builder, "include" + includes.size())); + includes.put(includePath, include); + } + include.path(includePath); + return include; + } + + public static class Include { + private final NodeBuilder builder; + + private Include(NodeBuilder builder) { + this.builder = builder; + } + + public Include path(String includePath) { + builder.setProperty(LuceneIndexConstants.AGG_PATH, includePath); + return this; + } + + public Include relativeNode(){ + builder.setProperty(LuceneIndexConstants.AGG_RELATIVE_NODE, true); + return this; + } + } + } + + private static NodeBuilder createChild(NodeBuilder builder, String name){ + NodeBuilder result = builder.child(name); + 
result.setProperty(JCR_PRIMARYTYPE, NT_UNSTRUCTURED, Type.NAME); + return result; + } + + // TODO: document examples of this + // when the property is like jcr:primaryType or something like this. + static String getSafePropName(String relativePropName) { + String propName = PathUtils.getName(relativePropName); + int indexOfColon = propName.indexOf(':'); + if (indexOfColon > 0){ + propName = propName.substring(indexOfColon + 1); + } + + //Just keep ascii chars + propName = propName.replaceAll("\\W", ""); + return propName; + } +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionGenerator.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionGenerator.java new file mode 100644 index 00000000000..8e53eb5ccd4 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionGenerator.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +import java.text.ParseException; + +import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.json.Base64BlobSerializer; +import org.apache.jackrabbit.oak.json.JsonSerializer; +import org.apache.jackrabbit.oak.namepath.NamePathMapper; +import org.apache.jackrabbit.oak.query.NodeStateNodeTypeInfoProvider; +import org.apache.jackrabbit.oak.query.Query; +import org.apache.jackrabbit.oak.query.QueryEngineSettings; +import org.apache.jackrabbit.oak.query.QueryImpl; +import org.apache.jackrabbit.oak.query.SQL2Parser; +import org.apache.jackrabbit.oak.query.ast.NodeTypeInfoProvider; +import org.apache.jackrabbit.oak.query.stats.QueryStatsData; +import org.apache.jackrabbit.oak.query.stats.QueryStatsData.QueryExecutionStats; +import org.apache.jackrabbit.oak.query.xpath.XPathToSQL2Converter; +import org.apache.jackrabbit.oak.spi.query.Filter; +import org.apache.jackrabbit.oak.spi.state.NodeState; + +public class IndexDefinitionGenerator { + + public static String generateIndexDefinition(String language, String queryStatement) { + IndexConfigGenerator gen = new IndexConfigGenerator(); + try { + gen.process(queryStatement, language); + NodeState state = gen.getIndexConfig(); + JsopBuilder json = new JsopBuilder(); + json.object(); + json.key("index"); + String filter = "{\"properties\":[\"*\", \"-:childOrder\"],\"nodes\":[\"*\", \"-:*\"]}";; + JsonSerializer serializer = new JsonSerializer(json, filter, new Base64BlobSerializer()); + serializer.serialize(state); + json.endObject(); + return JsopBuilder.prettyPrint(json.toString()); + } catch (Throwable e) { + // ignore + return "error: " + e.toString(); + } + } + + public static String generateIndexDefinition2(NodeState rootState, String language, String queryStatement) { + NamePathMapper namePathMapper = NamePathMapper.DEFAULT; + NodeTypeInfoProvider nodeTypes = new NodeStateNodeTypeInfoProvider(rootState); + 
QueryEngineSettings settings = new QueryEngineSettings(); + QueryStatsData data = new QueryStatsData("", ""); + QueryExecutionStats stats = data.new QueryExecutionStats(); + SQL2Parser parser = new SQL2Parser(namePathMapper, nodeTypes, settings, stats); + try { + Query query; + if ("xpath".equals(language)) { + XPathToSQL2Converter converter = new XPathToSQL2Converter(); + String sql2 = converter.convert(queryStatement); + query = parser.parse(sql2); + } else if ("JCR-SQL2".equals(language)) { + query = parser.parse(queryStatement); + } else if ("sql".equals(language)) { + parser.setSupportSQL1(true); + query = parser.parse(queryStatement); + } else { + return ""; + } + try { + query.init(); + } catch (Exception e) { + ParseException e2 = new ParseException(query.getStatement() + ": " + e.getMessage(), 0); + e2.initCause(e); + throw e2; + } + query.prepare(); + Filter filter = ((QueryImpl) query).createFilter(true); + return "filter: " + filter.toString(); + } catch (Throwable e) { + // ignore + return "error: " + e.toString(); + } + } +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/InitialContentHelper.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/InitialContentHelper.java new file mode 100644 index 00000000000..3383584bfc9 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/InitialContentHelper.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +import org.apache.jackrabbit.oak.InitialContent; +import org.apache.jackrabbit.oak.OakInitializer; +import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; +import org.apache.jackrabbit.oak.plugins.name.NamespaceEditorProvider; +import org.apache.jackrabbit.oak.plugins.nodetype.TypeEditorProvider; +import org.apache.jackrabbit.oak.spi.commit.CompositeEditorProvider; +import org.apache.jackrabbit.oak.spi.commit.EditorHook; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStore; + +/** + * {@code InitialContent} helper for tests + */ +public class InitialContentHelper { + + public static final NodeState INITIAL_CONTENT = createInitialContent(false); + + public static final NodeState INITIAL_CONTENT_FROZEN_NODE_REFERENCEABLE = createInitialContent(true); + + private static final String REFERENCEABLE_FROZEN_NODE_PROPERTY = "oak.referenceableFrozenNode"; + + private static NodeState createInitialContent(boolean referenceableFrozenNodes) { + String propValue = System.getProperty(REFERENCEABLE_FROZEN_NODE_PROPERTY); + if (referenceableFrozenNodes) { + System.setProperty(REFERENCEABLE_FROZEN_NODE_PROPERTY, "true"); + } else { + System.clearProperty(REFERENCEABLE_FROZEN_NODE_PROPERTY); + } + try { + NodeStore store = new MemoryNodeStore(); + EditorHook hook = new EditorHook( + new CompositeEditorProvider(new NamespaceEditorProvider(), new TypeEditorProvider())); + OakInitializer.initialize(store, new InitialContent(), hook); + 
return store.getRoot(); + } finally { + if (propValue != null) { + System.setProperty(REFERENCEABLE_FROZEN_NODE_PROPERTY, propValue); + } else { + System.clearProperty(REFERENCEABLE_FROZEN_NODE_PROPERTY); + } + } + } + + private InitialContentHelper() {} + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/LuceneIndexConstants.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/LuceneIndexConstants.java new file mode 100644 index 00000000000..917fcc558ec --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/optimizer/LuceneIndexConstants.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.optimizer; + +/** + * Constants used internally in Lucene indexes. 
/**
 * Constants used internally in Lucene indexes.
 */
public interface LuceneIndexConstants extends FulltextIndexConstants {

    // NOTE(review): presumably the value of an index definition's "type" property — confirm.
    String TYPE_LUCENE = "lucene";

    // NOTE(review): judging by the name, a hidden child node holding suggester data — confirm.
    String SUGGEST_DATA_CHILD_NAME = ":suggest-data";

    // NOTE(review): judging by the name, a hidden child node used as a trash area — confirm.
    String TRASH_CHILD_NAME = ":trash";

    /**
     * Name of the codec to be used for indexing.
     */
    String CODEC_NAME = "codec";

    /**
     * Name of the merge policy to be used while indexing.
     */
    String MERGE_POLICY_NAME = "mergePolicy";

    /**
     * Boolean property to indicate that LuceneIndex is being used in test mode
     * and that it should participate in every test.
     */
    String TEST_MODE = "testMode";

    /**
     * Config node which includes the Tika related configuration.
     */
    String TIKA = "tika";

    /**
     * nt:file node under the 'tika' node which refers to the config xml file.
     */
    String TIKA_CONFIG = "config.xml";

    // NOTE(review): undocumented in the original; the name suggests a Tika text-extraction
    // length limit — confirm where it is read.
    String TIKA_MAX_EXTRACT_LENGTH = "maxExtractLength";

    /**
     * Config node under tika which defines mime type mappings.
     */
    String TIKA_MIME_TYPES = "mimeTypes";

    /**
     * Property name within the mime type structure which defines a mime type mapping.
     */
    String TIKA_MAPPED_TYPE = "mappedType";

    /**
     * The maximum number of terms that will be indexed for a single field in a
     * document. This limits the amount of memory required for indexing, so that
     * collections with very large files will not crash the indexing process by
     * running out of memory.
     * <p>
     * Note that this effectively truncates large documents, excluding from the
     * index terms that occur further in the document. If you know your source
     * documents are large, be sure to set this value high enough to accommodate
     * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
     * is your memory, but you should anticipate an OutOfMemoryError.
     * <p>
     * By default, no more than 10,000 terms will be indexed for a field.
     */
    String MAX_FIELD_LENGTH = "maxFieldLength";

    /**
     * Whether to use this property's values for suggestions.
     */
    String PROP_USE_IN_SUGGEST = "useInSuggest";

    /**
     * Subnode holding the configuration for suggestions.
     */
    String SUGGESTION_CONFIG = "suggestion";

    /**
     * Update frequency of the suggester, in minutes.
     */
    String SUGGEST_UPDATE_FREQUENCY_MINUTES = "suggestUpdateFrequencyMinutes";

    /**
     * Whether to use this property's values for spellchecking.
     */
    String PROP_USE_IN_SPELLCHECK = "useInSpellcheck";

    /**
     * Whether to use this property's values for similarity.
     */
    String PROP_USE_IN_SIMILARITY = "useInSimilarity";

    /**
     * IndexRule level config to indicate that the node name should also be indexed
     * to support fn:name() queries.
     */
    String INDEX_NODE_NAME = "indexNodeName";

    /**
     * Property definition name to indicate indexing of the node name.
     * Its value should match {@link FieldNames#NODE_NAME}.
     */
    String PROPDEF_PROP_NODE_NAME = ":nodeName";

    /**
     * Boolean property indicating that the Lucene directory content
     * should be saved as part of the NodeState itself as a multi value property
     * to allow faster reads (OAK-2809).
     */
    String SAVE_DIR_LISTING = "saveDirectoryListing";

    /**
     * Optional property to store the path of the index in the repository. The path at
     * which the index definition is defined is not known to the IndexEditor. To make use
     * of the CopyOnWrite feature it is required to know the indexPath to optimize the
     * lookup and read of existing index files.
     *
     * @deprecated With OAK-4152 no need to explicitly define indexPath property
     */
    @Deprecated
    String INDEX_PATH = "indexPath";
}
/**
 * Converts a function expression written in Polish (prefix) notation, whose tokens are
 * separated by {@code *}, into the equivalent XPath or JCR-SQL2 expression.
 *
 * <p>Examples of what {@link #apply(String, boolean)} returns:
 * <ul>
 *   <li>{@code function*upper*@data}: {@code fn:upper-case(@data)} for XPath,
 *       {@code upper([data])} for JCR-SQL2;</li>
 *   <li>{@code lower*@jcr:content/foo/bar}: {@code fn:lower-case(jcr:content/foo/@bar)}
 *       for XPath, {@code lower([jcr:content/foo/bar])} for JCR-SQL2;</li>
 *   <li>{@code coalesce*@a*@b}: {@code coalesce([a],[b])} for JCR-SQL2.</li>
 * </ul>
 */
public class PolishToQueryConverter {

    /** Regular expression matching the literal {@code *} that separates two tokens. */
    private static final String TOKEN_SEPARATOR = "\\*";

    /** Optional first token; it carries no meaning for the conversion and is dropped. */
    private static final String FUNCTION_MARKER = "function";

    /**
     * Converts a given Polish notation string to either XPath or JCR-SQL2 syntax based on the
     * specified flag.
     *
     * <p>Tokens left over after the first complete expression are ignored, and an operand
     * that is missing at the end of the input is rendered as an empty string.
     *
     * @param polishNotation The Polish notation string to be converted; must not be null.
     * @param isXPath        A boolean flag indicating whether to convert to XPath (true) or
     *                       JCR-SQL2 (false) syntax.
     * @return A string representing the converted query in either XPath or JCR-SQL2 syntax.
     * @throws NullPointerException if {@code polishNotation} is null
     */
    public static String apply(String polishNotation, boolean isXPath) {
        // The deque is typed: parseTokens assigns the polled elements to String variables.
        Deque<String> tokens = new LinkedList<>(Arrays.asList(polishNotation.split(TOKEN_SEPARATOR)));
        if (FUNCTION_MARKER.equals(tokens.peek())) {
            tokens.poll();
        }
        return parseTokens(tokens, isXPath);
    }

    /**
     * Recursively consumes tokens from the head of the deque and renders the expression they
     * form. A function token consumes as many following sub-expressions as it has arguments
     * (one, or two for {@code coalesce}); any other token is treated as a property.
     *
     * @param tokens  The remaining tokens of the Polish notation expression; consumed in place.
     * @param isXPath A boolean flag indicating whether to convert to XPath (true) or JCR-SQL2
     *                (false) syntax.
     * @return The converted expression, or an empty string if no token is left.
     */
    private static String parseTokens(Deque<String> tokens, boolean isXPath) {
        if (tokens.isEmpty()) {
            return "";
        }

        String token = tokens.poll();
        switch (token) {
            case "upper":
                return call(isXPath ? "fn:upper-case" : "upper", parseTokens(tokens, isXPath));
            case "lower":
                return call(isXPath ? "fn:lower-case" : "lower", parseTokens(tokens, isXPath));
            case "coalesce": {
                // Two arguments; they must be consumed strictly in this order.
                String first = parseTokens(tokens, isXPath);
                String second = parseTokens(tokens, isXPath);
                return call(isXPath ? "fn:coalesce" : "coalesce", first + "," + second);
            }
            case "first":
                return call(isXPath ? "jcr:first" : "first", parseTokens(tokens, isXPath));
            case "length":
                return call(isXPath ? "fn:string-length" : "length", parseTokens(tokens, isXPath));
            case "@:localname":
                return isXPath ? "fn:local-name()" : "localname()";
            case "@:name":
                return isXPath ? "fn:name()" : "name()";
            case "@:path":
                return isXPath ? "fn:path()" : "path()";
            default:
                // Everything else is a property reference.
                return isXPath ? formatXPathProperty(token) : formatSQL2Property(token);
        }
    }

    /**
     * Renders a function call, that is {@code functionName(arguments)}.
     */
    private static String call(String functionName, String arguments) {
        return functionName + "(" + arguments + ")";
    }

    /**
     * Properties in JCR-SQL2 need to be surrounded with [ ] and do not have the "@" prefix.
     * Also, ] is escaped as ]]. Tokens without the "@" prefix are returned unchanged.
     */
    private static String formatSQL2Property(String token) {
        if (token.startsWith("@")) {
            // Literal replacement; no regular expression is needed for a single character.
            return "[" + token.substring(1).replace("]", "]]") + "]";
        }
        return token;
    }

    /**
     * This method formats properties from Polish notation to valid XPath. The property tokens are
     * always prefixed with "@". Since the token might contain a "/", meaning a nested property, we
     * need to format it to a valid XPath which means that the "deepest" child needs to be prefixed
     * with "@" instead. Example: "@jcr:content/foo/bar/property1" to
     * "jcr:content/foo/bar/@property1". Tokens without a "/" are returned unchanged.
     *
     * @param token The property token in Polish notation.
     * @return The valid XPath formatted property.
     */
    private static String formatXPathProperty(String token) {
        if (!token.contains("/")) {
            return token;
        }
        String path = token.substring(token.indexOf("@") + 1);
        int lastSlash = path.lastIndexOf('/');
        return path.substring(0, lastSlash) + "/@" + path.substring(lastSlash + 1);
    }
}
private String statement; final HashMap bindVariableMap = new HashMap<>(); - + /** * The map of indexes (each selector uses one index) */ final HashMap selectorIndexes = new HashMap<>(); - + /** * The list of selectors of this query. For a join, there can be multiple selectors. */ final ArrayList selectors = new ArrayList<>(); - + ConstraintImpl constraint; /** @@ -148,7 +148,7 @@ public class QueryImpl implements Query { * purposes. */ private boolean traversalEnabled = true; - + /** * The query option to be used for this query. */ @@ -156,13 +156,13 @@ public class QueryImpl implements Query { private OrderingImpl[] orderings; private ColumnImpl[] columns; - + /** * The columns that make a row distinct. This is all columns * except for "jcr:score". */ private boolean[] distinctColumns; - + private boolean explain, measure; private boolean distinct; private Optional limit = Optional.empty(); @@ -170,7 +170,7 @@ public class QueryImpl implements Query { private long size = -1; private boolean prepared; private ExecutionContext context; - + /** * whether the object has been initialised or not */ @@ -179,7 +179,7 @@ public class QueryImpl implements Query { private boolean isSortedByIndex; private final NamePathMapper namePathMapper; - + private double estimatedCost; private final QueryEngineSettings settings; @@ -222,7 +222,7 @@ public boolean visit(BindVariableValueImpl node) { bindVariableMap.put(node.getBindVariableName(), null); return true; } - + @Override public boolean visit(ChildNodeImpl node) { node.setQuery(query); @@ -289,14 +289,14 @@ public boolean visit(NativeFunctionImpl node) { node.bindSelector(source); return super.visit(node); } - + @Override public boolean visit(SimilarImpl node) { node.setQuery(query); node.bindSelector(source); return super.visit(node); } - + @Override public boolean visit(SpellcheckImpl node) { node.setQuery(query); @@ -351,7 +351,7 @@ public boolean visit(PropertyExistenceImpl node) { node.bindSelector(source); return true; } 
- + @Override public boolean visit(PropertyInexistenceImpl node) { node.setQuery(query); @@ -420,7 +420,7 @@ public boolean visit(InImpl node) { node.setQuery(query); return super.visit(node); } - + @Override public boolean visit(AndImpl node) { node.setQuery(query); @@ -455,7 +455,7 @@ public boolean visit(NotImpl node) { } distinctColumns[i] = distinct; } - + init = true; } @@ -508,7 +508,7 @@ public void setExplain(boolean explain) { public void setMeasure(boolean measure) { this.measure = measure; } - + public void setDistinct(boolean distinct) { this.distinct = distinct; } @@ -521,7 +521,7 @@ public ResultImpl executeQuery() { /** * If one of the indexes wants a warning to be logged due to path mismatch, * then get the warning message. Otherwise, return null. - * + * * @return null (in the normal case) or the list of index plan names (if * some index wants a warning to be logged) */ @@ -574,7 +574,7 @@ private void logAdditionalMessages() { } } } - + @Override public Iterator getRows() { prepare(); @@ -702,7 +702,7 @@ private boolean canSortByIndex() { public String getPlan() { return source.getPlan(context.getBaseState()); } - + @Override public String getIndexCostInfo() { return source.getIndexCostInfo(context.getBaseState()); @@ -770,7 +770,7 @@ public void prepare() { source = result; isSortedByIndex = canSortByIndex(); } - + private static SourceImpl buildJoin(SourceImpl result, SourceImpl last, List conditions) { if (result == null) { return last; @@ -790,14 +790,14 @@ private static SourceImpl buildJoin(SourceImpl result, SourceImpl last, List!Test purpose only! 
- * + * * this creates a filter for the given query - * + * */ - Filter createFilter(boolean preparing) { + public Filter createFilter(boolean preparing) { return source.createFilter(preparing); } @@ -1013,7 +1013,7 @@ public int getSelectorIndex(String selectorName) { public int getColumnIndex(String columnName) { return getColumnIndex(columns, columnName); } - + static int getColumnIndex(ColumnImpl[] columns, String columnName) { for (int i = 0, size = columns.length; i < size; i++) { ColumnImpl c = columns[i]; @@ -1039,7 +1039,7 @@ public String[] getSelectorNames() { for (int i = 0; i < list.length; i++) { list[i] = selectors.get(i).getSelectorName(); } - // reverse names to that for xpath, + // reverse names to that for xpath, // the first selector is the same as the node iterator Collections.reverse(Arrays.asList(list)); return list; @@ -1087,7 +1087,7 @@ private SelectorExecutionPlan getBestSelectorExecutionPlan( // current index is below the minimum cost of the next index. List queryIndexes = indexProvider.getQueryIndexes(rootState).stream() .sorted(MINIMAL_COST_ORDERING).collect(Collectors.toList()); - List sortOrder = getSortOrder(filter); + List sortOrder = getSortOrder(filter); for (int i = 0; i < queryIndexes.size(); i++) { QueryIndex index = queryIndexes.get(i); double minCost = index.getMinimumCost(); @@ -1112,7 +1112,7 @@ private SelectorExecutionPlan getBestSelectorExecutionPlan( filter, sortOrder, rootState); cost = Double.POSITIVE_INFINITY; for (IndexPlan p : ipList) { - + long entryCount = p.getEstimatedEntryCount(); if (p.getSupportsPathRestriction()) { entryCount = scaleEntryCount(rootState, filter, entryCount); @@ -1196,12 +1196,12 @@ private SelectorExecutionPlan getBestSelectorExecutionPlan( if (potentiallySlowTraversalQuery || bestIndex == null) { // Log warning for fulltext queries without index, since these cannot return results - if(!filter.getFulltextConditions().isEmpty()) { + if(!filter.getFulltextConditions().isEmpty()) { 
LOG.warn("Fulltext query without index for filter {}; no results will be returned", filter); } else { - LOG.debug("no proper index was found for filter {}", filter); + LOG.debug("no proper index was found for filter {}", filter); } - + StatisticsProvider statisticsProvider = getSettings().getStatisticsProvider(); if (statisticsProvider != null) { HistogramStats histogram = statisticsProvider.getHistogram(INDEX_UNAVAILABLE, StatsOptions.METRICS_ONLY); @@ -1214,7 +1214,7 @@ private SelectorExecutionPlan getBestSelectorExecutionPlan( return new SelectorExecutionPlan(filter.getSelector(), bestIndex, bestPlan, bestCost); } - + private long scaleEntryCount(NodeState rootState, FilterImpl filter, long count) { PathRestriction r = filter.getPathRestriction(); if (r != PathRestriction.ALL_CHILDREN) { @@ -1235,14 +1235,14 @@ private long scaleEntryCount(NodeState rootState, FilterImpl filter, long count) totalNodesCount = 1; } // same logic as for the property index (see ContentMirrorStoreStrategy): - + // assume nodes in the index are evenly distributed in the repository (old idea) long countScaledDown = (long) ((double) count / totalNodesCount * filterPathCount); // assume 80% of the indexed nodes are in this subtree long mostNodesFromThisSubtree = (long) (filterPathCount * 0.8); // count can at most be the assumed subtree size count = Math.min(count, mostNodesFromThisSubtree); - // this in theory should not have any effect, + // this in theory should not have any effect, // except if the above estimates are incorrect, // so this is just for safety feature count = Math.max(count, countScaledDown); @@ -1253,7 +1253,7 @@ private long scaleEntryCount(NodeState rootState, FilterImpl filter, long count) public boolean isPotentiallySlow() { return potentiallySlowTraversalQuery; } - + @Override public void verifyNotPotentiallySlow() { if (potentiallySlowTraversalQuery) { @@ -1285,7 +1285,7 @@ public void verifyNotPotentiallySlow() { } } } - + private List getSortOrder(FilterImpl 
filter) { if (orderings == null) { return null; @@ -1304,7 +1304,7 @@ private List getSortOrder(FilterImpl filter) { } return sortOrder; } - + private void logDebug(String msg) { if (isInternal) { LOG.trace(msg); @@ -1347,7 +1347,7 @@ public boolean isMeasureOrExplainEnabled() { /** * Validate the path is syntactically correct, and convert it to an Oak * internal path (including namespace remapping if needed). - * + * * @param path the path * @return the the converted path */ @@ -1397,7 +1397,7 @@ public String toString() { public long getSize() { return size; } - + @Override public long getSize(SizePrecision precision, long max) { // Note: DISTINCT is ignored @@ -1425,10 +1425,10 @@ public void setInternal(boolean isInternal) { public ExecutionContext getExecutionContext() { return context; } - + /** * Add two values, but don't let it overflow or underflow. - * + * * @param x the first value * @param y the second value * @return the sum, or Long.MIN_VALUE for underflow, or Long.MAX_VALUE for @@ -1444,7 +1444,7 @@ public static long saturatedAdd(long x, long y) { @Override public Query buildAlternativeQuery() { Query result = this; - + if (constraint != null) { Set unionList; try { @@ -1477,14 +1477,14 @@ public Query buildAlternativeQuery() { // re-composing the statement for better debug messages left.statement = recomposeStatement(left); } - + result = newAlternativeUnionQuery(left, right); } } - + return result; } - + private static String recomposeStatement(@NotNull QueryImpl query) { requireNonNull(query); String original = query.getStatement(); @@ -1493,7 +1493,7 @@ private static String recomposeStatement(@NotNull QueryImpl query) { final String where = " WHERE "; final String orderBy = " ORDER BY "; int whereOffset = where.length(); - + if (query.getConstraint() == null) { recomputed.append(original); } else { @@ -1505,18 +1505,18 @@ private static String recomposeStatement(@NotNull QueryImpl query) { } return recomputed.toString(); } - + /** * Convenience 
/**
 * Management interface for query statistics: lists of slow and popular queries, a JSON
 * dump, and a few settings that can be changed at runtime.
 */
public interface QueryStatsMBean {

    String TYPE = "QueryStats";

    /**
     * Get the slow queries. Those are the ones that scan more than 100'000
     * nodes, or the configured maximum number of nodes to scan. (Raw execution
     * time is not taken into account, as execution can be slow if the code is
     * not compiled yet.)
     * <p>
     * NOTE(review): QueryStatsMBeanImpl reads "oak.query.slowScanLimit" with a default of
     * 5000 — confirm whether the 100'000 figure above is still accurate.
     *
     * @return the slow queries table
     */
    @Description("Get the slow queries (those that scan/traverse over many nodes).")
    TabularData getSlowQueries();

    /**
     * Get the popular queries.
     *
     * @return the popular queries table
     */
    @Description("Get the popular queries (those that take most of the time).")
    TabularData getPopularQueries();

    /**
     * Get all data as JSON.
     *
     * @return the data, as a JSON string
     */
    @Description("Get all data as Json.")
    String asJson();

    /**
     * Reset the statistics (clear the list of queries).
     */
    @Description("Reset the statistics (clear the list of queries).")
    void resetStats();

    /**
     * Whether to capture a thread dump in addition to the thread name.
     * No thread name / thread dump is captured for internal queries.
     *
     * @param captureStackTraces the new value
     */
    @Description("Enable / disable capturing the thread dumps (in addition to the thread name).")
    void setCaptureStackTraces(boolean captureStackTraces);

    /**
     * Whether thread dumps are captured in addition to the thread name.
     *
     * @return the current value
     */
    boolean getCaptureStackTraces();

    /**
     * Get the limit used to decide whether a query triggers automatic index optimization.
     * <p>
     * NOTE(review): the description text contains a duplicated word ("read read") and is
     * identical for getter and setter; "Use 100 to optimize all queries" suggests a
     * percentage — confirm the unit and the intended wording.
     *
     * @return the current limit
     */
    @Description("Read optimization limit. Queries with a lower read read optimization trigger automatic index optimization. Use 100 to optimize all queries.")
    int getIndexOptimizerLimit();

    /**
     * Set the limit used to decide whether a query triggers automatic index optimization.
     *
     * @param limit the new limit
     */
    @Description("Read optimization limit. Queries with a lower read read optimization trigger automatic index optimization. Use 100 to optimize all queries.")
    void setIndexOptimizerLimit(int limit);

}
compare(QueryStatsData o1, QueryStatsData o2) { public void resetStats() { statistics.clear(); } - + @Override public void setCaptureStackTraces(boolean captureStackTraces) { this.captureStackTraces = captureStackTraces; @@ -106,7 +107,17 @@ public void setCaptureStackTraces(boolean captureStackTraces) { public boolean getCaptureStackTraces() { return captureStackTraces; } - + + @Override + public int getIndexOptimizerLimit() { + return indexOptimizerLimit; + } + + @Override + public void setIndexOptimizerLimit(int limit) { + this.indexOptimizerLimit = limit; + } + @Override public String asJson() { ArrayList list = new ArrayList(statistics.values()); @@ -155,7 +166,7 @@ public QueryExecutionStats getQueryExecution(String statement, String language) private void evict() { evictionCount++; // retain 50% of the slowest entries - // of the rest, retain the newest entries + // of the rest, retain the newest entries ArrayList list = new ArrayList(statistics.values()); Collections.sort(list, new Comparator() { @Override @@ -177,11 +188,11 @@ public int compare(QueryStatsData o1, QueryStatsData o2) { statistics.remove(list.get(i).getKey()); } } - + public int getEvictionCount() { return evictionCount; } - + private TabularData asTabularData(ArrayList list) { TabularDataSupport tds = null; try { @@ -201,13 +212,13 @@ private TabularData asTabularData(ArrayList list) { return null; } } - + private static class QueryStatsCompositeTypeFactory { private final static String[] index = { "position" }; - private final static String[] names = { "position", - "maxTimeMillis", "totalTimeMillis", "executeCount", + private final static String[] names = { "position", + "maxTimeMillis", "totalTimeMillis", "executeCount", "rowsRead", "rowsScanned", "maxRowsRead", "maxRowsScanned", "language", "statement", "lastExecuted", "lastThread"}; @@ -228,11 +239,11 @@ public static CompositeType getCompositeType() throws OpenDataException { public static Object[] getValues(QueryStatsData q, int 
position) { return new Object[] { (long) position, - q.getMaxTimeNanos() / 1000000, q.getTotalTimeNanos() / 1000000, q.getExecuteCount(), + q.getMaxTimeNanos() / 1000000, q.getTotalTimeNanos() / 1000000, q.getExecuteCount(), q.getTotalRowsRead(), q.getTotalRowsScanned(), q.getMaxRowsRead(), q.getMaxRowsScanned(), q.getLanguage(), q.getQuery(), QueryStatsData.getTimeString(q.getLastExecutedMillis()), q.isInternal() ? "(internal query)" : q.getLastThreadName()}; } } - + } diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdateTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdateTest.java index 825680c09df..b77ab736ef1 100644 --- a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdateTest.java +++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/AsyncIndexUpdateTest.java @@ -1313,7 +1313,7 @@ public synchronized NodeState retrieve(@NotNull String checkpoint) { // merge it back in store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - AsyncIndexUpdate async = new AsyncIndexUpdate("async", store, provider, statsProvider, false); + AsyncIndexUpdate async = new AsyncIndexUpdate("async", store, provider, statsProvider, false, null); runOneCycle(async); assertEquals(1, async.getIndexStats().getExecutionStats().getExecutionCounter().getCount()); @@ -1350,7 +1350,7 @@ public void executionCountUpdatesOnRunWithoutAnyChangeInRepo() throws Exception AsyncIndexUpdate async = new AsyncIndexUpdate("async", new MemoryNodeStore(), new PropertyIndexEditorProvider(), - statsProvider, false); + statsProvider, false, null); long execCnt1 = async.getIndexStats().getTotalExecutionCount(); runOneCycle(async); diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java new file mode 100644 index 00000000000..2a0d8dfdf5e --- /dev/null +++ 
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.diff;
+
+import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.JcrConstants;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.json.JsonObject;
+import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate;
+import org.apache.jackrabbit.oak.plugins.index.CompositeIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.IndexUpdateProvider;
+import org.apache.jackrabbit.oak.plugins.index.counter.NodeCounterEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.optimizer.DiffIndexUpdater;
+import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.reference.ReferenceEditorProvider;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EditorHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.junit.Test;
+import org.mockito.MockedStatic;
+
+/**
+ * Tests for DiffIndex functionality: finding an existing index that matches a generated
+ * definition, listing root index definitions, and the index definitions found after a diff
+ * has been stored under {@code /oak:index/diff.index/diff.json} and async indexing has run.
+ */
+public class DiffIndexTest {
+
+    /**
+     * Checks that {@link DiffIndexUpdater#findMatchingIndexName} finds a match for the given
+     * definition when {@code RootIndexesListService.getRootIndexDefinitions} (mocked statically)
+     * returns the definitions read from the {@code indexes.json} test resource.
+     */
+    @Test
+    public void testFindMatchingIndexName() throws IOException {
+        String indexJson = "{\n" +
+                " \"index\": {\n" +
+                " \"compatVersion\": 2,\n" +
+                " \"async\": \"async\",\n" +
+                " \"queryPaths\": [\"/content/dam/test\"],\n" +
+                " \"includedPaths\": [\"/content/dam/test\"],\n" +
+                " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" +
+                " \"evaluatePathRestrictions\": true,\n" +
+                " \"type\": \"lucene\",\n" +
+                " \"tags\": [\"fragments\"],\n" +
+                " \"indexRules\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"dam:Asset\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"properties\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"title\": {\n" +
+                " \"name\": \"str:jcr:title\",\n" +
+                " \"propertyIndex\": true,\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\"\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                "}";
+
+        try (MockedStatic<RootIndexesListService> mockedStatic = mockStatic(RootIndexesListService.class)) {
+            NodeStore store = mock(NodeStore.class);
+
+            String indexesJsonString;
+
+            try (InputStream stream = getClass().getResourceAsStream("/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json")) {
+                // getResourceAsStream returns null for a missing resource; fail with a clear message
+                assertNotNull("test resource indexes.json not found on the classpath", stream);
+                indexesJsonString = IOUtils.toString(stream, StandardCharsets.UTF_8);
+            }
+
+            mockedStatic.when(() -> RootIndexesListService.getRootIndexDefinitions(eq(store), anyString()))
+                    .thenReturn(JsonObject.fromJson(indexesJsonString, true));
+
+            // only presence is asserted, so the element type is left as a wildcard
+            Optional<?> matchingIndexName = DiffIndexUpdater.findMatchingIndexName(store, indexJson);
+
+            assertTrue(matchingIndexName.isPresent());
+        }
+    }
+
+    /** Checks that a store with the initial content yields at least one definition for "property". */
+    @Test
+    public void listIndexes() {
+        NodeStore store = new MemoryNodeStore(INITIAL_CONTENT);
+        JsonObject indexDefs = RootIndexesListService.getRootIndexDefinitions(store, "property");
+        // expect at least one index
+        assertFalse(indexDefs.getChildren().isEmpty());
+    }
+
+    /**
+     * Stores three successive diffs and checks the "lucene" definitions after each one: the
+     * first yields {@code acme.testIndex-1-custom-1}, the changed diff yields
+     * {@code acme.testIndex-1-custom-2}, and an empty diff leaves no "lucene" definitions.
+     */
+    @Test
+    public void testDiffIndexUpdate() throws Exception {
+        // Create a memory node store
+        NodeStore store = new MemoryNodeStore(INITIAL_CONTENT);
+
+        storeDiff(store, "" +
+                "{ \"acme.testIndex\": {\n" +
+                " \"async\": [ \"async\", \"nrt\" ],\n" +
+                " \"compatVersion\": 2,\n" +
+                " \"evaluatePathRestrictions\": true,\n" +
+                " \"includedPaths\": [ \"/content/dam\" ],\n" +
+                " \"jcr:primaryType\": \"oak:QueryIndexDefinition\",\n" +
+                " \"queryPaths\": [ \"/content/dam\" ],\n" +
+                " \"selectionPolicy\": \"tag\",\n" +
+                " \"tags\": [ \"abc\" ],\n" +
+                " \"type\": \"lucene\",\n" +
+                " \"indexRules\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"dam:Asset\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"properties\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"created\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"name\": \"str:jcr:created\",\n" +
+                " \"ordered\": true,\n" +
+                " \"propertyIndex\": true,\n" +
+                " \"type\": \"Date\"\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " } }");
+
+        JsonObject repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(store, "lucene");
+        assertSameJson("{\n" +
+                " \"/oak:index/acme.testIndex-1-custom-1\": {\n" +
+                " \"compatVersion\": 2,\n" +
+                " \"async\": [\"async\", \"nrt\"],\n" +
+                " \"mergeChecksum\": \"34e7f7f0eb480ea781317b56134bc85fc59ed97031d95f518fdcff230aec28a2\",\n" +
+                " \"mergeInfo\": \"This index was auto-merged. See also https://thomasmueller.github.io/oakTools/simplified.html\",\n" +
+                " \"selectionPolicy\": \"tag\",\n" +
+                " \"queryPaths\": [\"/content/dam\"],\n" +
+                " \"includedPaths\": [\"/content/dam\"],\n" +
+                " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" +
+                " \"evaluatePathRestrictions\": true,\n" +
+                " \"type\": \"lucene\",\n" +
+                " \"tags\": [\"abc\"],\n" +
+                " \"merges\": [\"/oak:index/acme.testIndex\"],\n" +
+                " \"indexRules\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"dam:Asset\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"properties\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"created\": {\n" +
+                " \"ordered\": true,\n" +
+                " \"name\": \"str:jcr:created\",\n" +
+                " \"propertyIndex\": true,\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"type\": \"Date\"\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                "}", repositoryDefinitions.toString());
+
+        storeDiff(store, "" +
+                "{ \"acme.testIndex\": {\n" +
+                " \"async\": [ \"async\", \"nrt\" ],\n" +
+                " \"compatVersion\": 2,\n" +
+                " \"evaluatePathRestrictions\": true,\n" +
+                " \"includedPaths\": [ \"/content/dam\" ],\n" +
+                " \"jcr:primaryType\": \"oak:QueryIndexDefinition\",\n" +
+                " \"queryPaths\": [ \"/content/dam\" ],\n" +
+                " \"selectionPolicy\": \"tag\",\n" +
+                " \"tags\": [ \"abc\" ],\n" +
+                " \"type\": \"lucene\",\n" +
+                " \"indexRules\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"dam:Asset\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"properties\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"created\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"name\": \"str:jcr:created\",\n" +
+                " \"propertyIndex\": true\n" +
+                " },\n" +
+                " \"modified\": {\n" +
+                " \"jcr:primaryType\": \"nt:unstructured\",\n" +
+                " \"name\": \"str:jcr:modified\",\n" +
+                " \"propertyIndex\": true\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " } }");
+
+        repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(store, "lucene");
+        assertSameJson("{\n" +
+                " \"/oak:index/acme.testIndex-1-custom-2\": {\n" +
+                " \"compatVersion\": 2,\n" +
+                " \"async\": [\"async\", \"nrt\"],\n" +
+                " \"mergeChecksum\": \"41df9c87e4d4fca446aed3f55e6d188304a2cb49bae442b75403dc23a89b266f\",\n" +
+                " \"mergeInfo\": \"This index was auto-merged. See also https://thomasmueller.github.io/oakTools/simplified.html\",\n" +
+                " \"selectionPolicy\": \"tag\",\n" +
+                " \"queryPaths\": [\"/content/dam\"],\n" +
+                " \"includedPaths\": [\"/content/dam\"],\n" +
+                " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" +
+                " \"evaluatePathRestrictions\": true,\n" +
+                " \"type\": \"lucene\",\n" +
+                " \"tags\": [\"abc\"],\n" +
+                " \"merges\": [\"/oak:index/acme.testIndex\"],\n" +
+                " \"indexRules\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"dam:Asset\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"properties\": {\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" +
+                " \"created\": {\n" +
+                " \"name\": \"str:jcr:created\",\n" +
+                " \"propertyIndex\": true,\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\"\n" +
+                " },\n" +
+                " \"modified\": {\n" +
+                " \"name\": \"str:jcr:modified\",\n" +
+                " \"propertyIndex\": true,\n" +
+                " \"jcr:primaryType\": \"nam:nt:unstructured\"\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                " }\n" +
+                "}", repositoryDefinitions.toString());
+
+        storeDiff(store, "" +
+                "{}");
+
+        repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(store, "lucene");
+        assertSameJson("{}", repositoryDefinitions.toString());
+    }
+
+    /**
+     * Asserts that two JSON texts are the same according to
+     * {@code DiffIndexMerger.isSameIgnorePropertyOrder}; if not, fails by comparing the raw texts.
+     *
+     * @param expected the expected JSON text
+     * @param actual the JSON text to check
+     */
+    private void assertSameJson(String expected, String actual) {
+        JsonObject expectedJson = JsonObject.fromJson(expected, true);
+        JsonObject actualJson = JsonObject.fromJson(actual, true);
+        if (!DiffIndexMerger.isSameIgnorePropertyOrder(expectedJson, actualJson)) {
+            assertEquals(expected, actual);
+        }
+    }
+
+    /**
+     * Writes {@code json} as the content of {@code /oak:index/diff.index/diff.json}, merges the
+     * change into the store, and then runs an {@link AsyncIndexUpdate} named "async" five times.
+     *
+     * @param store the node store to modify
+     * @param json the text stored as {@code jcr:data} of the diff file
+     * @throws CommitFailedException if merging the change fails
+     */
+    private void storeDiff(NodeStore store, String json) throws CommitFailedException {
+        // Get the root builder
+        NodeBuilder builder = store.getRoot().builder();
+
+        List<IndexEditorProvider> indexEditors = List.of(
+                new ReferenceEditorProvider(), new PropertyIndexEditorProvider(), new NodeCounterEditorProvider());
+        IndexEditorProvider provider = CompositeIndexEditorProvider.compose(indexEditors);
+        EditorHook hook = new EditorHook(new IndexUpdateProvider(provider));
+
+        // Create the index definition at /oak:index/diff.index
+        NodeBuilder indexDefs = builder.child(INDEX_DEFINITIONS_NAME);
+        NodeBuilder diffIndex = indexDefs.child("diff.index");
+
+        // Set index properties
+        diffIndex.setProperty(JcrConstants.JCR_PRIMARYTYPE, IndexConstants.INDEX_DEFINITIONS_NODE_TYPE, Type.NAME);
+        diffIndex.setProperty(TYPE_PROPERTY_NAME, "disabled");
+
+        // Create the diff.json child node with primary type nt:file
+        NodeBuilder diffJson = diffIndex.child("diff.json");
+        diffJson.setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_FILE, Type.NAME);
+
+        // Create the jcr:content child node (required for nt:file); it holds the diff as jcr:data
+        NodeBuilder content = diffJson.child(JcrConstants.JCR_CONTENT);
+        content.setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_RESOURCE, Type.NAME);
+
+        content.setProperty("jcr:data", json);
+
+        // Merge changes to the store
+        store.merge(builder, hook, CommitInfo.EMPTY);
+
+        // Run async indexing explicitly
+        // NOTE(review): presumably several cycles are needed before the merged index appears - confirm
+        for (int i = 0; i < 5; i++) {
+            try (AsyncIndexUpdate async = new AsyncIndexUpdate("async", store, provider)) {
+                async.run();
+            }
+        }
+    }
+}
+
diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/FunctionNameConverterTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/FunctionNameConverterTest.java
new file mode 100644
index 00000000000..5a5ffd357e7
--- /dev/null
+++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/FunctionNameConverterTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * 
contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.optimizer;
+
+import org.junit.Test;
+
+public class FunctionNameConverterTest {
+    @Test
+    public void testFormatNameSQL2() { // isXPath = false: upper, lower and length keep their names
+        checkConvert("function*upper*@data", "upperData", false);
+        checkConvert("function*lower*@test/data", "lowerData", false);
+        checkConvert("function*lower*@:name", "lowerName", false);
+        checkConvert("function*lower*@:localname", "lowerLocalname", false);
+        checkConvert("function*length*@test/data", "lengthData", false);
+        checkConvert("function*length*@:name", "lengthName", false);
+        checkConvert("function*@:path", "path", false);
+        checkConvert("function*length*@:path", "lengthPath", false);
+        checkConvert("function*lower*upper*@test/data", "lowerUpperData", false);
+        checkConvert("function*coalesce*@jcr:content/foo2*@jcr:content/foo", "coalesceFoo2Foo", false);
+        checkConvert("function*coalesce*@jcr:content/foo2*lower*@jcr:content/foo",
+                "coalesceFoo2LowerFoo", false);
+        checkConvert("function*coalesce*@jcr:content/foo2*coalesce*@jcr:content/foo*lower*@:name",
+                "coalesceFoo2CoalesceFooLowerName", false);
+        checkConvert(
+                "function*coalesce*coalesce*@jcr:content/foo2*@jcr:content/foo*coalesce*@a:b*@c:d",
+                "coalesceCoalesceFoo2FooCoalesceBD", false);
+        checkConvert("function*first*@jcr:content/foo2", "firstFoo2", false);
+    }
+
+    @Test
+    public void testFormatNameXPath() { // isXPath = true: upper, lower, length become upperCase, lowerCase, stringLength
+        checkConvert("function*upper*@data", "upperCaseData", true);
+        checkConvert("function*lower*@test/data", "lowerCaseData", true);
+        checkConvert("function*lower*@:name", "lowerCaseName", true);
+        checkConvert("function*lower*@:localname", "lowerCaseLocalname", true);
+        checkConvert("function*length*@test/data", "stringLengthData", true);
+        checkConvert("function*length*@:name", "stringLengthName", true);
+        checkConvert("function*@:path", "path", true);
+        checkConvert("function*length*@:path", "stringLengthPath", true);
+        checkConvert("function*lower*upper*@test/data", "lowerCaseUpperCaseData", true);
+        checkConvert("function*coalesce*@jcr:content/foo2*@jcr:content/foo", "coalesceFoo2Foo", true);
+        checkConvert("function*coalesce*@jcr:content/foo2*lower*@jcr:content/foo",
+                "coalesceFoo2LowerCaseFoo", true);
+        checkConvert("function*coalesce*@jcr:content/foo2*coalesce*@jcr:content/foo*lower*@:name",
+                "coalesceFoo2CoalesceFooLowerCaseName", true);
+        checkConvert(
+                "function*coalesce*coalesce*@jcr:content/foo2*@jcr:content/foo*coalesce*@a:b*@c:d",
+                "coalesceCoalesceFoo2FooCoalesceBD", true);
+        checkConvert("function*first*@jcr:content/foo2", "firstFoo2", true);
+    }
+
+    /** Converts {@code input} and checks that the resulting name equals {@code expected}. */
+    private static void checkConvert(String input, String expected, boolean isXPath) {
+        // a JUnit assertion, unlike the assert keyword, also runs when the JVM has assertions disabled
+        org.junit.Assert.assertEquals(expected, FunctionNameConverter.apply(input, isXPath));
+    }
+}
diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionGeneratorTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionGeneratorTest.java
new file mode 100644
index 00000000000..a703bddf4a3
--- /dev/null
+++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/IndexDefinitionGeneratorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.optimizer;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class IndexDefinitionGeneratorTest {
+    /** Checks the index definition JSON generated for an XPath query that carries an index tag. */
+    @Test
+    public void test() {
+        String expectedJson = "{\n"
+                + " \"index\": {\n"
+                + " \"compatVersion\": 2,\n"
+                + " \"async\": \"async\",\n"
+                + " \"queryPaths\": [\"/content\"],\n"
+                + " \"includedPaths\": [\"/content\"],\n"
+                + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n"
+                + " \"evaluatePathRestrictions\": true,\n"
+                + " \"type\": \"lucene\",\n"
+                + " \"tags\": [\"testTag\"],\n"
+                + " \"indexRules\": {\n"
+                + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+                + " \"acme:test\": {\n"
+                + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+                + " \"properties\": {\n"
+                + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+                + " \"test\": {\n"
+                + " \"name\": \"test\",\n"
+                + " \"propertyIndex\": true,\n"
+                + " \"jcr:primaryType\": \"nam:nt:unstructured\"\n"
+                + " }\n"
+                + " }\n"
+                + " }\n"
+                + " }\n"
+                + " }\n"
+                + "}";
+        assertEquals(expectedJson, IndexDefinitionGenerator.generateIndexDefinition("xpath", "/jcr:root/content//element(*, acme:test)[@test=1] option (index tag testTag)"));
+    }
+}
diff --git 
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/PolishToQueryConverterTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/PolishToQueryConverterTest.java
new file mode 100644
index 00000000000..fcf8e4f75b1
--- /dev/null
+++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/optimizer/PolishToQueryConverterTest.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.optimizer;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+/**
+ * Tests the conversion of function expressions in prefix ("Polish") notation, such as
+ * {@code function*upper*@data}, into XPath and SQL-2 expressions.
+ */
+public class PolishToQueryConverterTest {
+
+    /** Conversion to XPath expressions ({@code isXPath} is {@code true}). */
+    // taken from: https://github.com/apache/jackrabbit-oak/blob/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/util/FunctionIndexProcessorTest.java
+    @Test
+    public void testXPath() {
+        checkConvert(
+                "function*upper*@data",
+                "fn:upper-case(@data)", true);
+        checkConvert(
+                "function*lower*@test/data",
+                "fn:lower-case(test/@data)", true);
+        checkConvert(
+                "function*lower*@:name",
+                "fn:lower-case(fn:name())", true);
+        checkConvert(
+                "function*lower*@:localname",
+                "fn:lower-case(fn:local-name())", true);
+        checkConvert(
+                "function*length*@test/data",
+                "fn:string-length(test/@data)", true);
+        checkConvert(
+                "function*length*@:name",
+                "fn:string-length(fn:name())", true);
+        checkConvert(
+                "function*@:path",
+                "fn:path()", true);
+        checkConvert(
+                "function*length*@:path",
+                "fn:string-length(fn:path())", true);
+        checkConvert(
+                "function*lower*upper*@test/data",
+                "fn:lower-case(fn:upper-case(test/@data))", true);
+        checkConvert(
+                "function*coalesce*@jcr:content/foo2*@jcr:content/foo",
+                "fn:coalesce(jcr:content/@foo2,jcr:content/@foo)", true);
+        checkConvert("function*coalesce*@jcr:content/foo2*lower*@jcr:content/foo",
+                "fn:coalesce(jcr:content/@foo2,fn:lower-case(jcr:content/@foo))", true);
+        checkConvert("function*coalesce*@jcr:content/foo2*coalesce*@jcr:content/foo*lower*@:name",
+                "fn:coalesce(jcr:content/@foo2,fn:coalesce(jcr:content/@foo,fn:lower-case(fn:name())))",
+                true);
+        checkConvert(
+                "function*coalesce*coalesce*@jcr:content/foo2*@jcr:content/foo*coalesce*@a:b*@c:d",
+                "fn:coalesce(fn:coalesce(jcr:content/@foo2,jcr:content/@foo),fn:coalesce(@a:b,@c:d))",
+                true);
+        checkConvert("function*first*@jcr:content/foo2",
+                "jcr:first(jcr:content/@foo2)", true);
+    }
+
+    /** Conversion to SQL-2 expressions ({@code isXPath} is {@code false}). */
+    @Test
+    public void testSql2() {
+        checkConvert(
+                "function*upper*@data",
+                "upper([data])", false);
+        checkConvert(
+                "function*lower*@test/data",
+                "lower([test/data])", false);
+        checkConvert(
+                "function*lower*@:name",
+                "lower(name())", false);
+        checkConvert(
+                "function*lower*@:localname",
+                "lower(localname())", false);
+        checkConvert(
+                "function*length*@test/data",
+                "length([test/data])", false);
+        checkConvert(
+                "function*length*@:name",
+                "length(name())", false);
+        checkConvert(
+                "function*@:path",
+                "path()", false);
+        checkConvert(
+                "function*length*@:path",
+                "length(path())", false);
+        checkConvert(
+                "function*lower*upper*@test/data",
+                "lower(upper([test/data]))", false);
+        // the ']' character is escaped as ']]'
+        checkConvert(
+                "function*@strange[0]",
+                "[strange[0]]]", false);
+        checkConvert("function*coalesce*@jcr:content/foo2*@jcr:content/foo",
+                "coalesce([jcr:content/foo2],[jcr:content/foo])", false);
+        checkConvert("function*coalesce*@jcr:content/foo2*lower*@jcr:content/foo",
+                "coalesce([jcr:content/foo2],lower([jcr:content/foo]))", false);
+        checkConvert("function*coalesce*@jcr:content/foo2*coalesce*@jcr:content/foo*lower*@:name",
+                "coalesce([jcr:content/foo2],coalesce([jcr:content/foo],lower(name())))", false);
+        checkConvert(
+                "function*coalesce*coalesce*@jcr:content/foo2*@jcr:content/foo*coalesce*@a:b*@c:d",
+                "coalesce(coalesce([jcr:content/foo2],[jcr:content/foo]),coalesce([a:b],[c:d]))",
+                false);
+        checkConvert("function*first*@jcr:content/foo2",
+                "first([jcr:content/foo2])", false);
+    }
+
+    /**
+     * Converts {@code input} and compares the result with {@code expected}.
+     *
+     * @param input the function expression in prefix notation
+     * @param expected the expected converted expression
+     * @param isXPath the flag passed through to {@link PolishToQueryConverter#apply}
+     */
+    private static void checkConvert(String input, String expected, boolean isXPath) {
+        String res = PolishToQueryConverter.apply(input, isXPath);
+        // name the input in the failure message so the failing case can be identified
+        assertEquals("converted expression for " + input, expected, res);
+    }
+}
diff --git a/oak-core/src/test/resources/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json 
b/oak-core/src/test/resources/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json new file mode 100644 index 00000000000..a5a16d0fb72 --- /dev/null +++ b/oak-core/src/test/resources/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json @@ -0,0 +1,187 @@ +{ + "/oak:index/ntFolder": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "includedPaths": [ + "/content/test" + ], + "tags": [ + "testTag1", + "testTag2" + ], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:folder": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured", + "jcrTitle": { + "jcr:primaryType": "nam:nt:unstructured", + "nodeScopeIndex": true, + "useInSuggest": true, + "useInSpellcheck": true, + "name": "str:jcr:content/jcr:title" + } + } + } + } + }, + "/oak:index/share": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "selectionPolicy": "tag", + "includedPaths": [ + "/var/share" + ], + "tags": [ + "share" + ], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:unstructured": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + }, + "/oak:index/versionStoreIndex": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "includedPaths": [ + "/jcr:system/jcr:versionStorage" + ], + "type": "lucene", + "async": [ + "async", + "sync" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:version": { + "jcr:primaryType": "nam:nt:unstructured" + }, + "nt:frozenNode": { + "jcr:primaryType": "nam:nt:unstructured" + }, + "nt:base": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + }, + "/oak:index/authorizables": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + ":version": 2, + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "excludedPaths": [ + "/var", + "/jcr:system" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + 
"rep:Authorizable": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + } + }, + "/oak:index/internalVerificationLucene": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + ":version": 2, + "includedPaths": [ + "/tmp" + ], + "type": "lucene", + "async": [ + "async" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:base": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured", + "verification": { + "jcr:primaryType": "nam:nt:unstructured", + "propertyIndex": true, + "name": "verification", + "type": "String" + } + } + } + } + }, + "/oak:index/ntBaseLucene-2": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "evaluatePathRestrictions": true, + "excludedPaths": [ + "/oak:index" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:base": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + }, + "/oak:index/fragments": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "selectionPolicy": "tag", + "includedPaths": [ + "/content/dam", + "/content/launches" + ], + "tags": [ + "fragments" + ], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "dam:Asset": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + } + }, + "/oak:index/assetLucene": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "includedPaths": [ + "/content/dam", + "/content/assets" + ], + "tags": [], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "dam:Asset": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + } +} diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java index f8beab7a3ba..0d47a4d148b 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java @@ -118,6 +118,8 @@ public List getPlans(Filter filter, List sortOrder, NodeS .collectIndexNodePaths(filter); if (filterReplacedIndexes()) { indexPaths = IndexName.filterReplacedIndexes(indexPaths, rootState, runIsActiveIndexCheck()); + } else { + indexPaths = IndexName.filterNewestIndexes(indexPaths, rootState); } List plans = new ArrayList<>(indexPaths.size()); for (String path : indexPaths) {