From 2826884b50e3065a6d94d4b1e8b274c2e7e46849 Mon Sep 17 00:00:00 2001 From: Robert Munteanu Date: Mon, 22 Dec 2025 14:12:21 +0100 Subject: [PATCH 1/2] OAK-12046 - Update default Tika config --- .../oak/plugins/index/lucene/tika-config.xml | 11 ++++++++++- .../plugins/index/search/spi/editor/tika-config.xml | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml b/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml index 54f229d6999..07e002c479b 100644 --- a/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml +++ b/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml @@ -24,7 +24,16 @@ - + + + + + + + + false + + application/x-archive diff --git a/oak-search/src/main/resources/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/tika-config.xml b/oak-search/src/main/resources/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/tika-config.xml index 54f229d6999..07e002c479b 100644 --- a/oak-search/src/main/resources/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/tika-config.xml +++ b/oak-search/src/main/resources/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/tika-config.xml @@ -24,7 +24,16 @@ - + + + + + + + + false + + application/x-archive From 3ad34c5351f1e3bcdd2f4d4a0b24bdeceb949cfd Mon Sep 17 00:00:00 2001 From: Robert Munteanu Date: Mon, 22 Dec 2025 16:20:26 +0100 Subject: [PATCH 2/2] OAK-12046 - Update default Tika config Adjust the class loader used for loading Tika configurations to allow configuring the PDFParser. By default Tika does not use the context class loader so we plug it in the existing abstraction. This effectively substitutes the tika-core classloader with the oak-lucene classloader, given that the FulltextBinaryTextExtractor ends up being embedded in oak-lucene. --- oak-lucene/pom.xml | 1 + .../search/spi/binary/FulltextBinaryTextExtractor.java | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/oak-lucene/pom.xml b/oak-lucene/pom.xml index b5b411cdd1d..e44169d4849 100644 --- a/oak-lucene/pom.xml +++ b/oak-lucene/pom.xml @@ -116,6 +116,7 @@ org.apache.lucene.sandbox.*;resolution:=optional, + org.apache.tika.parser.pdf;resolution:=optional, !org.apache.lucene.*, !org.apache.jackrabbit.oak.cache, !com.sun.management.*, diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java index 5db229908ca..43ddb4dfc70 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java @@ -33,6 +33,7 @@ import org.apache.jackrabbit.oak.stats.StatisticsProvider; import org.apache.jackrabbit.oak.stats.StatsOptions; import org.apache.jackrabbit.oak.stats.TimerStats; +import org.apache.tika.config.ServiceLoader; import org.apache.tika.config.TikaConfig; import org.apache.tika.exception.TikaException; import org.apache.tika.exception.WriteLimitReachedException; @@ -287,7 +288,9 @@ private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition d String configSource = null; try { - Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader()); + ClassLoader newContextClassLoader = FulltextIndexEditorContext.class.getClassLoader(); + Thread.currentThread().setContextClassLoader(newContextClassLoader); + ServiceLoader.setContextClassLoader(newContextClassLoader); if (definition != null && definition.hasCustomTikaConfig()) { log.debug("[{}] Using custom tika config", definition.getIndexName()); configSource = "Custom config at " + definition.getIndexPath(); @@ -308,6 +311,7 @@ private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition d } finally { IOUtils.closeQuietly(configStream); Thread.currentThread().setContextClassLoader(current); + ServiceLoader.setContextClassLoader(null); // Tika default is null } return new TikaConfigHolder(TikaConfig.getDefaultConfig(), "Default Config"); }