Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions oak-lucene/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
</Export-Package>
<Import-Package>
org.apache.lucene.sandbox.*;resolution:=optional,
org.apache.tika.parser.pdf;resolution:=optional, <!-- Allow configuring the PDFParser -->
!org.apache.lucene.*,
!org.apache.jackrabbit.oak.cache,
!com.sun.management.*,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@
<detector class="org.apache.tika.detect.TypeDetector"/>
</detectors>
<parsers>
<parser class="org.apache.tika.parser.DefaultParser"/>
<parser class="org.apache.tika.parser.DefaultParser">
<!-- the PDF parser is configured below -->
<parser-exclude class="org.apache.tika.parser.pdf.PDFParser"/>
</parser>
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<!-- Disable XFA/AcroForm extraction -->
<param name="extractAcroFormContent" type="bool">false</param>
</params>
</parser>
<parser class="org.apache.tika.parser.EmptyParser">
<!-- Disable package extraction as it's too resource-intensive -->
<mime>application/x-archive</mime>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.apache.jackrabbit.oak.stats.StatsOptions;
import org.apache.jackrabbit.oak.stats.TimerStats;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.WriteLimitReachedException;
Expand Down Expand Up @@ -287,7 +288,9 @@ private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition d
String configSource = null;

try {
Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
ClassLoader newContextClassLoader = FulltextIndexEditorContext.class.getClassLoader();
Thread.currentThread().setContextClassLoader(newContextClassLoader);
ServiceLoader.setContextClassLoader(newContextClassLoader);
if (definition != null && definition.hasCustomTikaConfig()) {
log.debug("[{}] Using custom tika config", definition.getIndexName());
configSource = "Custom config at " + definition.getIndexPath();
Expand All @@ -308,6 +311,7 @@ private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition d
} finally {
IOUtils.closeQuietly(configStream);
Thread.currentThread().setContextClassLoader(current);
ServiceLoader.setContextClassLoader(null); // Tika default is null
}
return new TikaConfigHolder(TikaConfig.getDefaultConfig(), "Default Config");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@
<detector class="org.apache.tika.detect.TypeDetector"/>
</detectors>
<parsers>
<parser class="org.apache.tika.parser.DefaultParser"/>
<parser class="org.apache.tika.parser.DefaultParser">
<!-- the PDF parser is configured below -->
<parser-exclude class="org.apache.tika.parser.pdf.PDFParser"/>
</parser>
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<!-- Disable XFA/AcroForm extraction -->
<param name="extractAcroFormContent" type="bool">false</param>
</params>
</parser>
<parser class="org.apache.tika.parser.EmptyParser">
<!-- Disable package extraction as it's too resource-intensive -->
<mime>application/x-archive</mime>
Expand Down
Loading