diff --git a/gxflowfulltextsearch/pom.xml b/gxflowfulltextsearch/pom.xml
new file mode 100644
index 000000000..ea5c5856d
--- /dev/null
+++ b/gxflowfulltextsearch/pom.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>com.genexus</groupId>
+		<artifactId>parent</artifactId>
+		<version>${revision}${changelist}</version>
+	</parent>
+
+	<artifactId>gxflowfulltextsearch</artifactId>
+	<name>GXflow FullText Search</name>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-collections4</artifactId>
+			<version>${commons.collections4.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-logging</groupId>
+			<artifactId>commons-logging</artifactId>
+			<version>${commons.logging.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.pdfbox</groupId>
+			<artifactId>pdfbox</artifactId>
+			<version>${pdfbox.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.lucene</groupId>
+			<artifactId>lucene-core</artifactId>
+			<version>${lucene.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.poi</groupId>
+			<artifactId>poi</artifactId>
+			<version>${poi.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.poi</groupId>
+			<artifactId>poi-ooxml</artifactId>
+			<version>${poi.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.logging.log4j</groupId>
+			<artifactId>log4j-core</artifactId>
+			<version>${log4j.version}</version>
+		</dependency>
+	</dependencies>
+
+	<build>
+		<finalName>GXflowFullTextSearch</finalName>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>3.8.0</version>
+			</plugin>
+		</plugins>
+	</build>
+</project>
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/AnalyzerManager.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/AnalyzerManager.java
new file mode 100644
index 000000000..d8df364a1
--- /dev/null
+++ b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/AnalyzerManager.java
@@ -0,0 +1,25 @@
+package com.genexus.CA.search;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+public class AnalyzerManager {
+ private static final Map ANALYZERS = new ConcurrentHashMap<>();
+
+ static {
+ ANALYZERS.put("default", new StandardAnalyzer());
+ // In the future, when the Lucene version is updated, specific analyzers for different languages can be added here.
+ // For example, for Spanish:
+ // ANALYZERS.put("es", new org.apache.lucene.analysis.es.SpanishAnalyzer());
+ }
+
+ public static Analyzer getAnalyzer(String lang) {
+ if (lang == null || lang.trim().isEmpty()) {
+ return ANALYZERS.get("default");
+ }
+ return ANALYZERS.getOrDefault(lang, ANALYZERS.get("default"));
+ }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/IndexManager.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/IndexManager.java
new file mode 100644
index 000000000..f2f26768f
--- /dev/null
+++ b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/IndexManager.java
@@ -0,0 +1,20 @@
+package com.genexus.CA.search;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class IndexManager {
+ private static final Map INDEXERS = new ConcurrentHashMap<>();
+
+ public static void addContent(String dir, String uri, String lang, String title, String summary, byte fromFile, String body, String filePath) {
+ getIndexer(dir).addContent(uri, lang, title, summary, fromFile, body, filePath);
+ }
+
+ public static void deleteContent(String dir, String uri) {
+ getIndexer(dir).deleteContent(uri);
+ }
+
+ private static Indexer getIndexer(String dir) {
+ return INDEXERS.computeIfAbsent(dir, Indexer::new);
+ }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Indexer.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Indexer.java
new file mode 100644
index 000000000..f4b982ac4
--- /dev/null
+++ b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Indexer.java
@@ -0,0 +1,259 @@
+package com.genexus.CA.search;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+
+public final class Indexer {
+ private String indexDirectory = ".";
+ private static final int OPERATION_INDEX = 1;
+ private static final int OPERATION_DELETE = 2;
+
+ private static final Logger logger = LogManager.getLogger(Indexer.class);
+
+ Indexer(String directory) {
+ this.indexDirectory = normalizeIndexDirectory(directory);
+ if (!this.indexExists(this.indexDirectory)) {
+ try {
+ IndexWriter writer = new IndexWriter(this.indexDirectory, new StandardAnalyzer(), true);
+ writer.close();
+ } catch (Exception e) {
+ logger.error("Error creating index directory: {}", this.indexDirectory, e);
+ }
+ }
+
+ }
+
+ void addContent(String uri, String lang, String title, String summary, byte fromFile, String body, String filePath) {
+ Document doc = new Document();
+ StringBuilder contentBuilder = new StringBuilder();
+ boolean fileContentRead = false;
+ String normalizedUri = normalizeUri(uri);
+ String normalizedLang = normalizeLang(lang);
+
+ if (fromFile == 1 && filePath != null && !filePath.trim().isEmpty()) {
+ String lowerFilePath = filePath.toLowerCase();
+ try {
+ if (this.isDocxExtension(lowerFilePath)) {
+ try (FileInputStream file = new FileInputStream(filePath); XWPFDocument reader = new XWPFDocument(file)) {
+ for (XWPFParagraph p : reader.getParagraphs()) {
+ contentBuilder.append(p.getText()).append(" ");
+ }
+ fileContentRead = true;
+ }
+ } else if (this.isPdfExtension(lowerFilePath)) {
+ try (PDDocument document = Loader.loadPDF(new File(filePath))) {
+ PDFTextStripper tStripper = new PDFTextStripper();
+ contentBuilder.append(tStripper.getText(document));
+ fileContentRead = true;
+ }
+ } else if (this.isTxtExtension(lowerFilePath)) {
+ contentBuilder.append(readTextFile(filePath));
+ fileContentRead = true;
+ }
+ } catch (IOException e) {
+ logger.error("Error reading file content from: {}", filePath, e);
+ }
+ }
+
+ if (body != null && !body.isEmpty() && !fileContentRead) {
+ contentBuilder.append(body);
+ }
+
+ String content = contentBuilder.toString();
+
+ this.indexOperation(OPERATION_DELETE, normalizedLang, null, normalizedUri);
+
+ doc.add(new Field("uri", normalizedUri, Store.YES, Index.UN_TOKENIZED));
+ doc.add(new Field("language", normalizedLang, Store.YES, Index.UN_TOKENIZED));
+ doc.add(new Field("title", title == null ? "" : title, Store.YES, Index.TOKENIZED));
+ doc.add(new Field("summary", summary == null ? "" : summary, Store.YES, Index.TOKENIZED));
+ doc.add(new Field("content", content, Store.YES, Index.TOKENIZED));
+
+ try {
+ this.indexOperation(OPERATION_INDEX, normalizedLang, doc, null);
+ } catch (Exception e) {
+ logger.error("Error indexing content. uri={}, lang={}", normalizedUri, normalizedLang, e);
+ }
+ }
+
+ void deleteContent(String uri) {
+ try {
+ this.indexOperation(OPERATION_DELETE, null, null, normalizeUri(uri));
+ } catch (Exception e) {
+ logger.error("Error deleting content. uri={}", uri, e);
+ }
+
+ }
+
+ private synchronized void indexOperation(int op, String lang, Document doc, String uri) {
+ switch(op) {
+ case OPERATION_INDEX:
+ try {
+ IndexWriter writer = new IndexWriter(this.getIndexDirectory(), AnalyzerManager.getAnalyzer(lang), false);
+ writer.addDocument(doc);
+ // writer.optimize(); // This is a costly operation and should not be done for every document.
+ writer.close();
+ } catch (Exception e) {
+ logger.error("Error indexing document. uri={}, lang={}", uri, lang, e);
+ }
+ break;
+ case OPERATION_DELETE:
+ IndexReader reader = null;
+ try {
+ Term term = null;
+ int docId = 0;
+ if (lang == null) {
+ term = new Term("uri", uri);
+ } else {
+ docId = this.getDocumentId(uri, lang);
+ }
+
+ reader = IndexReader.open(this.getIndexDirectory());
+ if (lang == null) {
+ reader.deleteDocuments(term);
+ } else if (docId != -1) {
+ reader.deleteDocument(docId);
+ }
+
+ } catch (Exception e) {
+ logger.error("Error deleting document. uri={}, lang={}", uri, lang, e);
+ } finally {
+ if (reader != null) {
+ try {
+ reader.close();
+ } catch (IOException e) {
+ logger.error("Error closing IndexReader", e);
+ }
+ }
+ }
+ }
+
+ }
+
+ public String getIndexDirectory() {
+ return this.indexDirectory;
+ }
+
+ private String normalizeIndexDirectory(String dir) {
+ if (dir == null || dir.trim().isEmpty()) {
+ return ".";
+ }
+ return new File(dir).getAbsolutePath();
+ }
+
+ private boolean indexExists(String dir) {
+ try {
+ new IndexSearcher(dir);
+ return true;
+ } catch (IOException e) {
+ return false;
+ }
+ }
+
+ private int getDocumentId(String uri, String lang) {
+ int documentId = -1;
+
+ try {
+ Hits hits = this.getHits(uri, lang);
+ if (hits.length() > 0) {
+ documentId = hits.id(0);
+ }
+ } catch (IOException e) {
+ logger.error("Error getting document id. uri={}, lang={}", uri, lang, e);
+ }
+
+ return documentId;
+ }
+
+ private boolean isDocxExtension(String filePath) {
+ return filePath.toLowerCase().endsWith(".docx");
+ }
+
+ private Hits getHits(String uri, String lang) {
+ IndexSearcher searcher = null;
+ Hits hits = null;
+ try {
+ searcher = new IndexSearcher(this.indexDirectory);
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term("uri", uri)), Occur.MUST);
+ if (lang != null && !lang.trim().isEmpty()) {
+ query.add(new TermQuery(new Term("language", lang)), Occur.MUST);
+ }
+ hits = searcher.search(query);
+ } catch (IOException e) {
+ logger.error("Error searching hits. uri={}, lang={}", uri, lang, e);
+ } finally {
+ if (searcher != null) {
+ try {
+ searcher.close();
+ } catch (IOException e) {
+ logger.error("Error closing IndexSearcher", e);
+ }
+ }
+ }
+
+ return hits;
+ }
+
+ private String normalizeUri(String uri) {
+ if (uri == null) {
+ return "";
+ }
+ return uri.trim().toLowerCase();
+ }
+
+ private String normalizeLang(String lang) {
+ if (lang == null) {
+ return "";
+ }
+ return lang.trim().toLowerCase();
+ }
+
+ private String readTextFile(String filePath) throws IOException {
+ StringBuilder builder = new StringBuilder();
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(filePath)), StandardCharsets.UTF_8))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ builder.append(line).append(' ');
+ }
+ }
+ return builder.toString();
+ }
+
+ private boolean isPdfExtension(String filePath) {
+ return filePath.toLowerCase().endsWith(".pdf");
+ }
+
+ private boolean isTxtExtension(String filePath) {
+ String lowerFilePath = filePath.toLowerCase();
+ return lowerFilePath.endsWith(".txt") || lowerFilePath.endsWith(".html");
+ }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Searcher.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Searcher.java
new file mode 100644
index 000000000..676627357
--- /dev/null
+++ b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Searcher.java
@@ -0,0 +1,131 @@
+package com.genexus.CA.search;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.MultiFieldQueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+
+public class Searcher {
+ private static final Logger logger = LogManager.getLogger(Searcher.class);
+
+ private static String escapeXml(String text) {
+ if (text == null) {
+ return "";
+ }
+ return text.replace("&", "&")
+ .replace("<", "<")
+ .replace(">", ">")
+ .replace("\"", """)
+ .replace("'", "'");
+ }
+
+ public static String search(String dir, String lang, String query, int maxResults, int from) {
+ StringBuilder buff = new StringBuilder();
+ long startTime = System.currentTimeMillis();
+
+ if (from < 0) {
+ logger.warn("Search 'from' cannot be negative. Using 0 instead. from={}", from);
+ from = 0;
+ }
+ if (maxResults < 0) {
+ logger.warn("Search 'maxResults' cannot be negative. Using 0 instead. maxResults={}", maxResults);
+ maxResults = 0;
+ }
+
+ if (!indexExists(dir)) {
+ buff.append("");
+ return buff.toString();
+ }
+
+ IndexSearcher searcher = null;
+ try {
+ searcher = new IndexSearcher(dir);
+ String[] fields = new String[]{"title", "content", "summary"};
+ Occur[] clauses = new Occur[]{Occur.SHOULD, Occur.SHOULD, Occur.SHOULD};
+
+ Query q;
+ try {
+ q = MultiFieldQueryParser.parse(query, fields, clauses, AnalyzerManager.getAnalyzer(lang));
+ } catch (ParseException e) {
+ try {
+ String escapedQuery = QueryParser.escape(query);
+ q = MultiFieldQueryParser.parse(escapedQuery, fields, clauses, AnalyzerManager.getAnalyzer(lang));
+ logger.warn("Query had invalid syntax. Escaped version was used: {}", escapedQuery, e);
+ } catch (ParseException escapedException) {
+ logger.warn("Could not parse query, falling back to TermQuery: " + query, escapedException);
+ q = new TermQuery(new Term("content", query));
+ }
+ }
+
+ if (lang != null && !lang.trim().isEmpty() && !"IND".equalsIgnoreCase(lang)) {
+ Query q2 = new TermQuery(new Term("language", lang));
+ BooleanQuery bq = new BooleanQuery();
+ bq.add(q, Occur.MUST);
+ bq.add(q2, Occur.MUST);
+ q = bq;
+ }
+
+ Hits hits = searcher.search(q);
+ int totalHits = hits.length();
+
+ long endTime = System.currentTimeMillis();
+ String time = String.valueOf(endTime - startTime);
+
+ buff.append("");
+ buff.append("");
+
+ int end = Math.min(totalHits, from + maxResults);
+ for (int i = from; i < end; i++) {
+ buff.append("");
+ Document doc = hits.doc(i);
+ String uri = doc.getField("uri").stringValue();
+ buff.append("").append(escapeXml(uri)).append("");
+ buff.append("");
+ }
+ } catch (Exception e) {
+ logger.error("Error during search", e);
+ // Return an empty but valid XML in case of error
+ buff.setLength(0); // Clear buffer
+ buff.append("");
+ return buff.toString();
+ } finally {
+ if (searcher != null) {
+ try {
+ searcher.close();
+ } catch (Exception e) {
+ logger.error("Error closing IndexSearcher", e);
+ }
+ }
+ }
+
+ buff.append("");
+ return buff.toString();
+ }
+
+ private static boolean indexExists(String dir) {
+ IndexSearcher searcher = null;
+ try {
+ searcher = new IndexSearcher(dir);
+ return true;
+ } catch (Exception e) {
+ return false;
+ } finally {
+ if (searcher != null) {
+ try {
+ searcher.close();
+ } catch (Exception e) {
+ logger.warn("Error closing IndexSearcher during indexExists check", e);
+ }
+ }
+ }
+ }
+}
diff --git a/gxsearch/pom.xml b/gxsearch/pom.xml
index 51abb16ea..2c4ba8012 100644
--- a/gxsearch/pom.xml
+++ b/gxsearch/pom.xml
@@ -32,17 +32,17 @@
org.apache.lucene
lucene-core
-			<version>2.2.0</version>
+			<version>${lucene.version}</version>
org.apache.lucene
lucene-highlighter
-			<version>2.2.0</version>
+			<version>${lucene.version}</version>
org.apache.lucene
lucene-spellchecker
-			<version>2.2.0</version>
+			<version>${lucene.version}</version>
com.github.jtidy
diff --git a/java/pom.xml b/java/pom.xml
index e3c4e5750..c897b8cfc 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -37,7 +37,7 @@
org.apache.commons
commons-collections4
-			<version>4.1</version>
+			<version>${commons.collections4.version}</version>
org.apache.logging.log4j
@@ -110,7 +110,7 @@
org.apache.pdfbox
pdfbox
-			<version>3.0.3</version>
+			<version>${pdfbox.version}</version>
org.jsoup
diff --git a/pom.xml b/pom.xml
index ac5559817..d996afcc0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,6 +22,10 @@
3.0.17
UTF-8
5.4.1
+		<pdfbox.version>3.0.3</pdfbox.version>
+		<lucene.version>2.2.0</lucene.version>
+		<commons.collections4.version>4.1</commons.collections4.version>
+		<commons.logging.version>1.2</commons.logging.version>
2.16.2
4.13.2
2.40.8
@@ -130,6 +134,7 @@
gamutils
gamtotp
gxcloudstorage-azureblob-latest
+		<module>gxflowfulltextsearch</module>