From 4e163978d8c8351d29859549541f6d75f53b08f7 Mon Sep 17 00:00:00 2001
From: Ioannis Rosuochatzakis
Date: Thu, 19 Feb 2026 00:51:55 +0100
Subject: [PATCH] TEDEFO-4911 Fix EFX-to-XPath escaping of quotes in string literals

---
Review notes (this area is not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch, assuming 2-space indentation. Hunk line counts were
  checked against the @@ headers and the diffstat, but leading whitespace
  could not be recovered: verify against the target files, or apply with
  `git apply --ignore-whitespace`.
- The "From:" header shows no e-mail address in this copy; `git am` may
  reject the mail until one is supplied.
- NOTE(review): in XPathScriptGeneratorV1.java, the second hunk inserts the
  two new overrides directly after the closing `*/` of an existing Javadoc
  comment, so that comment no longer sits immediately above
  composeFieldValueReference(). Confirm the intended insertion point.

 .../sdk1/xpath/XPathScriptGeneratorV1.java    | 22 +++++++++
 .../ted/efx/xpath/XPathScriptGenerator.java   | 45 ++++++++++++++++++-
 .../sdk2/EfxExpressionTranslatorV2Test.java   | 17 +++++++
 3 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/src/main/java/eu/europa/ted/efx/sdk1/xpath/XPathScriptGeneratorV1.java b/src/main/java/eu/europa/ted/efx/sdk1/xpath/XPathScriptGeneratorV1.java
index a129ad86..03223f80 100644
--- a/src/main/java/eu/europa/ted/efx/sdk1/xpath/XPathScriptGeneratorV1.java
+++ b/src/main/java/eu/europa/ted/efx/sdk1/xpath/XPathScriptGeneratorV1.java
@@ -20,8 +20,10 @@
 import eu.europa.ted.efx.interfaces.TranslatorOptions;
 import eu.europa.ted.efx.model.expressions.Expression;
 import eu.europa.ted.efx.model.expressions.PathExpression;
+import eu.europa.ted.efx.model.expressions.scalar.BooleanExpression;
 import eu.europa.ted.efx.model.expressions.scalar.NumericExpression;
 import eu.europa.ted.efx.model.expressions.scalar.StringExpression;
+import eu.europa.ted.efx.model.expressions.scalar.StringLiteral;
 import eu.europa.ted.efx.model.types.EfxDataType;
 import eu.europa.ted.efx.xpath.XPathScriptGenerator;
 
@@ -68,6 +70,26 @@ public StringExpression composeToStringConversion(NumericExpression number) {
    * This function returns the list of languages used in the visualisation in the
    * order of preference (visualisation language followed by notice language(s)).
    */
+  /**
+   * Preserved V1 behavior: pass EFX string literal through as-is without converting
+   * escape sequences to XPath format.
+   */
+  @Override
+  public StringLiteral getStringLiteralEquivalent(String literal) {
+    return new StringLiteral(literal);
+  }
+
+  /**
+   * Preserved V1 behavior: pass EFX pattern literal through as-is without converting
+   * escape sequences to XPath format.
+   */
+  @Override
+  public BooleanExpression composePatternMatchCondition(StringExpression expression,
+      String pattern) {
+    return new BooleanExpression(
+        String.format("fn:matches(normalize-space(%s), %s)", expression.getScript(), pattern));
+  }
+
   @Override
   public PathExpression composeFieldValueReference(PathExpression fieldReference) {
     XPathInfo xpathInfo = XPathProcessor.parse(fieldReference.getScript());
diff --git a/src/main/java/eu/europa/ted/efx/xpath/XPathScriptGenerator.java b/src/main/java/eu/europa/ted/efx/xpath/XPathScriptGenerator.java
index f1e8fbd9..09f1e964 100644
--- a/src/main/java/eu/europa/ted/efx/xpath/XPathScriptGenerator.java
+++ b/src/main/java/eu/europa/ted/efx/xpath/XPathScriptGenerator.java
@@ -183,7 +183,7 @@ public NumericLiteral getNumericLiteralEquivalent(String literal) {
 
   @Override
   public StringLiteral getStringLiteralEquivalent(String literal) {
-    return new StringLiteral(literal);
+    return new StringLiteral(efxToXPathLiteral(literal));
   }
 
   @Override
@@ -224,7 +224,8 @@ public BooleanExpression composeContainsCondition(
   public BooleanExpression composePatternMatchCondition(StringExpression expression,
       String pattern) {
     return new BooleanExpression(
-        String.format("fn:matches(normalize-space(%s), %s)", expression.getScript(), pattern));
+        String.format("fn:matches(normalize-space(%s), %s)", expression.getScript(),
+            efxToXPathLiteral(pattern)));
   }
 
   @Override
@@ -930,6 +931,46 @@ private String quoted(final String text) {
     return "'" + text.replaceAll("\"", "").replaceAll("'", "") + "'";
   }
 
+  /**
+   * Converts an EFX string literal to a valid XPath string literal.
+   *
+   * EFX uses backslash escaping for quotes ({@code \'} or {@code \"}), while XPath uses
+   * quote doubling ({@code ''} or {@code ""}). Other backslash sequences (e.g. regex
+   * escapes like {@code \d}, {@code \w}) are passed through unchanged.
+   */
+  protected static String efxToXPathLiteral(String efxLiteral) {
+    if (efxLiteral == null || efxLiteral.length() < 2) {
+      return efxLiteral;
+    }
+
+    char delimiter = efxLiteral.charAt(0);
+    String content = efxLiteral.substring(1, efxLiteral.length() - 1);
+
+    StringBuilder result = new StringBuilder();
+    result.append(delimiter);
+    for (int i = 0; i < content.length(); i++) {
+      char c = content.charAt(i);
+      if (c == '\\' && i + 1 < content.length()) {
+        char next = content.charAt(i + 1);
+        if (next == '\'' || next == '"') {
+          if (next == delimiter) {
+            result.append(delimiter);
+            result.append(delimiter);
+          } else {
+            result.append(next);
+          }
+          i++;
+        } else {
+          result.append(c);
+        }
+      } else {
+        result.append(c);
+      }
+    }
+    result.append(delimiter);
+    return result.toString();
+  }
+
   private int getWeeksFromDurationLiteral(final String literal) {
     Matcher weeksMatcher = Pattern.compile("(?<=\\D)\\d+(?=W)").matcher(literal);
     return weeksMatcher.find() ? Integer.parseInt(weeksMatcher.group()) : 0;
diff --git a/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java b/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java
index 203a7d42..d47d6f2f 100644
--- a/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java
+++ b/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java
@@ -141,6 +141,18 @@ void testLikePatternCondition_WithEscapedDot() {
         "BT-00-Text", "'12.3' like '\\d+\\.\\d+'");
   }
 
+  @Test
+  void testLikePatternCondition_WithEscapedSingleQuote() {
+    testExpressionTranslationWithContext("fn:matches(normalize-space('test'), 'a''b')",
+        "BT-00-Text", "'test' like 'a\\'b'");
+  }
+
+  @Test
+  void testLikePatternCondition_WithEscapedDoubleQuote() {
+    testExpressionTranslationWithContext("fn:matches(normalize-space('test'), 'a\"b')",
+        "BT-00-Text", "'test' like 'a\\\"b'");
+  }
+
   @Test
   void testLikePatternCondition_WithNot() {
     testExpressionTranslationWithContext("not(fn:matches(normalize-space('123'), '[0-9]*'))",
@@ -271,6 +283,11 @@ void testStringComparison() {
     testExpressionTranslationWithContext("'aaa' < 'bbb'", "BT-00-Text", "'aaa' < 'bbb'");
   }
 
+  @Test
+  void testStringComparison_WithEscapedSingleQuote() {
+    testExpressionTranslationWithContext("'a''b' = 'c''d'", "BT-00-Text", "'a\\'b' == 'c\\'d'");
+  }
+
   @Test
   void testDateComparison_OfTwoDateLiterals() {
     testExpressionTranslationWithContext("xs:date('2018-01-01Z') > xs:date('2018-01-01Z')",