diff --git a/src/main/java/eu/europa/ted/efx/exceptions/InvalidUsageException.java b/src/main/java/eu/europa/ted/efx/exceptions/InvalidUsageException.java
index d91bb818..8f62a23d 100644
--- a/src/main/java/eu/europa/ted/efx/exceptions/InvalidUsageException.java
+++ b/src/main/java/eu/europa/ted/efx/exceptions/InvalidUsageException.java
@@ -29,7 +29,8 @@ public enum ErrorCode {
         INVALID_NOTICE_SUBTYPE_RANGE_ORDER,
         INVALID_NOTICE_SUBTYPE_TOKEN,
         FIELD_NOT_WITHHOLDABLE,
-        TEMPLATE_ONLY_FUNCTION
+        TEMPLATE_ONLY_FUNCTION,
+        UNSUPPORTED_REGEX_CONSTRUCT
     }
 
     private static final String SHORTHAND_REQUIRES_CODE_OR_INDICATOR = "Indirect label reference shorthand #{%1$s}, requires a field of type 'code' or 'indicator'. Field %1$s is of type %2$s.";
@@ -38,6 +39,7 @@ public enum ErrorCode {
     private static final String INVALID_NOTICE_SUBTYPE_TOKEN = "Invalid notice subtype token '%s'. Expected format: 'X' or 'X-Y'.";
     private static final String FIELD_NOT_WITHHOLDABLE = "Field '%s' is always published and cannot be withheld from publication.";
     private static final String TEMPLATE_ONLY_FUNCTION = "Function '%s' can only be used in templates, not in expressions or validation rules.";
+    private static final String UNSUPPORTED_REGEX_CONSTRUCT = "Invalid regex pattern %s at position %d: %s";
 
     private final ErrorCode errorCode;
 
@@ -73,4 +75,8 @@ public static InvalidUsageException fieldNotWithholdable(String fieldId) {
     public static InvalidUsageException templateOnlyFunction(String functionName) {
         return new InvalidUsageException(ErrorCode.TEMPLATE_ONLY_FUNCTION, String.format(TEMPLATE_ONLY_FUNCTION, functionName));
     }
+
+    public static InvalidUsageException unsupportedRegexConstruct(String pattern, int position, String reason) {
+        return new InvalidUsageException(ErrorCode.UNSUPPORTED_REGEX_CONSTRUCT, String.format(UNSUPPORTED_REGEX_CONSTRUCT, pattern, position, reason));
+    }
 }
diff --git a/src/main/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2.java b/src/main/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2.java
index 0259e41d..335465b1 100644
--- a/src/main/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2.java
+++ b/src/main/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2.java
@@ -43,6 +43,7 @@
 import eu.europa.ted.efx.exceptions.SymbolResolutionException;
 import eu.europa.ted.efx.exceptions.TypeMismatchException;
 import eu.europa.ted.efx.exceptions.ConsistencyCheckException;
+import eu.europa.ted.efx.util.EfxRegexValidator;
 import eu.europa.ted.efx.interfaces.EfxExpressionTranslator;
 import eu.europa.ted.efx.interfaces.ScriptGenerator;
 import eu.europa.ted.efx.interfaces.SymbolResolver;
@@ -637,6 +638,7 @@ private void exitSequenceDistinctCondition(
 
   @Override
   public void exitLikePatternCondition(LikePatternConditionContext ctx) {
+    EfxRegexValidator.validate(ctx.pattern.getText());
     StringExpression expression = this.stack.pop(StringExpression.class);
     BooleanExpression condition = this.script.composePatternMatchCondition(expression, ctx.pattern.getText());
     if (ctx.modifier != null && ctx.modifier.getText().equals(NOT_MODIFIER)) {
@@ -2344,6 +2346,9 @@ public void exitReplaceFunction(ReplaceFunctionContext ctx) {
 
   @Override
   public void exitReplaceRegexFunction(ReplaceRegexFunctionContext ctx) {
+    if (ctx.pattern instanceof StringLiteralExpressionContext) {
+      EfxRegexValidator.validate(ctx.pattern.getText());
+    }
     final StringExpression replacement = this.stack.pop(StringExpression.class);
     final StringExpression pattern = this.stack.pop(StringExpression.class);
     final StringExpression text = this.stack.pop(StringExpression.class);
diff --git a/src/main/java/eu/europa/ted/efx/util/EfxRegexValidator.java b/src/main/java/eu/europa/ted/efx/util/EfxRegexValidator.java
new file mode 100644
index 00000000..f5878f79
--- /dev/null
+++ b/src/main/java/eu/europa/ted/efx/util/EfxRegexValidator.java
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2025 European Union
+ *
+ * Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European
+ * Commission – subsequent versions of the EUPL (the "Licence"); You may not use this work except in
+ * compliance with the Licence. You may obtain a copy of the Licence at:
+ * https://joinup.ec.europa.eu/software/page/eupl
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the Licence
+ * is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the Licence for the specific language governing permissions and limitations under
+ * the Licence.
+ */
+package eu.europa.ted.efx.util;
+
+import java.util.Set;
+
+import eu.europa.ted.efx.exceptions.InvalidUsageException;
+
+/**
+ * Validates that a regex pattern used in an EFX {@code like} condition, or given as the literal
+ * pattern of {@code replace-regex}, only uses constructs from the portable EFX regex subset.
+ * This subset is designed to work identically across all EFX target languages (XPath, Java,
+ * JavaScript, Python, C#, Swift).
+ *
+ * <p>Allowed constructs:
+ * <ul>
+ * <li>Literal characters</li>
+ * <li>{@code .} (any character)</li>
+ * <li>{@code *}, {@code +}, {@code ?} (quantifiers, greedy or non-greedy)</li>
+ * <li>{@code {n}}, {@code {n,}}, {@code {n,m}} (repetition, greedy or non-greedy)</li>
+ * <li>{@code [...]}, {@code [^...]} (character classes with ranges)</li>
+ * <li>{@code (...)} (grouping), {@code |} (alternation)</li>
+ * <li>{@code ^}, {@code $} (anchors)</li>
+ * <li>Escaped metacharacters: {@code \.} {@code \\} {@code \(} {@code \)}
+ *     {@code \[} {@code \]} {@code \{} {@code \}} {@code \*} {@code \+}
+ *     {@code \?} {@code \|} {@code \^} {@code \$}</li>
+ * <li>Whitespace escapes {@code \t}, {@code \r}, {@code \n}, {@code \f}; the EFX quote escapes
+ *     {@code \'} and {@code \"}; and {@code \-} inside a character class</li>
+ * <li>{@code \p{Category}}, {@code \P{Category}} — Unicode property escapes with a non-empty
+ *     category, e.g. {@code \p{L}} (any Unicode letter). All EFX target languages support these
+ *     (JavaScript requires the {@code u} flag; Python requires the {@code regex} module
+ *     instead of {@code re} — both are handled transparently by the EFX translator).</li>
+ * </ul>
+ *
+ * <p>Disallowed constructs (not portable):
+ * <ul>
+ * <li>{@code \d}, {@code \D}, {@code \w}, {@code \W}, {@code \s}, {@code \S}
+ *     — shorthand character classes have inconsistent semantics across target languages
+ *     (ASCII in XPath/JavaScript, Unicode in Python/C#/Swift). Use explicit character
+ *     classes instead, e.g. {@code [0-9]}, {@code [a-zA-Z0-9_]}, {@code [ \t\r\n]}.</li>
+ * <li>Word boundaries {@code \b} and {@code \B}, backreferences {@code \1} to {@code \9}, and
+ *     numeric character escapes (a backslash followed by {@code 0}, {@code x} or {@code u}).</li>
+ * <li>Lookahead, lookbehind, non-capturing groups and any other {@code (?...)} group syntax.</li>
+ * <li>Any other escape sequence not listed as allowed above.</li>
+ * </ul>
+ */
+public final class EfxRegexValidator {
+
+    private static final Set<Character> SHORTHAND_CLASSES = Set.of('d', 'D', 'w', 'W', 's', 'S');
+
+    private static final Set<Character> ESCAPABLE_METACHARACTERS =
+            Set.of('.', '\\', '(', ')', '[', ']', '{', '}', '*', '+', '?', '|', '^', '$');
+
+    private EfxRegexValidator() {}
+
+    /**
+     * Validates that the given EFX regex pattern only uses portable constructs.
+     *
+     * <p>A {@code null} pattern, or one shorter than two characters, is accepted without checks.
+     *
+     * @param rawPattern the raw token text of the pattern including delimiters (e.g. {@code '[0-9]+'})
+     * @throws InvalidUsageException if the pattern uses unsupported constructs
+     */
+    public static void validate(String rawPattern) {
+        if (rawPattern == null || rawPattern.length() < 2) {
+            return;
+        }
+
+        // Drop the opening and closing delimiter; content index i is rawPattern index i + 1.
+        String content = rawPattern.substring(1, rawPattern.length() - 1);
+        int groupDepth = 0;
+
+        for (int i = 0; i < content.length(); i++) {
+            char c = content.charAt(i);
+
+            if (c == '\\') {
+                i = validateEscape(rawPattern, content, i);
+            } else if (c == '[') {
+                i = validateCharacterClass(rawPattern, content, i);
+            } else if (c == '(') {
+                i = validateGroupOpen(rawPattern, content, i);
+                groupDepth++;
+            } else if (c == ')') {
+                if (groupDepth <= 0) {
+                    throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, i + 1, "unmatched closing parenthesis");
+                }
+                groupDepth--;
+            } else if (c == '{') {
+                i = validateRepetition(rawPattern, content, i);
+            }
+            // '.', '|', '^', '$', and literal characters are allowed as-is
+        }
+
+        if (groupDepth > 0) {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, content.length(), "unclosed group — missing ')'");
+        }
+    }
+
+    /**
+     * Validates the escape sequence whose backslash is at {@code pos}.
+     *
+     * @return the index of the last character of the escape sequence
+     */
+    private static int validateEscape(String rawPattern, String content, int pos) {
+        if (pos + 1 >= content.length()) {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1, "trailing backslash");
+        }
+
+        char next = content.charAt(pos + 1);
+
+        if (ESCAPABLE_METACHARACTERS.contains(next)) {
+            return pos + 1;
+        }
+
+        // Portable whitespace escapes — same meaning in all target languages
+        if (next == 't' || next == 'r' || next == 'n' || next == 'f') {
+            return pos + 1;
+        }
+
+        // EFX quote escapes (\' and \") — these represent literal quote characters
+        if (next == '\'' || next == '"') {
+            return pos + 1;
+        }
+
+        if (SHORTHAND_CLASSES.contains(next)) {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                    "shorthand class '\\" + next + "' is not allowed in EFX regex — its semantics differ across target languages. "
+                    + "Use an explicit character class instead (e.g. [0-9], [a-zA-Z0-9_], [ \\t\\r\\n])");
+        }
+
+        if (next == 'b' || next == 'B') {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                    "word boundary '\\" + next + "' is not allowed in EFX regex — use '^' and '$' anchors instead");
+        }
+
+        if (next >= '1' && next <= '9') {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                    "backreference '\\" + next + "' is not allowed in EFX regex");
+        }
+
+        if (next == 'p' || next == 'P') {
+            // Unicode property escape \p{Category} or \P{Category} — allowed
+            if (pos + 2 >= content.length() || content.charAt(pos + 2) != '{') {
+                throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                        "Unicode property escape '\\" + next + "' must be followed by '{Category}' (e.g. \\p{L})");
+            }
+            int closeIdx = content.indexOf('}', pos + 3);
+            if (closeIdx < 0) {
+                throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                        "Unicode property escape '\\" + next + "{...}' is missing closing '}'");
+            }
+            // '}' immediately after '{' means no category name was given
+            if (closeIdx == pos + 3) {
+                throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                        "Unicode property escape '\\" + next + "{}' must name a category (e.g. \\p{L})");
+            }
+            return closeIdx;
+        }
+
+        if (next == '0' || next == 'x' || next == 'u') {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                    "numeric character escape '\\" + next + "' is not allowed in EFX regex — use the literal character instead");
+        }
+
+        throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1,
+                "escape sequence '\\" + next + "' is not allowed in EFX regex");
+    }
+
+    /**
+     * Validates the character class whose opening {@code [} is at {@code startPos}.
+     *
+     * @return the index of the closing {@code ]}
+     */
+    private static int validateCharacterClass(String rawPattern, String content, int startPos) {
+        int i = startPos + 1;
+
+        // Allow ^ for negation at the start
+        if (i < content.length() && content.charAt(i) == '^') {
+            i++;
+        }
+
+        // Allow ] as a literal if it's the first character in the class (or after ^)
+        if (i < content.length() && content.charAt(i) == ']') {
+            i++;
+        }
+
+        while (i < content.length()) {
+            char c = content.charAt(i);
+
+            if (c == ']') {
+                return i; // end of character class
+            }
+
+            if (c == '\\') {
+                if (i + 1 >= content.length()) {
+                    throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, i + 1, "trailing backslash in character class");
+                }
+                char next = content.charAt(i + 1);
+                if (ESCAPABLE_METACHARACTERS.contains(next)
+                        || next == 't' || next == 'r' || next == 'n' || next == 'f'
+                        || next == '\'' || next == '"' || next == '-') {
+                    i += 2;
+                    continue;
+                }
+                // Validate using the same rules as outside a character class
+                i = validateEscape(rawPattern, content, i) + 1;
+                continue;
+            }
+
+            // Regular characters and ranges (a-z) are allowed
+            i++;
+        }
+
+        throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, startPos + 1, "unclosed character class — missing ']'");
+    }
+
+    /**
+     * Rejects every {@code (?...)} group form; a plain {@code (} is accepted.
+     *
+     * @return {@code pos}, the index of the opening parenthesis
+     */
+    private static int validateGroupOpen(String rawPattern, String content, int pos) {
+        if (pos + 1 < content.length() && content.charAt(pos + 1) == '?') {
+            String message;
+            if (pos + 2 < content.length()) {
+                char modifier = content.charAt(pos + 2);
+                if (modifier == '=') {
+                    message = "lookahead '(?=...)' is not allowed in EFX regex";
+                } else if (modifier == '!') {
+                    message = "negative lookahead '(?!...)' is not allowed in EFX regex";
+                } else if (modifier == '<') {
+                    message = "lookbehind '(?<...)' is not allowed in EFX regex";
+                } else if (modifier == ':') {
+                    message = "non-capturing group '(?:...)' is not allowed in EFX regex — use plain '(...)' instead";
+                } else {
+                    message = "extended group syntax '(?" + modifier + "...)' is not allowed in EFX regex";
+                }
+            } else {
+                message = "extended group syntax '(?...)' is not allowed in EFX regex";
+            }
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, pos + 1, message);
+        }
+        return pos;
+    }
+
+    /**
+     * Validates a {@code {n}}, {@code {n,}} or {@code {n,m}} quantifier starting at {@code startPos}.
+     *
+     * @return the index of the closing brace, or {@code startPos} when the opening brace is not
+     *     followed by a digit and is therefore treated as a literal
+     */
+    private static int validateRepetition(String rawPattern, String content, int startPos) {
+        int i = startPos + 1;
+
+        // Expect at least one digit
+        if (i >= content.length() || !Character.isDigit(content.charAt(i))) {
+            // Treat { as a literal character (some regex engines allow this)
+            return startPos;
+        }
+
+        // Parse first number
+        while (i < content.length() && Character.isDigit(content.charAt(i))) {
+            i++;
+        }
+
+        if (i >= content.length()) {
+            throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, startPos + 1, "unclosed repetition quantifier — missing '}'");
+        }
+
+        if (content.charAt(i) == '}') {
+            return i;
+        }
+
+        if (content.charAt(i) == ',') {
+            i++;
+            // Optional second number
+            while (i < content.length() && Character.isDigit(content.charAt(i))) {
+                i++;
+            }
+            if (i >= content.length() || content.charAt(i) != '}') {
+                throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, startPos + 1, "unclosed repetition quantifier — missing '}'");
+            }
+            return i;
+        }
+
+        throw InvalidUsageException.unsupportedRegexConstruct(rawPattern, startPos + 1, "invalid repetition quantifier");
+    }
+}
diff --git a/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java b/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java
index 35976fab..35b168d1 100644
--- a/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java
+++ b/src/test/java/eu/europa/ted/efx/sdk2/EfxExpressionTranslatorV2Test.java
@@ -137,8 +137,8 @@ void testLikePatternCondition() {
   @Test
   void testLikePatternCondition_WithEscapedDot() {
     testExpressionTranslationWithContext(
-        "fn:matches(normalize-space('12.3'), '\\d+\\.\\d+')",
-        "BT-00-Text", "'12.3' like '\\d+\\.\\d+'");
+        "fn:matches(normalize-space('12.3'), '[0-9]+\\.[0-9]+')",
+        "BT-00-Text", "'12.3' like '[0-9]+\\.[0-9]+'");
   }
 
   @Test
@@ -1951,8 +1951,23 @@ void testReplaceRegexFunction() {
   @Test
   void testReplaceRegexFunction_WithFieldReference() {
     testExpressionTranslation(
-        "replace(PathNode/TextField/normalize-space(text()), '\\s+', ' ')",
-        "{ND-Root} ${replace-regex(BT-00-Text, '\\s+', ' ')}");
+        "replace(PathNode/TextField/normalize-space(text()), '[ \\t]+', ' ')",
+        "{ND-Root} ${replace-regex(BT-00-Text, '[ \\t]+', ' ')}");
+  }
+
+  @Test
+  void testReplaceRegexFunction_WithShorthandPattern_ThrowsError() {
+    assertThrows(InvalidUsageException.class, () ->
+        testExpressionTranslationWithContext(
+            "", "ND-Root", "replace-regex('hello', '\\w+', 'x')"));
+  }
+
+  @Test
+  void testReplaceRegexFunction_WithDynamicPattern_DoesNotThrow() {
+    // Pattern is a field reference (non-literal) — static regex validation is skipped
+    testExpressionTranslation(
+        "replace('hello', PathNode/TextField/normalize-space(text()), 'x')",
+        "{ND-Root} ${replace-regex('hello', BT-00-Text, 'x')}");
   }
 
   @Test
diff --git a/src/test/java/eu/europa/ted/efx/util/EfxRegexValidatorTest.java b/src/test/java/eu/europa/ted/efx/util/EfxRegexValidatorTest.java
new file mode 100644
index 00000000..1270499f
--- /dev/null
+++ b/src/test/java/eu/europa/ted/efx/util/EfxRegexValidatorTest.java
@@ -0,0 +1,343 @@
+/*
+ * Copyright 2025 European Union
+ *
+ * Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European
+ * Commission – subsequent versions of the EUPL (the "Licence"); You may not use this work except in
+ * compliance with the Licence. You may obtain a copy of the Licence at:
+ * https://joinup.ec.europa.eu/software/page/eupl
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the Licence
+ * is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the Licence for the specific language governing permissions and limitations under
+ * the Licence.
+ */
+package eu.europa.ted.efx.util;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+import eu.europa.ted.efx.exceptions.InvalidUsageException;
+
+class EfxRegexValidatorTest {
+
+    @Nested
+    class AllowedConstructs {
+
+        @Test
+        void testLiteralCharacters() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'abc123'"));
+        }
+
+        @Test
+        void testDot() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a.b'"));
+        }
+
+        @Test
+        void testQuantifiers() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a*b+c?'"));
+        }
+
+        @Test
+        void testRepetition_Exact() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a{3}'"));
+        }
+
+        @Test
+        void testRepetition_AtLeast() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a{3,}'"));
+        }
+
+        @Test
+        void testRepetition_Range() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a{3,5}'"));
+        }
+
+        @Test
+        void testNonGreedyQuantifiers() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a*?'"));
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a+?'"));
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a??'"));
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a{2,3}?'"));
+        }
+
+        @Test
+        void testCharacterClass() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'[abc]'"));
+        }
+
+        @Test
+        void testCharacterClassWithRange() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'[a-z0-9]'"));
+        }
+
+        @Test
+        void testNegatedCharacterClass() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'[^abc]'"));
+        }
+
+        @Test
+        void testCharacterClassWithClosingBracketAsFirst() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'[]abc]'"));
+        }
+
+        @Test
+        void testGrouping() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'(abc)'"));
+        }
+
+        @Test
+        void testNestedGroups() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'((a)(b))'"));
+        }
+
+        @Test
+        void testAlternation() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a|b|c'"));
+        }
+
+        @Test
+        void testAnchors() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'^abc$'"));
+        }
+
+        @Test
+        void testEscapedMetacharacters() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'\\.\\\\\\(\\)\\[\\]\\{\\}\\*\\+\\?\\|\\^\\$'"));
+        }
+
+        @Test
+        void testWhitespaceEscapes() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'[ \\t\\r\\n\\f]+'"));
+        }
+
+        @Test
+        void testUnicodePropertyEscape() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'\\p{L}+'"));
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'\\P{Z}+'"));
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'[\\p{L}\\p{N}]+'"));
+        }
+
+        @Test
+        void testEscapedQuotes() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a\\'b'"));
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'a\\\"b'"));
+        }
+
+        @Test
+        void testComplexPattern() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'^[a-zA-Z][0-9]{2,4}(\\.[0-9]+)?$'"));
+        }
+
+        @Test
+        void testEmptyPattern() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("''"));
+        }
+
+        @Test
+        void testDoubleQuotedPattern() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("\"[0-9]+\""));
+        }
+
+        @Test
+        void testLiteralBraceWhenNotQuantifier() {
+            assertDoesNotThrow(() -> EfxRegexValidator.validate("'{abc'"));
+        }
+    }
+
+    @Nested
+    class DisallowedConstructs {
+
+        @Test
+        void testShorthandClass_d() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\d'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testShorthandClass_w() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\w'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testShorthandClass_s() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\s'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testShorthandClass_D() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\D'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testShorthandClass_W() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\W'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testShorthandClass_S() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\S'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testShorthandClassInsideCharacterClass() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'[\\d\\w]'"));
+            assertTrue(ex.getMessage().contains("shorthand class"));
+        }
+
+        @Test
+        void testWordBoundary() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\b'"));
+            assertTrue(ex.getMessage().contains("word boundary"));
+        }
+
+        @Test
+        void testNonWordBoundary() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\B'"));
+            assertTrue(ex.getMessage().contains("word boundary"));
+        }
+
+        @Test
+        void testBackreference() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'(a)\\1'"));
+            assertTrue(ex.getMessage().contains("backreference"));
+        }
+
+        @Test
+        void testUnicodePropertyEscape_MissingBraces() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\p'"));
+            assertTrue(ex.getMessage().contains("\\p"));
+        }
+
+        @Test
+        void testUnicodePropertyEscape_UpperCase_MissingBraces() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\P'"));
+            assertTrue(ex.getMessage().contains("\\P"));
+        }
+
+        @Test
+        void testUnicodePropertyEscape_MissingClosingBrace() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\p{L'"));
+            assertTrue(ex.getMessage().contains("\\p"));
+        }
+
+        @Test
+        void testUnicodePropertyEscape_UpperCase_MissingClosingBrace() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\P{Z'"));
+            assertTrue(ex.getMessage().contains("\\P"));
+        }
+
+        @Test
+        void testUnicodePropertyEscape_EmptyCategory() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\p{}'"));
+            assertTrue(ex.getMessage().contains("must name a category"));
+        }
+
+        @Test
+        void testNumericEscape_Hex() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\x41'"));
+            assertTrue(ex.getMessage().contains("numeric character"));
+        }
+
+        @Test
+        void testNumericEscape_Unicode() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\u0041'"));
+            assertTrue(ex.getMessage().contains("numeric character"));
+        }
+
+        @Test
+        void testNumericEscape_Null() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\0'"));
+            assertTrue(ex.getMessage().contains("numeric character"));
+        }
+
+        @Test
+        void testLookahead() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'a(?=b)'"));
+            assertTrue(ex.getMessage().contains("lookahead"));
+        }
+
+        @Test
+        void testNegativeLookahead() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'a(?!b)'"));
+            assertTrue(ex.getMessage().contains("negative lookahead"));
+        }
+
+        @Test
+        void testLookbehind() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'(?<=a)b'"));
+            assertTrue(ex.getMessage().contains("lookbehind"));
+        }
+
+        @Test
+        void testNonCapturingGroup() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'(?:abc)'"));
+            assertTrue(ex.getMessage().contains("non-capturing group"));
+        }
+
+        @Test
+        void testUnsupportedEscapeSequence() {
+            assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'\\a'"));
+        }
+
+        @Test
+        void testUnclosedCharacterClass() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'[abc'"));
+            assertTrue(ex.getMessage().contains("unclosed character class"));
+        }
+
+        @Test
+        void testUnclosedGroup() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'(abc'"));
+            assertTrue(ex.getMessage().contains("unclosed group"));
+        }
+
+        @Test
+        void testUnmatchedClosingParenthesis() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'abc)'"));
+            assertTrue(ex.getMessage().contains("unmatched closing parenthesis"));
+        }
+
+        @Test
+        void testTrailingBackslash() {
+            InvalidUsageException ex = assertThrows(InvalidUsageException.class,
+                    () -> EfxRegexValidator.validate("'abc\\'"));
+            assertTrue(ex.getMessage().contains("trailing backslash"));
+        }
+    }
+}