Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions conformance/src/test/java/dev/cel/conformance/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ _TESTS_TO_SKIP_LEGACY = [

# Skip until fixed.
"fields/qualified_identifier_resolution/map_value_repeat_key_heterogeneous",
# TODO: Add strings.format and strings.quote.
"string_ext/quote",
# TODO: Add strings.format.
"string_ext/format",
"string_ext/format_errors",

Expand Down Expand Up @@ -148,8 +147,7 @@ _TESTS_TO_SKIP_LEGACY = [
]

_TESTS_TO_SKIP_PLANNER = [
# TODO: Add strings.format and strings.quote.
"string_ext/quote",
# TODO: Add strings.format.
"string_ext/format",
"string_ext/format_errors",

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,17 @@ public enum Function {
SimpleType.STRING,
SimpleType.STRING)),
CelFunctionBinding.from("string_lower_ascii", String.class, Ascii::toLowerCase)),
QUOTE(
CelFunctionDecl.newFunctionDeclaration(
"strings.quote",
CelOverloadDecl.newGlobalOverload(
"strings_quote",
"Takes the given string and makes it safe to print (without any formatting"
+ " due to escape sequences). If any invalid UTF-8 characters are"
+ " encountered, they are replaced with \\uFFFD.",
SimpleType.STRING,
ImmutableList.of(SimpleType.STRING))),
CelFunctionBinding.from("strings_quote", String.class, CelStringExtensions::quote)),
REPLACE(
CelFunctionDecl.newFunctionDeclaration(
"replace",
Expand Down Expand Up @@ -164,6 +175,16 @@ public enum Function {
"string_replace_string_string_int",
ImmutableList.of(String.class, String.class, String.class, Long.class),
CelStringExtensions::replace)),
REVERSE(
CelFunctionDecl.newFunctionDeclaration(
"reverse",
CelOverloadDecl.newMemberOverload(
"string_reverse",
"Returns a new string whose characters are the same as the target string,"
+ " only formatted in reverse order.",
SimpleType.STRING,
SimpleType.STRING)),
CelFunctionBinding.from("string_reverse", String.class, CelStringExtensions::reverse)),
SPLIT(
CelFunctionDecl.newFunctionDeclaration(
"split",
Expand Down Expand Up @@ -449,6 +470,67 @@ private static Long lastIndexOf(CelCodePointArray str, CelCodePointArray substr,
return -1L;
}

private static String quote(String s) {
StringBuilder sb = new StringBuilder(s.length() + 2);
sb.append('"');
for (int i = 0; i < s.length(); ) {
int codePoint = s.codePointAt(i);
if (isMalformedUtf16(s, i, codePoint)) {
sb.append('\uFFFD');
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test for the replacement behavior here? Something similar to cel-go's test here should work: https://github.com/google/cel-go/blob/7114ed27a63255f33c689fbff0ee8a08298f70ab/ext/strings_test.go#L623

We should also add tests for low/high surrogate boundaries:

// Exact boundary at i + 1 >= s.length()
end with high surrogate \uD83D -> "end with high surrogate \uFFFD"

// \uD83D is a high surrogate, followed immediately by 'A' instead of a low surrogate
bad pair \uD83DA -> "bad pair \uFFFDA"

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added tests for low/high boundaries. The test for cel-go isn't 100% matching what is possible in Go as converting from bytes to a String already replaces invalid bytes w/ replacement characters. I can remove the quote_escapesMalformed if it isn't of high value to keep it.

i++;
continue;
}
switch (codePoint) {
case '\u0007':
sb.append("\\a");
break;
case '\b':
sb.append("\\b");
break;
case '\f':
sb.append("\\f");
break;
case '\n':
sb.append("\\n");
break;
case '\r':
sb.append("\\r");
break;
case '\t':
sb.append("\\t");
break;
case '\u000B':
sb.append("\\v");
break;
case '\\':
sb.append("\\\\");
break;
case '"':
sb.append("\\\"");
break;
default:
sb.appendCodePoint(codePoint);
break;
}
i += Character.charCount(codePoint);
}
sb.append('"');
return sb.toString();
}

private static boolean isMalformedUtf16(String s, int index, int codePoint) {
char currentChar = s.charAt(index);
if (!Character.isValidCodePoint(codePoint)) {
return true;
}
if (Character.isLowSurrogate(currentChar)) {
return true;
}
// Check for unpaired high surrogate
return Character.isHighSurrogate(currentChar)
&& (index + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(index + 1)));
}

private static String replaceAll(Object[] objects) {
return replace((String) objects[0], (String) objects[1], (String) objects[2], -1);
}
Expand Down Expand Up @@ -504,6 +586,10 @@ private static String replace(String text, String searchString, String replaceme
return sb.append(textCpa.slice(start, textCpa.length())).toString();
}

private static String reverse(String s) {
return new StringBuilder(s).reverse().toString();
}

private static List<String> split(String str, String separator) {
return split(str, separator, Integer.MAX_VALUE);
}
Expand Down
27 changes: 27 additions & 0 deletions extensions/src/main/java/dev/cel/extensions/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,19 @@ Examples:
'TacoCat'.lowerAscii() // returns 'tacocat'
'TacoCÆt Xii'.lowerAscii() // returns 'tacocÆt xii'

### Quote

Takes the given string and makes it safe to print (without any formatting due
to escape sequences).
If any invalid UTF-8 characters are encountered, they are replaced with \uFFFD.

strings.quote(<string>)

Examples:

strings.quote('single-quote with "double quote"') // returns '"single-quote with \"double quote\""'
strings.quote("two escape sequences \a\n") // returns '"two escape sequences \\a\\n"'

### Replace

Returns a new string based on the target, which replaces the occurrences of a
Expand All @@ -493,6 +506,20 @@ Examples:
'hello hello'.replace('he', 'we', 1) // returns 'wello hello'
'hello hello'.replace('he', 'we', 0) // returns 'hello hello'

### Reverse

Returns a new string whose characters are the same as the target string, only
formatted in reverse order.
This function relies on converting strings to Unicode code point arrays in
order to reverse.

<string>.reverse() -> <string>

Examples:

'gums'.reverse() // returns 'smug'
'John Smith'.reverse() // returns 'htimS nhoJ'

### Split

Returns a mutable list of strings split from the input by the given separator. The
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ public void getAllFunctionNames() {
"join",
"lastIndexOf",
"lowerAscii",
"strings.quote",
"replace",
"split",
"substring",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import dev.cel.runtime.CelEvaluationException;
import dev.cel.runtime.CelRuntime;
import dev.cel.runtime.CelRuntimeFactory;

import java.nio.charset.StandardCharsets;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -70,7 +72,9 @@ public void library() {
"lastIndexOf",
"lowerAscii",
"replace",
"reverse",
"split",
"strings.quote",
"substring",
"trim",
"upperAscii");
Expand Down Expand Up @@ -1467,6 +1471,99 @@ public void stringExtension_functionSubset_success() throws Exception {
assertThat(evaluatedResult).isEqualTo(true);
}

@Test
@TestParameters("{string: 'abcd', expectedResult: 'dcba'}")
@TestParameters("{string: '', expectedResult: ''}")
@TestParameters("{string: 'a', expectedResult: 'a'}")
@TestParameters("{string: 'hello world', expectedResult: 'dlrow olleh'}")
@TestParameters("{string: 'ab가cd', expectedResult: 'dc가ba'}")
public void reverse_success(String string, String expectedResult) throws Exception {
CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst();
CelRuntime.Program program = RUNTIME.createProgram(ast);

Object evaluatedResult = program.eval(ImmutableMap.of("s", string));

assertThat(evaluatedResult).isEqualTo(expectedResult);
}

@Test
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we also port the test over for invisible formatting characters?

https://github.com/google/cel-go/blob/7114ed27a63255f33c689fbff0ee8a08298f70ab/ext/strings_test.go#L111

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 943ae33.

@TestParameters("{string: '😁😑😦', expectedResult: '😦😑😁'}")
@TestParameters("{string: '\u180e\u200b\u200c\u200d\u2060\ufeff', expectedResult: '\ufeff\u2060\u200d\u200c\u200b\u180e'}")
public void reverse_unicode(String string, String expectedResult) throws Exception {
CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst();
CelRuntime.Program program = RUNTIME.createProgram(ast);

Object evaluatedResult = program.eval(ImmutableMap.of("s", string));

assertThat(evaluatedResult).isEqualTo(expectedResult);
}

@Test
@TestParameters("{string: 'hello', expectedResult: '\"hello\"'}")
@TestParameters("{string: '', expectedResult: '\"\"'}")
@TestParameters("{string: 'contains \\\"quotes\\\"', expectedResult: '\"contains \\\\\\\"quotes\\\\\\\"\"'}")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 943ae33.

@TestParameters("{string: 'ends with \\\\', expectedResult: '\"ends with \\\\\\\\\"'}")
@TestParameters("{string: '\\\\ starts with', expectedResult: '\"\\\\\\\\ starts with\"'}")
public void quote_success(String string, String expectedResult) throws Exception {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Boundary cases for backslashes appearing at the very beginning or the end are also worth adding:

https://github.com/google/cel-go/blob/7114ed27a63255f33c689fbff0ee8a08298f70ab/ext/strings_test.go#L128-L129

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 943ae33.

CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst();
CelRuntime.Program program = RUNTIME.createProgram(ast);

Object evaluatedResult = program.eval(ImmutableMap.of("s", string));

assertThat(evaluatedResult).isEqualTo(expectedResult);
}

@Test
public void quote_singleWithDoubleQuotes() throws Exception {
CelAbstractSyntaxTree ast = COMPILER.compile(
"strings.quote('single-quote with \"double quote\"') == \"\\\"single-quote with \\\\\\\"double quote\\\\\\\"\\\"\""
).getAst();
CelRuntime.Program program = RUNTIME.createProgram(ast);

Object evaluatedResult = program.eval();

assertThat(evaluatedResult).isEqualTo(true);
}

@Test
public void quote_escapesSpecialCharacters() throws Exception {
CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst();
CelRuntime.Program program = RUNTIME.createProgram(ast);

Object evaluatedResult =
program.eval(
ImmutableMap.of(
"s", "\u0007bell\u000Bvtab\bback\ffeed\rret\nline\ttab\\slash 가 😁"));

assertThat(evaluatedResult)
.isEqualTo("\"\\abell\\vvtab\\bback\\ffeed\\rret\\nline\\ttab\\\\slash 가 😁\"");
}

@Test
@TestParameters({"{rawString: !!binary 'ZmlsbGVyIJ8=', expectedResult: '\"filler \uFFFD\"'}"}) // "filler \x9f"
public void quote_escapesMalformed(byte[] rawString, String expectedResult) throws Exception {
CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst();
CelRuntime.Program program = RUNTIME.createProgram(ast);

Object evaluatedResult = program.eval(ImmutableMap.of("s", new String(rawString, StandardCharsets.UTF_8)));

assertThat(evaluatedResult).isEqualTo(expectedResult);
}

@Test
public void quote_escapesMalformed_endWithHighSurrogate() throws Exception {
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst());
assertThat(program.eval(ImmutableMap.of("s", "end with high surrogate \uD83D")))
.isEqualTo("\"end with high surrogate \uFFFD\"");
}

@Test
public void quote_escapesMalformed_unpairedHighSurrogate() throws Exception {
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst());
assertThat(program.eval(ImmutableMap.of("s", "bad pair \uD83DA")))
.isEqualTo("\"bad pair \uFFFDA\"");
}

@Test
public void stringExtension_compileUnallowedFunction_throws() {
CelCompiler celCompiler =
Expand Down
Loading