From d30375d943ff0286e74dd73c58017198f4b9ee46 Mon Sep 17 00:00:00 2001 From: Harry Vennik Date: Sun, 26 Nov 2023 09:00:20 +0100 Subject: [PATCH 1/6] Add variable support to lexer --- src/Lexer.php | 53 ++++++++++++++++++++++++++++++++++++++------- tests/LexerTest.php | 2 ++ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index c98ffb6..a08d667 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -20,6 +20,7 @@ class Lexer const T_RBRACKET = 'rbracket'; const T_FLATTEN = 'flatten'; const T_IDENTIFIER = 'identifier'; + const T_VARIABLE = 'variable'; const T_NUMBER = 'number'; const T_QUOTED_IDENTIFIER = 'quoted_identifier'; const T_UNKNOWN = 'unknown'; @@ -46,6 +47,7 @@ class Lexer const STATE_EQ = 11; const STATE_NOT = 12; const STATE_AND = 13; + const STATE_VARIABLE = 14; /** @var array We know what token we are consuming based on each char */ private static $transitionTable = [ @@ -84,6 +86,7 @@ class Lexer ')' => self::STATE_SINGLE_CHAR, '{' => self::STATE_SINGLE_CHAR, '}' => self::STATE_SINGLE_CHAR, + '$' => self::STATE_VARIABLE, '_' => self::STATE_IDENTIFIER, 'A' => self::STATE_IDENTIFIER, 'B' => self::STATE_IDENTIFIER, @@ -223,18 +226,31 @@ public function tokenize($input) } elseif ($state === self::STATE_IDENTIFIER) { // Consume identifiers - $start = key($chars); - $buffer = ''; - do { - $buffer .= $current; - $current = next($chars); - } while ($current !== false && isset($this->validIdentifier[$current])); $tokens[] = [ 'type' => self::T_IDENTIFIER, - 'value' => $buffer, - 'pos' => $start + 'pos' => key($chars), + 'value' => $this->consumeIdentifier($chars) ]; + } elseif ($state === self::STATE_VARIABLE) { + + // Consume variable reference + $start = key($chars); + $actual = next($chars); + if (self::$transitionTable[$actual] === self::STATE_IDENTIFIER) { + $tokens[] = [ + 'type' => self::T_VARIABLE, + 'pos' => $start, + 'value' => $this->consumeIdentifier($chars) + ]; + } else { + $tokens[] = [ + 'type' => 
self::T_UNKNOWN, + 'pos' => $start, + 'value' => $current + ]; + } + } elseif ($state === self::STATE_WHITESPACE) { // Skip whitespace @@ -417,6 +433,27 @@ private function inside(array &$chars, $delim, $type) return ['type' => $type, 'value' => $buffer, 'pos' => $position]; } + /** + * Consumes input until any character is found that is invalid in an identifier. + * + * It is assumed the first character in the input has already been recognized as + * a valid first character for an identifier. + * + * @param &array $chars Reference to the input to be consumed + * + * @return string Returns the consumed identifier + */ + private function consumeIdentifier(array &$chars): string { + $current = current($chars); + $buffer = ''; + do { + $buffer .= $current; + $current = next($chars); + } while ($current !== false && isset($this->validIdentifier[$current])); + + return $buffer; + } + /** * Parses a JSON token or sets the token type to "unknown" on error. * diff --git a/tests/LexerTest.php b/tests/LexerTest.php index 3c0ed2b..b7d451e 100644 --- a/tests/LexerTest.php +++ b/tests/LexerTest.php @@ -35,7 +35,9 @@ public static function inputProvider(): array [',', 'comma'], ['||', 'or'], ['*', 'star'], + ['$', 'unknown'], ['foo', 'identifier'], + ['$foo', 'variable'], ['"foo"', 'quoted_identifier'], ['`true`', 'literal'], ['`false`', 'literal'], From cc5918653d5570fe4e7f28ae73b4574e71d43ef4 Mon Sep 17 00:00:00 2001 From: Harry Vennik Date: Mon, 27 Nov 2023 09:03:08 +0100 Subject: [PATCH 2/6] Add support for assignment operator to lexer --- src/Lexer.php | 3 ++- tests/LexerTest.php | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Lexer.php b/src/Lexer.php index a08d667..45539f6 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -32,6 +32,7 @@ class Lexer const T_LITERAL = 'literal'; const T_EOF = 'eof'; const T_COMPARATOR = 'comparator'; + const T_ASSIGN = 'assign'; const STATE_IDENTIFIER = 0; const STATE_NUMBER = 1; @@ -333,7 +334,7 @@ public function 
tokenize($input) } elseif ($state === self::STATE_EQ) { // Consume equals - $tokens[] = $this->matchOr($chars, '=', '=', self::T_COMPARATOR, self::T_UNKNOWN); + $tokens[] = $this->matchOr($chars, '=', '=', self::T_COMPARATOR, self::T_ASSIGN); } elseif ($state == self::STATE_AND) { diff --git a/tests/LexerTest.php b/tests/LexerTest.php index b7d451e..d506286 100644 --- a/tests/LexerTest.php +++ b/tests/LexerTest.php @@ -35,6 +35,8 @@ public static function inputProvider(): array [',', 'comma'], ['||', 'or'], ['*', 'star'], + ['=', 'assign'], + ['==', 'comparator'], ['$', 'unknown'], ['foo', 'identifier'], ['$foo', 'variable'], From e8511e4c3a1ff1d521a3741529d340e9632550ac Mon Sep 17 00:00:00 2001 From: Harry Vennik Date: Mon, 4 Dec 2023 09:45:05 +0100 Subject: [PATCH 3/6] Add support for let expression to parser --- src/Parser.php | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/Parser.php b/src/Parser.php index 0733f20..737fda7 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -22,6 +22,7 @@ class Parser T::T_EOF => 0, T::T_QUOTED_IDENTIFIER => 0, T::T_IDENTIFIER => 0, + T::T_VARIABLE => 0, T::T_RBRACKET => 0, T::T_RPAREN => 0, T::T_COMMA => 0, @@ -105,6 +106,11 @@ private function expr($rbp = 0) private function nud_identifier() { $token = $this->token; + + if ($token['value'] === 'let' && $this->lookahead() === T::T_VARIABLE) { + return $this->parseLetExpression(); + } + $this->next(); return ['type' => 'field', 'value' => $token['value']]; } @@ -117,6 +123,14 @@ private function nud_quoted_identifier() return ['type' => 'field', 'value' => $token['value']]; } + private function nud_variable() + { + $token = $this->token; + + $this->next(); + return $token; + } + private function nud_current() { $this->next(); @@ -461,6 +475,42 @@ private function parseMultiSelectList() return ['type' => 'multi_select_list', 'children' => $nodes]; } + private function parseLetExpression() + { + static $validVariable = [ 
T::T_VARIABLE => true ];
+        static $validAssign = [ T::T_ASSIGN => true ];
+
+        $bindings = [];
+
+        do {
+            $this->next($validVariable);
+            $variable = $this->token['value'];
+
+            $this->next($validAssign);
+            $this->next();
+
+            $bindings[] = [
+                'type' => 'variable_binding',
+                'value' => $variable,
+                'children' => [ $this->expr() ]
+            ];
+        } while ($this->token['type'] === T::T_COMMA);
+
+        if ($this->token['type'] !== T::T_IDENTIFIER || $this->token['value'] !== 'in') {
+            throw $this->syntax("Expected ',' or 'in'"); // syntax() only builds the exception; it has to be thrown here
+        }
+
+        $this->next();
+
+        return [
+            'type' => 'let',
+            'children' => [
+                [ 'type' => 'bindings', 'children' => $bindings ],
+                $this->expr()
+            ]
+        ];
+    }
+
     private function syntax($msg)
     {
         return new SyntaxErrorException($msg, $this->token, $this->expression);

From 7b652ac85039f43c70de03d3db4d338da7c15771 Mon Sep 17 00:00:00 2001
From: Harry Vennik
Date: Wed, 6 Dec 2023 18:21:53 +0100
Subject: [PATCH 4/6] Add support for let expression to interpreter and compiler

---
 src/TreeCompiler.php    | 46 ++++++++++++++++++++++++++-
 src/TreeInterpreter.php | 70 +++++++++++++++++++++++++++--------------
 2 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/src/TreeCompiler.php b/src/TreeCompiler.php
index b5f0658..9621c8a 100644
--- a/src/TreeCompiler.php
+++ b/src/TreeCompiler.php
@@ -26,7 +26,7 @@ public function visit(array $ast, $fnName, $expr)
             ->write('use JmesPath\\FnDispatcher as Fd;')
             ->write('use JmesPath\\Utils;')
             ->write('')
-            ->write('function %s(Ti $interpreter, $value) {', $fnName)
+            ->write('function %s(Ti $interpreter, $value, array $bindings = []) {', $fnName)
                 ->indent()
                 ->dispatch($ast)
                 ->write('')
@@ -409,6 +409,50 @@ private function visit_comparator(array $node)
         return $this;
     }
 
+    private function visit_let(array $node) {
+        return $this
+            ->write('$value = (function() use ($value, $bindings) {')
+                ->indent()
+                ->write('$newBindings = [];')
+                ->dispatch($node['children'][0])
+                ->write('$bindings = array_merge($bindings, $newBindings);')
+
->dispatch($node['children'][1])
+                ->write('return $value;')
+                ->outdent()
+            ->write('})();');
+    }
+
+    private function visit_bindings(array $node) {
+        $value = $this->makeVar('prev');
+        $this
+            ->write('if ($value !== null) {')
+                ->indent()
+                ->write('%s = $value;', $value);
+
+        $first = true;
+        foreach ($node['children'] as $child) {
+            if (!$first) {
+                $this->write('$value = %s;', $value);
+            }
+            $first = false;
+            $this->dispatch($child);
+        }
+
+        return $this
+            ->write('$value = %s;', $value)
+            ->outdent()
+            ->write('}');
+    }
+
+    private function visit_variable_binding(array $node) {
+        return $this->dispatch($node['children'][0])
+            ->write("\$newBindings['{$node['value']}'] = \$value;");
+    }
+
+    private function visit_variable(array $node) {
+        return $this->write("\$value = \$bindings['{$node['value']}'];");
+    }
+
     /** @internal */
     public function __call($method, $args)
     {
diff --git a/src/TreeInterpreter.php b/src/TreeInterpreter.php
index f7eea86..1d990bb 100644
--- a/src/TreeInterpreter.php
+++ b/src/TreeInterpreter.php
@@ -22,12 +22,13 @@ public function __construct(callable $fnDispatcher = null)
     /**
      * Visits each node in a JMESPath AST and returns the evaluated result.
      *
-     * @param array $node JMESPath AST node
-     * @param mixed $data Data to evaluate
+     * @param array $node     JMESPath AST node
+     * @param mixed $data     Data to evaluate
+     * @param array $bindings Predefined variable bindings (keyed by variable name)
      *
      * @return mixed
      */
-    public function visit(array $node, $data)
+    public function visit(array $node, $data, array $bindings = [])
     {
-        return $this->dispatch($node, $data);
+        return $this->dispatch($node, $data, $bindings); // forward the bindings; the expref closure also passes them through visit()
     }
@@ -38,7 +39,7 @@ public function visit(array $node, $data)
      * statement to avoid the cost of "double dispatch".
* @return mixed */ - private function dispatch(array $node, $value) + private function dispatch(array $node, $value, array $bindings = []) { $dispatcher = $this->fnDispatcher; @@ -55,7 +56,8 @@ private function dispatch(array $node, $value) case 'subexpression': return $this->dispatch( $node['children'][1], - $this->dispatch($node['children'][0], $value) + $this->dispatch($node['children'][0], $value, $bindings), + $bindings ); case 'index': @@ -68,7 +70,7 @@ private function dispatch(array $node, $value) return isset($value[$idx]) ? $value[$idx] : null; case 'projection': - $left = $this->dispatch($node['children'][0], $value); + $left = $this->dispatch($node['children'][0], $value, $bindings); switch ($node['from']) { case 'object': if (!Utils::isObject($left)) { @@ -88,7 +90,7 @@ private function dispatch(array $node, $value) $collected = []; foreach ((array) $left as $val) { - $result = $this->dispatch($node['children'][1], $val); + $result = $this->dispatch($node['children'][1], $val, $bindings); if ($result !== null) { $collected[] = $result; } @@ -98,7 +100,7 @@ private function dispatch(array $node, $value) case 'flatten': static $skipElement = []; - $value = $this->dispatch($node['children'][0], $value); + $value = $this->dispatch($node['children'][0], $value, $bindings); if (!Utils::isArray($value)) { return null; @@ -123,26 +125,26 @@ private function dispatch(array $node, $value) return $value; case 'or': - $result = $this->dispatch($node['children'][0], $value); + $result = $this->dispatch($node['children'][0], $value, $bindings); return Utils::isTruthy($result) ? $result - : $this->dispatch($node['children'][1], $value); + : $this->dispatch($node['children'][1], $value, $bindings); case 'and': - $result = $this->dispatch($node['children'][0], $value); + $result = $this->dispatch($node['children'][0], $value, $bindings); return Utils::isTruthy($result) - ? $this->dispatch($node['children'][1], $value) + ? 
$this->dispatch($node['children'][1], $value, $bindings)
                     : $result;
 
             case 'not':
                 return !Utils::isTruthy(
-                    $this->dispatch($node['children'][0], $value)
+                    $this->dispatch($node['children'][0], $value, $bindings)
                 );
 
             case 'pipe':
                 return $this->dispatch(
                     $node['children'][1],
-                    $this->dispatch($node['children'][0], $value)
+                    $this->dispatch($node['children'][0], $value, $bindings), $bindings // right-hand side of a pipe keeps the enclosing let bindings
                 );
 
             case 'multi_select_list':
@@ -152,7 +154,7 @@ private function dispatch(array $node, $value)
 
                 $collected = [];
                 foreach ($node['children'] as $node) {
-                    $collected[] = $this->dispatch($node, $value);
+                    $collected[] = $this->dispatch($node, $value, $bindings);
                 }
 
                 return $collected;
@@ -166,15 +168,16 @@ private function dispatch(array $node, $value)
                 foreach ($node['children'] as $node) {
                     $collected[$node['value']] = $this->dispatch(
                         $node['children'][0],
-                        $value
+                        $value,
+                        $bindings
                     );
                 }
 
                 return $collected;
 
             case 'comparator':
-                $left = $this->dispatch($node['children'][0], $value);
-                $right = $this->dispatch($node['children'][1], $value);
+                $left = $this->dispatch($node['children'][0], $value, $bindings);
+                $right = $this->dispatch($node['children'][1], $value, $bindings);
                 if ($node['value'] == '==') {
                     return Utils::isEqual($left, $right);
                 } elseif ($node['value'] == '!=') {
@@ -184,14 +187,14 @@ private function dispatch(array $node, $value)
                 }
 
             case 'condition':
-                return Utils::isTruthy($this->dispatch($node['children'][0], $value))
-                    ? $this->dispatch($node['children'][1], $value)
+                return Utils::isTruthy($this->dispatch($node['children'][0], $value, $bindings))
+                    ?
$this->dispatch($node['children'][1], $value, $bindings) : null; case 'function': $args = []; foreach ($node['children'] as $arg) { - $args[] = $this->dispatch($arg, $value); + $args[] = $this->dispatch($arg, $value, $bindings); } return $dispatcher($node['value'], $args); @@ -206,10 +209,31 @@ private function dispatch(array $node, $value) case 'expref': $apply = $node['children'][0]; - return function ($value) use ($apply) { - return $this->visit($apply, $value); + return function ($value) use ($apply, $bindings) { + return $this->visit($apply, $value, $bindings); }; + case 'let': + return $this->dispatch( + $node['children'][1], + $value, + array_merge($bindings, $this->dispatch($node['children'][0], $value, $bindings)) + ); + + case 'bindings': + $newBindings = []; + foreach ($node['children'] as $bindingNode) { + $newBindings[$bindingNode['value']] = $this->dispatch($bindingNode['children'][0], $value, $bindings); + } + return $newBindings; + + case 'variable': + if (!array_key_exists($node['value'], $bindings)) { + throw new \RuntimeException("Undefined variable: \${$node['value']}"); + } + + return $bindings[$node['value']]; + default: throw new \RuntimeException("Unknown node type: {$node['type']}"); } From 6882968b8c63d10bd84a30c1fac2c03706c37a00 Mon Sep 17 00:00:00 2001 From: Harry Vennik Date: Wed, 6 Dec 2023 18:22:43 +0100 Subject: [PATCH 5/6] Add JEP-18 compliance tests --- tests/compliance/jep18_scope.json | 287 ++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 tests/compliance/jep18_scope.json diff --git a/tests/compliance/jep18_scope.json b/tests/compliance/jep18_scope.json new file mode 100644 index 0000000..cc66681 --- /dev/null +++ b/tests/compliance/jep18_scope.json @@ -0,0 +1,287 @@ +[ + { + "given": { + "foo": { + "bar": "baz" + } + }, + "cases": [ + { + "expression": "let $foo = foo in $foo", + "result": { + "bar": "baz" + } + }, + { + "expression": "let $foo = foo.bar in $foo", + "result": "baz" + }, + { + 
"expression": "let $foo = foo.bar in [$foo, $foo]", + "result": [ + "baz", + "baz" + ] + }, + { + "comment": "Multiple assignments", + "expression": "let $foo = 'foo', $bar = 'bar' in [$foo, $bar]", + "result": [ + "foo", + "bar" + ] + } + ] + }, + { + "given": { + "a": "topval", + "b": [ + { + "a": "inner1" + }, + { + "a": "inner2" + } + ] + }, + "cases": [ + { + "expression": "let $a = a in b[*].[a, $a, let $a = 'shadow' in $a]", + "result": [ + [ + "inner1", + "topval", + "shadow" + ], + [ + "inner2", + "topval", + "shadow" + ] + ] + }, + { + "comment": "Bindings only visible within expression clause", + "expression": "let $a = 'top-a' in let $a = 'in-a', $b = $a in $b", + "result": "top-a" + } + ] + }, + { + "given": { + "let": { + "let": "let-val", + "in": "in-val" + } + }, + "cases": [ + { + "expression": "let $let = let in {let: let, in: $let}", + "result": { + "let": { + "let": "let-val", + "in": "in-val" + }, + "in": { + "let": "let-val", + "in": "in-val" + } + } + }, + { + "expression": "let $let = 'let' in { let: let, in: $let }", + "result": { + "let": { + "let": "let-val", + "in": "in-val" + }, + "in": "let" + } + }, + { + "expression": "let $let = 'let' in { let: 'let', in: $let }", + "result": { + "let": "let", + "in": "let" + } + } + ] + }, + { + "given": { + "foo": [ + [ + 0, + 1 + ], + [ + 2, + 3 + ], + [ + 4, + 5 + ] + ] + }, + "cases": [ + { + "comment": "Projection is stopped when bound to variable", + "expression": "let $foo = foo[*] in $foo[0]", + "result": [ + 0, + 1 + ] + } + ] + }, + { + "given": [ + { + "home_state": "WA", + "states": [ + { + "name": "WA", + "cities": [ + "Seattle", + "Bellevue", + "Olympia" + ] + }, + { + "name": "CA", + "cities": [ + "Los Angeles", + "San Francisco" + ] + }, + { + "name": "NY", + "cities": [ + "New York City", + "Albany" + ] + } + ] + }, + { + "home_state": "NY", + "states": [ + { + "name": "WA", + "cities": [ + "Seattle", + "Bellevue", + "Olympia" + ] + }, + { + "name": "CA", + "cities": [ + "Los 
Angeles", + "San Francisco" + ] + }, + { + "name": "NY", + "cities": [ + "New York City", + "Albany" + ] + } + ] + } + ], + "cases": [ + { + "expression": "[*].[let $home_state = home_state in states[? name == $home_state].cities[]][]", + "result": [ + [ + "Seattle", + "Bellevue", + "Olympia" + ], + [ + "New York City", + "Albany" + ] + ] + } + ] + }, + { + "given": { + "imageDetails": [ + { + "repositoryName": "org/first-repo", + "imageTags": [ + "latest", + "v1.0", + "v1.2" + ], + "imageDigest": "sha256:abcd" + }, + { + "repositoryName": "org/second-repo", + "imageTags": [ + "v2.0", + "v2.2" + ], + "imageDigest": "sha256:efgh" + } + ] + }, + "cases": [ + { + "expression": "imageDetails[].[\n let $repo = repositoryName,\n $digest = imageDigest\n in\n imageTags[].[@, $digest, $repo]\n][][]\n", + "result": [ + [ + "latest", + "sha256:abcd", + "org/first-repo" + ], + [ + "v1.0", + "sha256:abcd", + "org/first-repo" + ], + [ + "v1.2", + "sha256:abcd", + "org/first-repo" + ], + [ + "v2.0", + "sha256:efgh", + "org/second-repo" + ], + [ + "v2.2", + "sha256:efgh", + "org/second-repo" + ] + ] + } + ] + }, + { + "given": {}, + "cases": [ + { + "expression": "$noexist", + "error": "undefined-variable" + }, + { + "comment": "Reference out of scope variable", + "expression": "[let $scope = 'foo' in [$scope], $scope]", + "error": "undefined-variable" + }, + { + "comment": "Can't use var ref in RHS of subexpression", + "expression": "foo.$bar", + "error": "syntax" + } + ] + } +] \ No newline at end of file From 9e05615906b5be4217a04d5502d2d4dbe0dd19e3 Mon Sep 17 00:00:00 2001 From: Harry Vennik Date: Thu, 7 Dec 2023 08:18:43 +0100 Subject: [PATCH 6/6] Error on undefined variable also with compiler --- src/TreeCompiler.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/TreeCompiler.php b/src/TreeCompiler.php index 9621c8a..a2e7ae9 100644 --- a/src/TreeCompiler.php +++ b/src/TreeCompiler.php @@ -450,7 +450,13 @@ private function 
visit_variable_binding(array $node) {
     }
 
     private function visit_variable(array $node) {
-        return $this->write("\$value = \$bindings['{$node['value']}'];");
+        return $this
+            ->write("if (!array_key_exists('{$node['value']}', \$bindings)) {")
+                ->indent()
+                ->write("throw new \\RuntimeException('Undefined variable: \${$node['value']}');") // name is baked in at compile time; generated code has no $node
+                ->outdent()
+            ->write('}')
+            ->write("\$value = \$bindings['{$node['value']}'];");
     }
 
     /** @internal */