From 4d30f377866931add3db60eb0002dc892028ed23 Mon Sep 17 00:00:00 2001 From: Jose Rodriguez Date: Mon, 25 Nov 2024 00:15:35 +0100 Subject: [PATCH] refact: improve typing and improve parsing in opt parser There are some syntax error that were not catched. Fixed. --- src/arch/z80/peephole/evaluator.py | 20 ++-- src/arch/z80/peephole/parser.py | 97 +++++++++++++----- tests/arch/zx48k/peephole/test_parser.py | 122 +++++++++++++++++++---- 3 files changed, 190 insertions(+), 49 deletions(-) diff --git a/src/arch/z80/peephole/evaluator.py b/src/arch/z80/peephole/evaluator.py index b7e90fa09..291564583 100644 --- a/src/arch/z80/peephole/evaluator.py +++ b/src/arch/z80/peephole/evaluator.py @@ -75,8 +75,8 @@ class FN(StrEnum): FN.CTEST: lambda x: memcell.MemCell(x, 1).condition_flag, # condition test, if any. E.g. retz returns 'z' FN.NEEDS: lambda x: memcell.MemCell(x[0], 1).needs(x[1]), FN.FLAGVAL: lambda x: helpers.new_tmp_val(), - FN.OP1: lambda x: (x.strip().replace(",", " ", 1).split() + [""])[1], - FN.OP2: lambda x: (x.strip().replace(",", " ", 1).split() + ["", ""])[2], + FN.OP1: lambda x: (x.strip().replace(",", " ", 1).split() + [""])[1], # 1st Operand of an instruction or "" + FN.OP2: lambda x: (x.strip().replace(",", " ", 1).split() + ["", ""])[2], # 2nd Operand of an instruction or "" } # Binary operators @@ -228,14 +228,22 @@ def eval(self, vars_: dict[str, Any] | None = None) -> str | Evaluator | list[An return vars_[val] if len(self.expression) == 2: - oper = FN(self.expression[0]) - assert oper in UNARY + try: + oper = FN(self.expression[0]) + assert oper in UNARY + except (AssertionError, ValueError): + raise ValueError(f"Invalid unary operator '{self.expression[0]}'") + operand = self.expression[1].eval(vars_) return self.normalize(UNARY[oper](operand)) if len(self.expression) == 3 and self.expression[1] != FN.OP_COMMA: - oper = FN(self.expression[1]) - assert oper in BINARY + try: + oper = FN(self.expression[1]) + assert oper in BINARY + except (AssertionError, ValueError): + raise ValueError(f"Invalid binary operator '{self.expression[1]}'") + # Do lazy evaluation left_ = lambda: self.expression[0].eval(vars_) right_ = lambda: self.expression[2].eval(vars_) diff --git a/src/arch/z80/peephole/parser.py b/src/arch/z80/peephole/parser.py index edf12ceb6..47332f3b1 100644 --- a/src/arch/z80/peephole/parser.py +++ b/src/arch/z80/peephole/parser.py @@ -25,7 +25,7 @@ O_LEVEL = "OLEVEL" O_FLAG = "OFLAG" -# Operators : priority (lower number -> highest priority) +# Operators : precedence (lower number -> highest priority) IF_OPERATORS: Final[MappingProxyType[FN, int]] = MappingProxyType( { FN.OP_NMUL: 3, @@ -54,6 +54,10 @@ REQUIRED = (REG_REPLACE, REG_WITH, O_LEVEL, O_FLAG) +class PeepholeParserSyntaxError(SyntaxError): + pass + + def simplify_expr(expr: list[Any]) -> list[Any]: """Simplifies ("unnest") a list, removing redundant brackets. i.e. [[x, [[y]]] becomes [x, [y]] @@ -79,23 +83,35 @@ class DefineLine(NamedTuple): expr: Evaluator -def parse_ifline(if_line: str, lineno: int) -> TreeType | None: - """Given a line from within a IF region (i.e. $1 == "af'") - returns it as a list of tokens ['$1', '==', "af'"] - """ - stack: list[TreeType] = [] - expr: TreeType = [] - paren = 0 - error_ = False +class Tokenizer: + def __init__(self, source: str, lineno: int) -> None: + self.source = source + self.lineno = lineno - while not error_ and if_line: - if_line = if_line.strip() - if not if_line: - break - qq = RE_IFPARSE.match(if_line) + def get_token(self) -> str: + """Returns next token, or "" as EOL""" + tok = self.lookahead() + self.source = self.source[len(tok) :] + return tok + + def get_next_token(self) -> str: + if self.has_finished(): + raise PeepholeParserSyntaxError("Unexpected EOL") + + return self.get_token() + + def has_finished(self) -> bool: + return self.source == "" + + def lookahead(self) -> str: + """Returns next token, or "" as EOL""" + self.source = self.source.strip() + if self.has_finished(): + return "" + + qq = RE_IFPARSE.match(self.source) if not qq: - error_ = True - break + raise PeepholeParserSyntaxError(f"Syntax error in line {self.lineno}: {self.source}") tok = qq.group() if not RE_ID.match(tok): @@ -104,7 +120,25 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None: tok = tok[: len(oper)] break - if_line = if_line[len(tok) :] + return tok + + +def parse_ifline(if_line: str, lineno: int) -> TreeType | None: + """Given a line from within a IF region (i.e. $1 == "af'") + returns it as a list of tokens ['$1', '==', "af'"] + """ + stack: list[TreeType] = [] + expr: TreeType = [] + paren = 0 + error_ = False + tokenizer = Tokenizer(if_line, lineno) + + while not tokenizer.has_finished(): + try: + tok = tokenizer.get_token() + except PeepholeParserSyntaxError as e: + errmsg.warning(lineno, str(e)) + return None if tok == "(": paren += 1 @@ -139,8 +173,8 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None: errmsg.warning(lineno, f"Unexpected {tok} in list") return None - while len(expr) == 2 and isinstance(expr[-2], str): - op: str | TreeType = expr[-2] + while len(expr) == 2 and isinstance(expr[0], str): + op: str = expr[0] if op in UNARY: stack[-1].append(expr) expr = stack.pop() @@ -162,7 +196,11 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None: expr = [expr] - if not error_ and paren: + if error_: + errmsg.warning(lineno, "syntax error in IF section") + return None + + if paren: errmsg.warning(lineno, "unclosed parenthesis in IF section") return None @@ -181,11 +219,20 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None: errmsg.warning(lineno, f"unexpected binary operator '{op}'") return None - if error_: - errmsg.warning(lineno, "syntax error in IF section") + expr = simplify_expr(expr) + if len(expr) == 2 and isinstance(expr[-1], str) and expr[-1] in BINARY: + errmsg.warning(lineno, f"Unexpected binary operator '{expr[-1]}'") + return None + + if len(expr) == 3 and (expr[1] not in BINARY or expr[1] == FN.OP_COMMA): + errmsg.warning(lineno, f"Unexpected binary operator '{expr[1]}'") + return None + + if len(expr) > 3: + errmsg.warning(lineno, "Lists not allowed in IF section condition. Missing operator") return None - return simplify_expr(expr) + return expr def parse_define_line(sourceline: SourceLine) -> tuple[str | None, TreeType | None]: @@ -329,12 +376,12 @@ def check_entry(key: str) -> bool: return result -def parse_file(fname: str): +def parse_file(fname: str) -> dict[str, str | int | list[str | list]] | None: """Opens and parse a file given by filename""" tmp = global_.FILENAME global_.FILENAME = fname # set filename so it shows up in error/warning msgs - with open(fname, "rt") as f: + with open(fname, "rt", encoding="utf-8") as f: result = parse_str(f.read()) global_.FILENAME = tmp # restores original filename diff --git a/tests/arch/zx48k/peephole/test_parser.py b/tests/arch/zx48k/peephole/test_parser.py index 314409deb..844f2b24d 100644 --- a/tests/arch/zx48k/peephole/test_parser.py +++ b/tests/arch/zx48k/peephole/test_parser.py @@ -32,25 +32,22 @@ def test_parse_string(self): self.maxDiff = None self.assertIsInstance(result, dict) - self.assertDictEqual( - result, - { - "DEFINE": [], - "IF": [ - [ - [["$1", "==", "af'"], "&&", ["$1", "==", 'Hello ""World""']], - "&&", - [["$1", "==", "(hl)"], "||", ["IS_INDIR", ["$1"]]], - ], - "||", - ["$1", "==", "aa"], + assert result == { + "DEFINE": [], + "IF": [ + [ + [["$1", "==", "af'"], "&&", ["$1", "==", 'Hello ""World""']], + "&&", + [["$1", "==", "(hl)"], "||", ["IS_INDIR", ["$1"]]], ], - "OFLAG": 15, - "OLEVEL": 1, - "REPLACE": ["push $1", "pop $1"], - "WITH": [], - }, - ) + "||", + ["$1", "==", "aa"], + ], + "OFLAG": 15, + "OLEVEL": 1, + "REPLACE": ["push $1", "pop $1"], + "WITH": [], + } def test_parse_call(self): result = parser.parse_str( @@ -398,3 +395,92 @@ def test_parse_if_must_start_in_a_new_line(self): """ ) assert result is None + + def test_parse_with_ending_binary_error(self): + result = parser.parse_str( + """ + ;; Remove the boolean normalization if it's done after calling + ;; certain routines that return the bool result already normalized. + + ;; The sequence + ;; sub 1 + ;; sbc a, a + ;; inc a + ;; can be removed + + OLEVEL: 1 + OFLAG: 20 + + REPLACE {{ + $1 + }} + + WITH {{ + }} + + IF {{ + $1 == "ld a, 0" || + }} + """ + ) + assert result is None + + def test_parse_with_comma_error(self): + result = parser.parse_str( + """ + ;; Remove the boolean normalization if it's done after calling + ;; certain routines that return the bool result already normalized. + + ;; The sequence + ;; sub 1 + ;; sbc a, a + ;; inc a + ;; can be removed + + OLEVEL: 1 + OFLAG: 20 + + REPLACE {{ + $1 + }} + + WITH {{ + }} + + IF {{ + $1 == "ld a, 0" , + $1 == "ld a, 0" + }} + """ + ) + assert result is None + + def test_parse_with_nested_comma_error(self): + result = parser.parse_str( + """ + ;; Remove the boolean normalization if it's done after calling + ;; certain routines that return the bool result already normalized. + + ;; The sequence + ;; sub 1 + ;; sbc a, a + ;; inc a + ;; can be removed + + OLEVEL: 1 + OFLAG: 20 + + REPLACE {{ + $1 + }} + + WITH {{ + }} + + IF {{ + ($1 == "ld a, 0" , + $1 == "ld a, 0") + }} + """ + ) + assert result is None