Skip to content

Commit d78e280

Browse files
Improve formula validation (#1359)
This PR improves formula validation and adds test coverage for all parser and lexer validation errors. ### Changes 1. Switch to conventional token span start/end semantics: [start, end). Spans are only used internally now, though (see (2)). 2. Introduced `FormulaError` and `FormulaSyntaxError` classes. `FormulaSyntaxError.__str__()` produces a human-readable error message, highlighting the offending span in the context of the original formula. This should be fit for non-technical end users. 3. Removed `ASTNode.span` property as it was unused. 4. Added tests and adapted existing ones to the changes described above.
2 parents 72bcf89 + 0c6c041 commit d78e280

8 files changed

Lines changed: 447 additions & 83 deletions

File tree

RELEASE_NOTES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414

1515
## Bug Fixes
1616

17-
<!-- Here goes notable bug fixes that are worth a special mention or explanation -->
17+
- Improved formula validation: Consistent error messages for invalid formulas and conventional span semantics.

src/frequenz/sdk/timeseries/formulas/_base_ast_node.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@
2424
class AstNode(abc.ABC, Generic[QuantityT]):
2525
"""An abstract syntax tree node representing a formula expression."""
2626

27-
span: tuple[int, int] | None = None
28-
"""The span (start, end) of the expression in the input string."""
29-
3027
@abc.abstractmethod
3128
async def evaluate(self) -> Sample[QuantityT] | QuantityT | None:
3229
"""Evaluate the expression and return its numerical value."""
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# License: MIT
2+
# Copyright © 2026 Frequenz Energy-as-a-Service GmbH
3+
4+
"""Exception classes for formulas parser and evaluator."""
5+
6+
7+
class FormulaError(Exception):
8+
"""Base class for exceptions in this module."""
9+
10+
11+
class FormulaSyntaxError(FormulaError):
12+
"""Exception raised for syntax errors in the formula."""
13+
14+
def __init__(
15+
self, *, formula: str, span: tuple[int, int] | None, message: str
16+
) -> None:
17+
"""Initialize this instance."""
18+
self._formula: str = formula
19+
self._span: tuple[int, int] | None = span
20+
self._message: str = message
21+
22+
def __str__(self) -> str:
23+
"""Return a string representation of the error.
24+
25+
The span (if present) will be used to highlight the error location.
26+
For long formulas, the beginning and/or end will be truncated as needed.
27+
"""
28+
if self._span is None:
29+
return self._message
30+
31+
formula_line = self._formula
32+
span_padding = " " * self._span[0]
33+
span_highlight = "^" * (self._span[1] - self._span[0])
34+
error_line = f"{span_padding}{span_highlight} {self._message}"
35+
36+
char_limit: int = 80 - 11 # Account for " Formula: " prefix in final output
37+
38+
if len(error_line) > char_limit:
39+
# Remove characters from the left to fit error line within char limit
40+
num_chars_to_truncate = len(error_line) - char_limit
41+
error_line = error_line[num_chars_to_truncate:]
42+
# Shift formula line by the same amount and insert leading ellipsis
43+
formula_line = f"... {formula_line[num_chars_to_truncate+4:]}"
44+
45+
if len(formula_line) > char_limit:
46+
# Truncate the formula line, insert ellipsis
47+
formula_line = f"{formula_line[:char_limit-4]} ..."
48+
49+
return (
50+
"Formula syntax error:\n"
51+
f" Formula: {formula_line}\n"
52+
f" {error_line}"
53+
)

src/frequenz/sdk/timeseries/formulas/_functions.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,19 +85,14 @@ async def unsubscribe(self) -> None:
8585
)
8686

8787
@classmethod
88-
def from_string(
89-
cls, name: str, params: list[AstNode[QuantityT]]
90-
) -> Function[QuantityT]:
91-
"""Create a function instance from its name."""
92-
match name.upper():
93-
case "COALESCE":
94-
return Coalesce(params)
95-
case "MAX":
96-
return Max(params)
97-
case "MIN":
98-
return Min(params)
99-
case _:
100-
raise ValueError(f"Unknown function name: {name}")
88+
def function_class_by_name(cls, name: str) -> type[Function[QuantityT]] | None:
89+
"""Return the function class corresponding to the given name."""
90+
known_functions: dict[str, type[Function[QuantityT]]] = {
91+
"COALESCE": Coalesce,
92+
"MAX": Max,
93+
"MIN": Min,
94+
}
95+
return known_functions.get(name.upper())
10196

10297

10398
@dataclass

src/frequenz/sdk/timeseries/formulas/_lexer.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing_extensions import override
1212

1313
from . import _token
14+
from ._exceptions import FormulaSyntaxError
1415
from ._peekable import Peekable
1516

1617

@@ -74,53 +75,58 @@ def __next__(self) -> _token.Token: # pylint: disable=too-many-branches
7475
_ = next(self._iter) # consume '#'
7576
comp_id = self._read_integer()
7677
if not comp_id:
77-
raise ValueError(f"Expected integer after '#' at position {pos}")
78-
end_pos = pos + len(comp_id)
78+
raise FormulaSyntaxError(
79+
formula=self._formula,
80+
span=(pos + 1, pos + 2),
81+
message="Expected integer after '#'",
82+
)
83+
end_pos = pos + len(comp_id) + 1 # account for '#'
7984
return _token.Component(
80-
span=(
81-
pos + 1,
82-
end_pos + 1, # account for '#'
83-
),
85+
span=(pos, end_pos),
8486
id=comp_id,
8587
value=self._formula[pos:end_pos],
8688
)
8789

8890
if char == "+":
8991
_, char = next(self._iter) # consume operator
90-
return _token.Plus(span=(pos + 1, pos + 1), value=char)
92+
return _token.Plus(span=(pos, pos + 1), value=char)
9193

9294
if char == "-":
9395
_, char = next(self._iter)
94-
return _token.Minus(span=(pos + 1, pos + 1), value=char)
96+
return _token.Minus(span=(pos, pos + 1), value=char)
9597

9698
if char == "*":
9799
_, char = next(self._iter)
98-
return _token.Mul(span=(pos + 1, pos + 1), value=char)
100+
return _token.Mul(span=(pos, pos + 1), value=char)
99101

100102
if char == "/":
101103
_, char = next(self._iter)
102-
return _token.Div(span=(pos + 1, pos + 1), value=char)
104+
return _token.Div(span=(pos, pos + 1), value=char)
103105

104106
if char == "(":
105107
_, char = next(self._iter)
106-
return _token.OpenParen(span=(pos + 1, pos + 1), value=char)
108+
return _token.OpenParen(span=(pos, pos + 1), value=char)
107109

108110
if char == ")":
109111
_, char = next(self._iter)
110-
return _token.CloseParen(span=(pos + 1, pos + 1), value=char)
112+
return _token.CloseParen(span=(pos, pos + 1), value=char)
111113

112114
if char == ",":
113115
_, char = next(self._iter)
114-
return _token.Comma(span=(pos + 1, pos + 1), value=char)
116+
return _token.Comma(span=(pos, pos + 1), value=char)
115117

116118
if char.isdigit():
117119
num = self._read_number()
118120
end_pos = pos + len(num)
119-
return _token.Number(span=(pos + 1, end_pos), value=num)
121+
return _token.Number(span=(pos, end_pos), value=num)
120122

121123
if char.isalpha():
122124
symbol = self._read_symbol()
123125
end_pos = pos + len(symbol)
124-
return _token.Symbol(span=(pos + 1, end_pos), value=symbol)
126+
return _token.Symbol(span=(pos, end_pos), value=symbol)
125127

126-
raise ValueError(f"Unexpected character '{char}' at position {pos}")
128+
raise FormulaSyntaxError(
129+
formula=self._formula,
130+
span=(pos, pos + 1),
131+
message="Unexpected character",
132+
)

0 commit comments

Comments
 (0)