Skip to content

Commit f0891d4

Browse files
authored
Merge pull request #24 from microsoft/fixes
Fixes
2 parents 5edc9a2 + a7c297f commit f0891d4

File tree

22 files changed

+2071
-46
lines changed

22 files changed

+2071
-46
lines changed

flowquery-py/src/graph/relationship.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def find_match(id_: str, h: int) -> bool:
167167
follow_id = 'left_id' if is_left else 'right_id'
168168
while self._data and find_match(left_id, hop):
169169
data = self._data.current(hop)
170-
if data and self._hops and hop >= self._hops.min:
170+
if data and self._hops and hop + 1 >= self._hops.min:
171171
self.set_value(self)
172172
if not self._matches_properties(hop):
173173
continue
@@ -178,6 +178,10 @@ def find_match(id_: str, h: int) -> bool:
178178
if self._hops and hop + 1 < self._hops.max:
179179
await self.find(data[follow_id], hop + 1)
180180
self._matches.pop()
181+
elif data and self._hops:
182+
# Below minimum hops: traverse the edge without yielding a match
183+
if follow_id in data:
184+
await self.find(data[follow_id], hop + 1)
181185

182186
# Restore original source node
183187
self._source = original

flowquery-py/src/parsing/expressions/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@
1313
Equals,
1414
GreaterThan,
1515
GreaterThanOrEqual,
16+
In,
1617
Is,
1718
LessThan,
1819
LessThanOrEqual,
1920
Modulo,
2021
Multiply,
2122
Not,
2223
NotEquals,
24+
NotIn,
2325
Operator,
2426
Or,
2527
Power,
@@ -54,4 +56,6 @@
5456
"Or",
5557
"Not",
5658
"Is",
59+
"In",
60+
"NotIn",
5761
]

flowquery-py/src/parsing/expressions/operator.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,105 @@ def __init__(self) -> None:
167167

168168
def value(self) -> int:
    """Return 1 when both operands evaluate to equal values, otherwise 0."""
    # Left operand is evaluated before the right one.
    if self.lhs.value() == self.rhs.value():
        return 1
    return 0
170+
171+
172+
class IsNot(Operator):
    """Binary operator that yields 1 when its operands differ, else 0."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(-1, True)

    def value(self) -> int:
        """Return 1 if the left and right values compare unequal, otherwise 0."""
        if self.lhs.value() != self.rhs.value():
            return 1
        return 0
178+
179+
180+
class In(Operator):
    """Membership operator: 1 when the left value occurs in the right-hand list."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(-1, True)

    def value(self) -> int:
        """Return 1 if the left value is an element of the right list, else 0.

        Raises:
            ValueError: If the right operand does not evaluate to a list.
        """
        # The right operand is evaluated and validated before the left one.
        candidates = self.rhs.value()
        if isinstance(candidates, list):
            return 1 if self.lhs.value() in candidates else 0
        raise ValueError("Right operand of IN must be a list")
189+
190+
191+
class NotIn(Operator):
    """Negated membership operator: 1 when the left value is absent from the list."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(-1, True)

    def value(self) -> int:
        """Return 0 if the left value is an element of the right list, else 1.

        Raises:
            ValueError: If the right operand does not evaluate to a list.
        """
        # The right operand is evaluated and validated before the left one.
        candidates = self.rhs.value()
        if isinstance(candidates, list):
            return 0 if self.lhs.value() in candidates else 1
        raise ValueError("Right operand of NOT IN must be a list")
200+
201+
202+
class Contains(Operator):
    """Substring operator: 1 when the right string occurs inside the left string."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(0, True)

    def value(self) -> int:
        """Return 1 if the right string is a substring of the left string, else 0.

        Raises:
            ValueError: If either operand does not evaluate to a string.
        """
        haystack = self.lhs.value()
        needle = self.rhs.value()
        if isinstance(haystack, str) and isinstance(needle, str):
            return 1 if needle in haystack else 0
        raise ValueError("CONTAINS requires string operands")
212+
213+
214+
class NotContains(Operator):
    """Negated substring operator: 1 when the right string is absent from the left."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(0, True)

    def value(self) -> int:
        """Return 0 if the right string is a substring of the left string, else 1.

        Raises:
            ValueError: If either operand does not evaluate to a string.
        """
        haystack = self.lhs.value()
        needle = self.rhs.value()
        if isinstance(haystack, str) and isinstance(needle, str):
            return 0 if needle in haystack else 1
        raise ValueError("NOT CONTAINS requires string operands")
224+
225+
226+
class StartsWith(Operator):
    """Prefix operator: 1 when the left string begins with the right string."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(0, True)

    def value(self) -> int:
        """Return 1 if the left string starts with the right string, else 0.

        Raises:
            ValueError: If either operand does not evaluate to a string.
        """
        text = self.lhs.value()
        prefix = self.rhs.value()
        if isinstance(text, str) and isinstance(prefix, str):
            return 1 if text.startswith(prefix) else 0
        raise ValueError("STARTS WITH requires string operands")
236+
237+
238+
class NotStartsWith(Operator):
    """Negated prefix operator: 1 when the left string does not begin with the right."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(0, True)

    def value(self) -> int:
        """Return 0 if the left string starts with the right string, else 1.

        Raises:
            ValueError: If either operand does not evaluate to a string.
        """
        text = self.lhs.value()
        prefix = self.rhs.value()
        if isinstance(text, str) and isinstance(prefix, str):
            return 0 if text.startswith(prefix) else 1
        raise ValueError("NOT STARTS WITH requires string operands")
248+
249+
250+
class EndsWith(Operator):
    """Suffix operator: 1 when the left string ends with the right string."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(0, True)

    def value(self) -> int:
        """Return 1 if the left string ends with the right string, else 0.

        Raises:
            ValueError: If either operand does not evaluate to a string.
        """
        text = self.lhs.value()
        suffix = self.rhs.value()
        if isinstance(text, str) and isinstance(suffix, str):
            return 1 if text.endswith(suffix) else 0
        raise ValueError("ENDS WITH requires string operands")
260+
261+
262+
class NotEndsWith(Operator):
    """Negated suffix operator: 1 when the left string does not end with the right."""

    def __init__(self) -> None:
        # Both arguments are forwarded unchanged to Operator.__init__
        # (presumably precedence and associativity -- confirm in Operator).
        super().__init__(0, True)

    def value(self) -> int:
        """Return 0 if the left string ends with the right string, else 1.

        Raises:
            ValueError: If either operand does not evaluate to a string.
        """
        text = self.lhs.value()
        suffix = self.rhs.value()
        if isinstance(text, str) and isinstance(suffix, str):
            return 0 if text.endswith(suffix) else 1
        raise ValueError("NOT ENDS WITH requires string operands")

flowquery-py/src/parsing/functions/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from .schema import Schema
3131
from .size import Size
3232
from .split import Split
33+
from .string_distance import StringDistance
3334
from .stringify import Stringify
3435

3536
# Built-in functions
@@ -68,6 +69,7 @@
6869
"Round",
6970
"Size",
7071
"Split",
72+
"StringDistance",
7173
"Stringify",
7274
"ToJson",
7375
"Type",
flowquery-py/src/parsing/functions/string_distance.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""String distance function using Levenshtein distance."""
2+
3+
from .function import Function
4+
from .function_metadata import FunctionDef
5+
6+
7+
def _levenshtein_distance(a: str, b: str) -> float:
8+
"""Compute the normalized Levenshtein distance between two strings.
9+
10+
The Levenshtein distance is the minimum number of single-character edits
11+
(insertions, deletions, or substitutions) required to change one string
12+
into the other. The result is normalized to [0, 1] by dividing by the
13+
length of the longer string.
14+
15+
Args:
16+
a: First string
17+
b: Second string
18+
19+
Returns:
20+
The normalized Levenshtein distance (0 = identical, 1 = completely different)
21+
"""
22+
m = len(a)
23+
n = len(b)
24+
25+
# Both empty strings are identical
26+
if m == 0 and n == 0:
27+
return 0.0
28+
29+
# Create a matrix of size (m+1) x (n+1)
30+
dp = [[0] * (n + 1) for _ in range(m + 1)]
31+
32+
# Base cases: transforming empty string to/from a prefix
33+
for i in range(m + 1):
34+
dp[i][0] = i
35+
for j in range(n + 1):
36+
dp[0][j] = j
37+
38+
# Fill in the rest of the matrix
39+
for i in range(1, m + 1):
40+
for j in range(1, n + 1):
41+
cost = 0 if a[i - 1] == b[j - 1] else 1
42+
dp[i][j] = min(
43+
dp[i - 1][j] + 1, # deletion
44+
dp[i][j - 1] + 1, # insertion
45+
dp[i - 1][j - 1] + cost # substitution
46+
)
47+
48+
# Normalize by the length of the longer string
49+
return dp[m][n] / max(m, n)
50+
51+
52+
@FunctionDef({
    "description": (
        "Computes the normalized Levenshtein distance between two strings. "
        "Returns a value in [0, 1] where 0 means identical and 1 means completely different."
    ),
    "category": "scalar",
    "parameters": [
        {"name": "string1", "description": "First string", "type": "string"},
        {"name": "string2", "description": "Second string", "type": "string"},
    ],
    "output": {
        "description": "Normalized Levenshtein distance (0 = identical, 1 = completely different)",
        "type": "number",
        "example": 0.43,
    },
    "examples": [
        "RETURN string_distance('kitten', 'sitting')",
        "WITH 'hello' AS a, 'hallo' AS b RETURN string_distance(a, b)",
    ],
})
class StringDistance(Function):
    """Scalar `string_distance` function.

    Evaluates its two child expressions and returns their normalized
    Levenshtein distance: 0 for identical strings, 1 for completely
    different ones.
    """

    def __init__(self) -> None:
        # Register under the query-language name and require two arguments.
        super().__init__("string_distance")
        self._expected_parameter_count = 2

    def value(self) -> float:
        """Return the normalized Levenshtein distance of the two arguments.

        Raises:
            ValueError: If either argument does not evaluate to a string.
        """
        first = self.get_children()[0].value()
        second = self.get_children()[1].value()
        if isinstance(first, str) and isinstance(second, str):
            return _levenshtein_distance(first, second)
        raise ValueError("Invalid arguments for string_distance function: both arguments must be strings")

0 commit comments

Comments
 (0)