Skip to content

Commit 2933e01

Browse files
authored
Merge pull request #21 from microsoft/fix/tokenizer-camelcase-keyword
fix: tokenizer word_continuation case sensitivity for camelCase identifiers
2 parents b58a92a + ba55992 commit 2933e01

4 files changed

Lines changed: 111 additions & 17 deletions

File tree

flowquery-py/src/tokenization/string_walker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,4 @@ def word_continuation(self, word: str) -> bool:
155155
if next_pos >= len(self._text):
156156
return False
157157
next_char = self._text[next_pos]
158-
return next_char in StringUtils.word_valid_chars
158+
return next_char.lower() in StringUtils.word_valid_chars

flowquery-py/tests/parsing/test_parser.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,52 @@ def test_lookup_with_reserved_keyword_property_names(self):
325325
)
326326
assert ast.print() == expected
327327

328+
def test_lookup_with_from_keyword_as_property_name(self):
329+
"""Test lookup with from and to keywords as property names."""
330+
parser = Parser()
331+
ast = parser.parse("with {from: 1, to: 2} as x return x.from, x.to")
332+
expected = (
333+
"ASTNode\n"
334+
"- With\n"
335+
"-- Expression (x)\n"
336+
"--- AssociativeArray\n"
337+
"---- KeyValuePair\n"
338+
"----- String (from)\n"
339+
"----- Expression\n"
340+
"------ Number (1)\n"
341+
"---- KeyValuePair\n"
342+
"----- String (to)\n"
343+
"----- Expression\n"
344+
"------ Number (2)\n"
345+
"- Return\n"
346+
"-- Expression\n"
347+
"--- Lookup\n"
348+
"---- Identifier (from)\n"
349+
"---- Reference (x)\n"
350+
"-- Expression\n"
351+
"--- Lookup\n"
352+
"---- Identifier (to)\n"
353+
"---- Reference (x)"
354+
)
355+
assert ast.print() == expected
356+
357+
def test_camel_case_alias_starting_with_keyword(self):
358+
"""Test that camelCase identifiers starting with a keyword (e.g. fromUser) are tokenized correctly."""
359+
parser = Parser()
360+
ast = parser.parse("LOAD JSON FROM '/data.json' AS x RETURN x.from AS fromUser")
361+
output = ast.print()
362+
assert "Lookup" in output
363+
assert "Identifier (from)" in output
364+
365+
def test_from_keyword_property_in_create_virtual_subquery(self):
366+
"""Test that email.from parses correctly inside a CREATE VIRTUAL subquery."""
367+
parser = Parser()
368+
# Should not raise - email.from should be parsed correctly even with FROM being a keyword
369+
parser.parse(
370+
"CREATE VIRTUAL (:Email) AS { LOAD JSON FROM '/data/emails.json' AS email "
371+
"RETURN email.id AS id, email.from AS fromUser }"
372+
)
373+
328374
def test_load_with_post(self):
329375
"""Test load with post."""
330376
parser = Parser()

src/tokenization/string_walker.ts

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ import StringUtils from "../utils/string_utils";
22

33
/**
44
* Utility class for walking through a string character by character during tokenization.
5-
*
5+
*
66
* Provides methods to check for specific character patterns, move through the string,
77
* and extract substrings. Used by the Tokenizer to process input text.
8-
*
8+
*
99
* @example
1010
* ```typescript
1111
* const walker = new StringWalker("WITH x as variable");
@@ -20,7 +20,7 @@ class StringWalker {
2020

2121
/**
2222
* Creates a new StringWalker for the given text.
23-
*
23+
*
2424
* @param text - The input text to walk through
2525
*/
2626
constructor(text: string) {
@@ -81,42 +81,45 @@ class StringWalker {
8181
}
8282

8383
public singleLineCommentStart(): boolean {
84-
return this.currentChar === '/' && this.nextChar === '/';
84+
return this.currentChar === "/" && this.nextChar === "/";
8585
}
8686

8787
public multiLineCommentStart(): boolean {
88-
return this.currentChar === '/' && this.nextChar === '*';
88+
return this.currentChar === "/" && this.nextChar === "*";
8989
}
9090

9191
public multiLineCommentEnd(): boolean {
92-
return this.currentChar === '*' && this.nextChar === '/';
92+
return this.currentChar === "*" && this.nextChar === "/";
9393
}
9494

9595
public newLine(): boolean {
96-
if (this.currentChar === '\n') {
96+
if (this.currentChar === "\n") {
9797
return true;
9898
}
9999
return false;
100100
}
101101

102102
public escaped(char: string): boolean {
103-
return this.currentChar === '\\' && this.nextChar === char;
103+
return this.currentChar === "\\" && this.nextChar === char;
104104
}
105105

106106
public escapedBrace(): boolean {
107-
return (this.currentChar === '{' && this.nextChar === '{') || (this.currentChar === '}' && this.nextChar === '}');
107+
return (
108+
(this.currentChar === "{" && this.nextChar === "{") ||
109+
(this.currentChar === "}" && this.nextChar === "}")
110+
);
108111
}
109112

110113
public openingBrace(): boolean {
111-
return this.currentChar === '{';
114+
return this.currentChar === "{";
112115
}
113116

114117
public closingBrace(): boolean {
115-
return this.currentChar === '}';
118+
return this.currentChar === "}";
116119
}
117120

118121
public checkForUnderScore(): boolean {
119-
const foundUnderScore = this.currentChar === '_';
122+
const foundUnderScore = this.currentChar === "_";
120123
if (foundUnderScore) {
121124
this._position++;
122125
}
@@ -141,7 +144,7 @@ class StringWalker {
141144

142145
public checkForQuote(): string | null {
143146
const quoteChar = this.currentChar;
144-
if (quoteChar === '"' || quoteChar === "'" || quoteChar === '`') {
147+
if (quoteChar === '"' || quoteChar === "'" || quoteChar === "`") {
145148
this._position++;
146149
return quoteChar;
147150
}
@@ -163,7 +166,7 @@ class StringWalker {
163166
}
164167

165168
public checkForFStringStart(): boolean {
166-
return this.currentChar.toLowerCase() === 'f' && ['\'', '"', '`'].includes(this.nextChar);
169+
return this.currentChar.toLowerCase() === "f" && ["'", '"', "`"].includes(this.nextChar);
167170
}
168171

169172
public moveNext(): void {
@@ -187,8 +190,8 @@ class StringWalker {
187190

188191
public word_continuation(word: string): boolean {
189192
const next = this.text[this.position + word.length];
190-
return StringUtils.word_valid_chars.includes(next);
193+
return next !== undefined && StringUtils.word_valid_chars.includes(next.toLowerCase());
191194
}
192195
}
193196

194-
export default StringWalker;
197+
export default StringWalker;

tests/parsing/parser.test.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,51 @@ test("Test lookup with JSON array", () => {
264264
expect(_return.firstChild().value()).toBe(2);
265265
});
266266

267+
test("Test lookup with from keyword as property name", () => {
268+
const parser = new Parser();
269+
const ast = parser.parse("with {from: 1, to: 2} as x return x.from, x.to");
270+
expect(ast.print()).toBe(
271+
"ASTNode\n" +
272+
"- With\n" +
273+
"-- Expression (x)\n" +
274+
"--- AssociativeArray\n" +
275+
"---- KeyValuePair\n" +
276+
"----- String (from)\n" +
277+
"----- Expression\n" +
278+
"------ Number (1)\n" +
279+
"---- KeyValuePair\n" +
280+
"----- String (to)\n" +
281+
"----- Expression\n" +
282+
"------ Number (2)\n" +
283+
"- Return\n" +
284+
"-- Expression\n" +
285+
"--- Lookup\n" +
286+
"---- Identifier (from)\n" +
287+
"---- Reference (x)\n" +
288+
"-- Expression\n" +
289+
"--- Lookup\n" +
290+
"---- Identifier (to)\n" +
291+
"---- Reference (x)"
292+
);
293+
});
294+
295+
test("Test camelCase alias starting with keyword", () => {
296+
const parser = new Parser();
297+
const ast = parser.parse("LOAD JSON FROM '/data.json' AS x RETURN x.from AS fromUser");
298+
expect(ast.print()).toContain("Lookup");
299+
expect(ast.print()).toContain("Identifier (from)");
300+
});
301+
302+
test("Test from keyword property in create virtual subquery", () => {
303+
const parser = new Parser();
304+
// Should not throw - email.from should be parsed correctly even with FROM being a keyword
305+
expect(() => {
306+
parser.parse(
307+
"CREATE VIRTUAL (:Email) AS { LOAD JSON FROM '/data/emails.json' AS email RETURN email.id AS id, email.from AS fromUser }"
308+
);
309+
}).not.toThrow();
310+
});
311+
267312
test("Test lookup with reserved keyword property names", () => {
268313
const parser = new Parser();
269314
const ast = parser.parse("with {end: 1, null: 2, case: 3} as x return x.end, x.null, x.case");

0 commit comments

Comments
 (0)