Skip to content

Commit 088ecf6

Browse files
committed
re: implement search, findall and sub
1 parent 770914f commit 088ecf6

3 files changed

Lines changed: 147 additions & 7 deletions

File tree

native/src/regex_wrapper.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,54 @@ extern "C" {
4343
return false;
4444
}
4545
}
46+
47+
// Search for a regex pattern in a string
48+
const char* search_pattern(int id, const char* text) {
49+
auto it = regex_cache.find(id);
50+
if (it == regex_cache.end()) {
51+
return nullptr; // Return nullptr if the ID is not found
52+
}
53+
54+
std::smatch match;
55+
std::string str(text);
56+
if (std::regex_search(str, match, *it->second)) {
57+
return match.str().c_str(); // Return the matched substring
58+
}
59+
return nullptr; // Return nullptr if no match is found
60+
}
61+
62+
// Find all matches of a regex pattern in a string
63+
const char* findall_pattern(int id, const char* text) {
64+
auto it = regex_cache.find(id);
65+
if (it == regex_cache.end()) {
66+
return nullptr; // Return nullptr if the ID is not found
67+
}
68+
69+
std::string str(text);
70+
std::smatch match;
71+
std::string result;
72+
std::string::const_iterator searchStart(str.cbegin());
73+
74+
while (std::regex_search(searchStart, str.cend(), match, *it->second)) {
75+
result += match.str() + "\n"; // Append each match to the result string
76+
searchStart = match.suffix().first;
77+
}
78+
79+
if (!result.empty()) {
80+
return result.c_str(); // Return all matches as a single string separated by newlines
81+
}
82+
return nullptr; // Return nullptr if no matches are found
83+
}
84+
85+
// Substitute all occurrences of a regex pattern in a string
86+
const char* substitute_pattern(int id, const char* text, const char* replacement) {
87+
auto it = regex_cache.find(id);
88+
if (it == regex_cache.end()) {
89+
return nullptr; // Return nullptr if the ID is not found
90+
}
91+
92+
std::string str(text);
93+
std::string result = std::regex_replace(str, *it->second, replacement);
94+
return result.c_str(); // Return the modified string
95+
}
4696
}

src/stdlib/re.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,40 @@ def match(pattern: str, text: str) -> bool:
2121
# Python wrapper class
2222
class CompiledRegex:
2323
def __init__(self, pattern):
24-
self.id = lib.compile_pattern( # pyright: ignore [reportAttributeAccessIssue]
25-
pattern.encode("utf-8")
26-
)
24+
self.id = lib.compile_pattern(pattern.encode("utf-8")) # type: ignore
2725
if self.id == -1:
2826
raise ValueError("Invalid regex pattern")
2927

3028
def match(self, text):
31-
return lib.match_compiled( # pyright: ignore [reportAttributeAccessIssue]
32-
self.id, text.encode("utf-8")
33-
)
29+
return lib.match_compiled(self.id, text.encode("utf-8")) # type: ignore
3430

3531
def __del__(self):
36-
lib.release_compiled(self.id) # pyright: ignore [reportAttributeAccessIssue]
32+
lib.release_compiled(self.id) # type: ignore
33+
34+
def search(self, text: str) -> str | None:
35+
# Search for the compiled regex in the text
36+
result = lib.search_pattern(self.id, text.encode("utf-8")) # type: ignore
37+
if result:
38+
return result.decode("utf-8")
39+
return None
40+
41+
def findall(self, text: str) -> list[str]:
42+
# Find all matches of the compiled regex in the text
43+
result = lib.search_pattern(self.id, text.encode("utf-8")) # type: ignore
44+
if result:
45+
return result.decode("utf-8").split("\n")[
46+
:-1
47+
] # Split by newline and remove the last empty string
48+
return []
49+
50+
def sub(self, replacement: str, text: str) -> str:
51+
# Substitute all occurrences of the compiled regex in the text
52+
result = lib.substitute_pattern( # type: ignore
53+
self.id, text.encode("utf-8"), replacement.encode("utf-8")
54+
)
55+
if result:
56+
return result.decode("utf-8")
57+
return text # Return the original text if substitution fails
3758

3859

3960
def compile(pattern: str) -> CompiledRegex:

tests/test_re.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,72 @@ def test_edge_cases():
112112
) # Email regex
113113
assert regex.match("test@example.com") is True # Should match
114114
assert regex.match("invalid-email") is False # Should not match
115+
116+
117+
# Fixture to create a CompiledRegex object for testing
118+
@pytest.fixture
def compiled_regex():
    """Provide a CompiledRegex built from the digit-run pattern ``\\d+``."""
    digits = CompiledRegex(r"\d+")
    return digits
121+
122+
123+
# Test cases for the `search` method
124+
def test_search_found(compiled_regex):
    """`search` returns the first match when the text contains several."""
    sentence = "There are 123 apples and 456 oranges."
    assert compiled_regex.search(sentence) == "123"
128+
129+
130+
def test_search_not_found(compiled_regex):
    """`search` returns None when the text contains no match."""
    sentence = "There are no numbers here."
    assert compiled_regex.search(sentence) is None
134+
135+
136+
# Test cases for the `findall` method
137+
def test_findall_multiple_matches(compiled_regex):
    """`findall` returns every match, in order of appearance."""
    sentence = "There are 123 apples and 456 oranges."
    expected = ["123", "456"]
    assert compiled_regex.findall(sentence) == expected
141+
142+
143+
def test_findall_no_matches(compiled_regex):
    """`findall` returns an empty list when nothing matches."""
    sentence = "There are no numbers here."
    assert compiled_regex.findall(sentence) == []
147+
148+
149+
def test_findall_empty_string(compiled_regex):
    """`findall` on an empty input returns an empty list."""
    found = compiled_regex.findall("")
    assert found == []
153+
154+
155+
# Test cases for the `sub` method
156+
def test_sub_single_replacement(compiled_regex):
    """`sub` replaces the only match in the text."""
    sentence = "There are 123 apples."
    assert compiled_regex.sub("NUM", sentence) == "There are NUM apples."
160+
161+
162+
def test_sub_multiple_replacements(compiled_regex):
    """`sub` replaces every match, not just the first."""
    sentence = "There are 123 apples and 456 oranges."
    expected = "There are NUM apples and NUM oranges."
    assert compiled_regex.sub("NUM", sentence) == expected
166+
167+
168+
def test_sub_no_matches(compiled_regex):
    """`sub` leaves the text untouched when nothing matches."""
    sentence = "There are no numbers here."
    replaced = compiled_regex.sub("NUM", sentence)
    assert replaced == sentence
172+
173+
174+
def test_sub_empty_string(compiled_regex):
    """`sub` on an empty input returns an empty string."""
    replaced = compiled_regex.sub("NUM", "")
    assert replaced == ""
178+
179+
180+
# Edge case: Invalid regex pattern
181+
def test_invalid_regex_pattern():
    """Constructing a CompiledRegex from a malformed pattern raises ValueError."""
    malformed = r"*invalid"
    with pytest.raises(ValueError):
        CompiledRegex(malformed)

0 commit comments

Comments
 (0)