Skip to content

Commit 0f273ff

Browse files
committed
re: compiled regex
1 parent e03209e commit 0f273ff

4 files changed

Lines changed: 125 additions & 7 deletions

File tree

native/src/regex_wrapper.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,40 @@
11
// regex_wrapper.cpp
22
#include <regex>
33
#include <string>
4+
#include <unordered_map>
5+
#include <memory>
6+
7+
// Global table of compiled regex objects, keyed by the integer ID that
// compile_pattern returns. match_compiled looks entries up by ID and
// release_compiled erases them.
// NOTE(review): no locking is visible around these globals in this file —
// confirm that callers never use the API from several threads at once.
8+
std::unordered_map<int, std::shared_ptr<std::regex>> regex_cache;
9+
// ID that compile_pattern assigns to the next successfully compiled pattern.
// It only ever counts upwards, so an ID is not handed out again after
// release_compiled removes its entry.
int next_id = 0;
410

511
extern "C" {
12+
// Compile a regex pattern and return an ID
13+
int compile_pattern(const char* pattern) {
14+
try {
15+
auto re = std::make_shared<std::regex>(pattern);
16+
int id = next_id++;
17+
regex_cache[id] = re;
18+
return id;
19+
} catch (const std::regex_error&) {
20+
return -1; // Return -1 to indicate an error
21+
}
22+
}
23+
24+
// Match a compiled regex against text
25+
bool match_compiled(int id, const char* text) {
26+
auto it = regex_cache.find(id);
27+
if (it != regex_cache.end()) {
28+
return std::regex_match(text, *it->second);
29+
}
30+
return false; // Return false if the ID is not found
31+
}
32+
33+
// Release a compiled regex by ID
34+
// Remove the cache entry stored under `id`, if there is one.
// Unknown IDs are ignored, so releasing the same ID twice is harmless.
void release_compiled(int id) {
    const auto entry = regex_cache.find(id);
    if (entry != regex_cache.end()) {
        regex_cache.erase(entry);
    }
}
37+
638
bool match(const char* pattern, const char* text) {
739
try {
840
std::regex re(pattern);

src/stdlib/_cffi_util.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import cffi
55

66

7-
def load_library(lib_name):
7+
def load_library(lib_name, interface=""):
88
"""
99
Load a shared library based on the operating system.
1010
@@ -30,11 +30,7 @@ def load_library(lib_name):
3030
)
3131

3232
ffi = cffi.FFI()
33-
ffi.cdef(
34-
"""
35-
bool match(const char* pattern, const char* text);
36-
"""
37-
)
33+
ffi.cdef(interface)
3834
lib_path = Path(__file__).parent.parent / "lib" / lib_filename
3935

4036
return ffi.dlopen(str(lib_path))

src/stdlib/re.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
from stdlib._cffi_util import load_library
22

3+
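# C declarations passed to load_library, which hands them to cffi's cdef();
# each prototype must match an extern "C" function in native/src/regex_wrapper.cpp.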
4+
interface = """
5+
int compile_pattern(const char* pattern);
6+
bool match_compiled(int id, const char* text);
7+
void release_compiled(int id);
8+
bool match(const char* pattern, const char* text);
9+
"""
10+
311
# Load the shared library
4-
lib = load_library("regex_wrapper")
12+
lib = load_library("regex_wrapper", interface)
513

614

715
def match(pattern: str, text: str) -> bool:
@@ -10,6 +18,28 @@ def match(pattern: str, text: str) -> bool:
1018
)
1119

1220

21+
# Python wrapper class
22+
class CompiledRegex:
    """Handle to a regex compiled once by the native library and reused.

    The native side stores the compiled pattern and identifies it by an
    integer ID, kept here in ``self.id``. An ID of ``-1`` means "no native
    object is held" (compilation failed or the handle was already released).
    """

    def __init__(self, pattern):
        """Compile ``pattern`` (a ``str``) in the native library.

        Raises:
            ValueError: if the native library rejects the pattern.
        """
        # Set the sentinel first so that __del__ stays safe even when the
        # encode() or compile call below raises before an ID is assigned.
        self.id = -1
        self.id = lib.compile_pattern(
            pattern.encode("utf-8")
        )  # pyright: ignore [reportAttributeAccessIssue]
        if self.id == -1:
            raise ValueError("Invalid regex pattern")

    def match(self, text):
        """Return the native ``match_compiled`` result for ``text`` (a ``str``).

        The native implementation matches against the entire string.
        """
        return lib.match_compiled(
            self.id, text.encode("utf-8")
        )  # pyright: ignore [reportAttributeAccessIssue]

    def __del__(self):
        """Release the native regex, at most once, if one was ever created."""
        # getattr guards against instances whose __init__ never ran at all.
        handle = getattr(self, "id", -1)
        if handle == -1:
            return
        # Clear the ID before calling out so a repeated __del__ is a no-op.
        self.id = -1
        lib.release_compiled(handle)  # pyright: ignore [reportAttributeAccessIssue]
37+
38+
39+
def compile(pattern: str) -> CompiledRegex:
    """Build a CompiledRegex for ``pattern`` and return it.

    Thin convenience wrapper around the CompiledRegex constructor; any
    exception raised by the constructor propagates to the caller.
    """
    compiled = CompiledRegex(pattern)
    return compiled
41+
42+
1343
# Example usage
1444
if __name__ == "__main__":
1545
pattern = r"^\d{3}-\d{2}-\d{4}$"

tests/test_re.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
import pytest
2+
3+
from stdlib.re import CompiledRegex
4+
5+
16
def test_match_simple_pattern(regex_matcher):
27
"""Test matching a simple pattern."""
38
assert regex_matcher(r"^\d{3}-\d{2}-\d{4}$", "123-45-6789") == True
@@ -52,3 +57,58 @@ def test_match_with_anchors(regex_matcher):
5257
assert (
5358
regex_matcher(r"exact", "not exact") == False
5459
) # "not exact" is not the full string
60+
61+
62+
# Test valid regex compilation and matching
63+
def test_valid_regex():
    """A digits pattern accepts an all-digit string and rejects letters."""
    digits = CompiledRegex(r"\d+")
    for candidate, expected in (("123", True), ("abc", False)):
        assert digits.match(candidate) is expected
67+
68+
69+
# Test invalid regex pattern
70+
def test_invalid_regex():
    """Constructing a CompiledRegex from a malformed pattern raises ValueError."""
    malformed = r"*invalid*"
    with pytest.raises(ValueError, match="Invalid regex pattern"):
        CompiledRegex(malformed)
73+
74+
75+
# Test matching with a compiled regex
76+
def test_match_compiled():
    """A letters-only pattern matches a word and rejects a digit string."""
    letters = CompiledRegex(r"[A-Za-z]+")

    word_result = letters.match("Hello")
    assert word_result is True

    digit_result = letters.match("123")
    assert digit_result is False
80+
81+
82+
# Test multiple regex instances
83+
def test_multiple_regex_instances():
    """Two compiled patterns alive at the same time each keep their own behavior."""
    three_digits = CompiledRegex(r"\d{3}")
    lowercase = CompiledRegex(r"[a-z]+")

    # (compiled regex, input text, expected result), checked in this order.
    expectations = (
        (three_digits, "123", True),
        (three_digits, "12", False),
        (lowercase, "abc", True),
        (lowercase, "ABC", False),
    )
    for compiled, text, expected in expectations:
        assert compiled.match(text) is expected
92+
93+
94+
# Test releasing compiled regex
95+
def test_release_compiled():
    """Dropping a CompiledRegex after use must not crash."""
    words = CompiledRegex(r"\w+")
    outcome = words.match("word")
    assert outcome is True
    # Removing the only reference lets CompiledRegex.__del__ run (immediately
    # under CPython reference counting). The native cache cannot be inspected
    # from here, so this only checks that the release path does not blow up.
    del words
100+
101+
102+
# Test edge cases
103+
def test_edge_cases():
    """Empty and complex patterns follow whole-string matching semantics."""
    # Empty pattern: the native side uses std::regex_match, which must match
    # the entire input, so an empty pattern accepts only the empty string.
    regex = CompiledRegex(r"")
    assert regex.match("") is True  # Should match empty string
    assert regex.match("a") is False  # Non-empty input is not a full match

    # Complex pattern. The TLD class is [A-Za-z]; a "|" inside a character
    # class is a literal pipe, not alternation.
    regex = CompiledRegex(
        r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$"
    )  # Email regex
    assert regex.match("test@example.com") is True  # Should match
    assert regex.match("invalid-email") is False  # Should not match

0 commit comments

Comments
 (0)