From fd9330abd68c3d048315f43c13665da7877cee9c Mon Sep 17 00:00:00 2001
From: Steve Ayers
Date: Tue, 4 Feb 2025 15:28:21 -0500
Subject: [PATCH 1/2] Additional checks

---
 protovalidate/internal/extra_func.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py
index 1463b6d6..6079ac6f 100644
--- a/protovalidate/internal/extra_func.py
+++ b/protovalidate/internal/extra_func.py
@@ -165,8 +165,20 @@ def is_email(string: celtypes.Value) -> celpy.Result:
 
 def is_uri(string: celtypes.Value) -> celpy.Result:
     url = urlparse.urlparse(string)
-    if not all([url.scheme, url.netloc, url.path]):
-        return celtypes.BoolType(False)
+    # urlparse correctly reads the scheme from URNs but parses everything
+    # after (except the query string) as the path. 
+    if url.scheme == "urn":
+        if not (url.path):
+            return celtypes.BoolType(False)
+    else:
+        if not all([url.scheme, url.netloc, url.path]):
+            return celtypes.BoolType(False)
+
+    # If the query string contains percent-encoding, then try to decode it.
+    # unquote will return the same string if it is improperly encoded.
+    if "%" in url.query:
+        return celtypes.BoolType(urlparse.unquote(url.query) != url.query)
+
     return celtypes.BoolType(True)
 
 

From 030036ccbe76107267c42e48ac96f63b66f72978 Mon Sep 17 00:00:00 2001
From: Steve Ayers
Date: Tue, 4 Feb 2025 15:36:00 -0500
Subject: [PATCH 2/2] Formatter

---
 protovalidate/internal/extra_func.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py
index 6079ac6f..d32a525c 100644
--- a/protovalidate/internal/extra_func.py
+++ b/protovalidate/internal/extra_func.py
@@ -166,13 +166,12 @@ def is_email(string: celtypes.Value) -> celpy.Result:
 def is_uri(string: celtypes.Value) -> celpy.Result:
     url = urlparse.urlparse(string)
     # urlparse correctly reads the scheme from URNs but parses everything
-    # after (except the query string) as the path. 
+    # after (except the query string) as the path.
     if url.scheme == "urn":
         if not (url.path):
             return celtypes.BoolType(False)
-    else:
-        if not all([url.scheme, url.netloc, url.path]):
-            return celtypes.BoolType(False)
+    elif not all([url.scheme, url.netloc, url.path]):
+        return celtypes.BoolType(False)
 
     # If the query string contains percent-encoding, then try to decode it.
     # unquote will return the same string if it is improperly encoded.