From 10aaefb28a6050668bd2828b964caf0e41f5e34f Mon Sep 17 00:00:00 2001 From: Lukas Melninkas Date: Wed, 18 Mar 2026 14:31:33 +0200 Subject: [PATCH] Don't match incomplete street name --- pyap/source_US/data.py | 22 +++++++++++++--------- tests/test_parser_us.py | 3 ++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py index 516f1d0..74c331b 100644 --- a/pyap/source_US/data.py +++ b/pyap/source_US/data.py @@ -827,17 +827,20 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: street_types_leading_re = street_type_list_to_regex(street_type_leading_list) -street_type_extended = r""" + +def street_type_extended(idx: str) -> str: + return r""" (?: - {street_type_a} - (?P + {street_type} + (?P {space_div}\(?[Rr][Oo][Uu][Tt][Ee]\ [A-Za-z0-9]+(?:\ ?\))? )? ) -""".format( - street_type_a=rf"(?P{street_types_with_interstate_re})", - space_div=space_div, -) + """.format( + street_type=rf"(?P{street_types_with_interstate_re})", + idx=idx, + space_div=space_div, + ) typed_street_name = r""" @@ -852,7 +855,7 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: """.format( space_div=space_div, street_name_a=rf"(?P{street_name_multi_word_re})", - street_type_a=street_type_extended, + street_type_a=street_type_extended("a"), street_type_b=rf"(?P{street_types_leading_re})", street_name_b=rf"(?P{street_name_one_word_re})", post_direction_re=post_direction_re, @@ -1052,7 +1055,7 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: | (?: {post_direction_re}\ - \d{{,3}}[A-Za-z\-]{{1,31}} + \d{{,3}}[A-Za-z\-]{{1,31}}(?!\s+{street_type}) ) ) (?:{space_div}{post_direction})? @@ -1080,6 +1083,7 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: street_number=street_number, typed_street_name=typed_street_name, numbered_or_typeless_street_name=numbered_or_typeless_street_name, + street_type=street_type_extended("c"), post_direction=post_direction, post_direction_re=post_direction_re, floor=floor, diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py index f4a8dd4..8b52347 100644 --- a/tests/test_parser_us.py +++ b/tests/test_parser_us.py @@ -245,7 +245,7 @@ def test_post_direction(input, expected): ) def test_street_type(input, expected): """tests string match for a street id""" - execute_matching_test(input, expected, data_us.street_type_extended) + execute_matching_test(input, expected, data_us.street_type_extended("a")) @pytest.mark.parametrize( @@ -662,6 +662,7 @@ def test_full_street_positive(input, expected): ("1234 Fowlstown Rd Lot#18 \nBainbridge, GA 39817", True), ("1234 LONG LANE\nB2 \nUPPER DARBY PA 19082", True), # negative assertions + ("123 Nw Awesome Drive\n12345", False), ("ONE HEALING CENTER LLC, 16444", False), ("85 STEEL REGULAR SHAFT - NE", False), ("3 STRUCTURE WITH PE", False),