-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathci_validate.py
More file actions
executable file
·219 lines (173 loc) · 8.22 KB
/
ci_validate.py
File metadata and controls
executable file
·219 lines (173 loc) · 8.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/python
import re
import iso3166
import phonenumbers
import sys
from pathlib import Path
COUNTRY_ASM = Path('country.asm')
def is_alpha2(code):
    """Return True when *code* is an accepted two-letter country code.

    The comparison is case-insensitive.  Two pseudo-codes used by
    country.asm are accepted unconditionally; everything else must be a
    key of the iso3166 alpha-2 index.

    Args:
        code: Two-letter country code string, in any case.

    Returns:
        bool: True if the code is accepted, False otherwise.
    """
    alpha2 = code.upper()
    # Special cases accepted without consulting iso3166:
    #   XX = Middle East, YU = Yugoslavia
    if alpha2 in ('XX', 'YU'):
        return True
    # Use the documented public accessor rather than the private
    # ``_by_alpha2`` index; both names refer to the same mapping.
    return alpha2 in iso3166.countries_by_alpha2
def is_country(code, pnum):
    """Return True when alpha-2 *code* belongs to phone prefix *pnum*.

    A fixed set of (code, prefix) pairs is accepted outright; any other
    pair is checked against the regions that phonenumbers reports for
    the prefix.

    Args:
        code: Two-letter country code string, in any case.
        pnum: Numeric country code (international phone prefix) as a
            decimal string.

    Returns:
        bool: True if the pair is accepted, False otherwise.
    """
    region = code.upper()
    accepted_pairs = {
        ('CA', '2'),    # French speaking Canada
        ('LA', '3'),    # Latin America
        ('XX', '785'),  # Middle East
        ('YU', '38'),   # Yugoslavia
        ('CZ', '42'),   # Czechoslovakia
    }
    if (region, pnum) in accepted_pairs:
        return True
    prefix = int(pnum, 10)
    return region in phonenumbers.region_codes_for_country_code(prefix)
def extract_known_codepages(lines):
    """
    Collect the codepage numbers listed in the CODEPAGES comment block.

    Scanning starts after a line that is exactly '; CODEPAGES:' (ignoring
    surrounding whitespace) and stops at the first following line that
    begins with '; =='.  Within that range, every "<digits> = <word>"
    occurrence on a comment line contributes its number.

    Args:
        lines: Iterable of source lines from country.asm.

    Returns:
        set: Set of known valid codepage numbers as strings
    """
    # Matches entries like "437 = US/OEM" or "30033 = Bulgarian MIK"
    entry_re = re.compile(r'\b(\d+)\s*=\s*\w')
    codepages = set()
    inside_block = False

    for raw in lines:
        text = raw.strip()
        if text == '; CODEPAGES:':
            # Block header: start (or keep) collecting from the next line
            inside_block = True
        elif not inside_block:
            continue
        elif text.startswith('; =='):
            # Next section separator ends the block
            break
        elif text.startswith(';'):
            codepages.update(hit.group(1) for hit in entry_re.finditer(text))

    return codepages
def _country_code_problem(country_map, numeric, statement, label):
    """Return the first country-code problem for one macro line, or None."""
    alpha2 = country_map.get(numeric)
    if not alpha2:
        return f"{label} country code {numeric} not found in country map"
    # Validate country code is ISO3166-1-A2
    if not is_alpha2(alpha2):
        return f"Country ISO3166-1-A2 ({alpha2}) invalid in '{statement}'"
    # Validate country code matches numeric country code
    if not is_country(alpha2, numeric):
        return f"Country ISO3166-1-A2 ({alpha2}) mismatch with International Phone Prefix ({numeric}) in '{statement}'"
    return None


def _multilang_problem(base_cc, ml_idx):
    """Return the first COUNTRY_ML range problem, or None."""
    idx = int(ml_idx)
    base = int(base_cc)
    # Multi-language sets currently have 3 or 4 variations, so idx is 0..3
    if not 0 <= idx <= 3:
        return f"ml_idx ({ml_idx}) not in expected range of 0 to 3"
    # Extended country code has the form 4XCCC: 40000 + idx * 1000 + base
    numeric_country = 40000 + idx * 1000 + base
    if not 40000 <= numeric_country <= 43999:
        return f"numeric_country ({numeric_country}) not in expected range"
    # Higher country codes are not supported for multi-language usage
    if not 1 <= base <= 999:
        return f"base_cc ({base_cc}) not in expected range"
    return None


def _codepage_problem(codepage, known_codepages):
    """Return a message when the codepage is not in the known set, else None."""
    if codepage and codepage not in known_codepages:
        return f"New codepage found {codepage}, update CODEPAGES comment block in country.asm or correct country.asm with correct codepage if it was just a typo."
    return None


def check_master(lines, known_codepages):
    """
    Validates COUNTRY, OLD_COUNTRY, COUNTRY_LCASE, COUNTRY_DBCS, and COUNTRY_ML macro invocations in NASM assembly.

    A numeric-code -> alpha-2 map is first built from comment lines of the
    form "; 1 = United States (US) 2 = Canada (CA)".  Each macro line is
    then checked, and only the first problem found on a line is printed
    and counted.

    Checks:
    - The numeric country code (base code for COUNTRY_ML) is in the map
    - Country codes are valid ISO3166-1-A2 (extracted from country.asm comments)
    - Country codes match international phone prefixes
    - COUNTRY_ML only: ml_idx is 0..3, the extended code is 40000..43999,
      and the base country code is 1..999
    - The codepage is one of known_codepages

    Args:
        lines: Sequence of source lines from country.asm.
        known_codepages: Container of valid codepage numbers as strings.

    Returns:
        tuple: (errors, num_found, obsolete_entries_found); entries whose
        macro name starts with OLD_ count as obsolete, all others as found.
    """
    # Build country map from comments in country.asm
    # Format: ; 1 = United States (US)      2 = Canada (CA)
    comment_country_re = re.compile(r"(\d+)\s*=\s*[^()]+\(([A-Z]{2})\)")
    country_map = {}
    for line in lines:
        if line.strip().startswith(';'):
            # Later occurrences of the same numeric code override earlier ones
            country_map.update(
                found.groups() for found in comment_country_re.finditer(line)
            )

    # COUNTRY 1, 437, ...   OLD_COUNTRY 38, 852, ...
    # COUNTRY_LCASE 7, 808, ...   COUNTRY_DBCS 81, 932, ...
    country_re = re.compile(r"^(OLD_)?COUNTRY(?:_LCASE|_DBCS)?\s+(\d+)\s*,\s*(\d+)")
    # COUNTRY_ML 32, 0, 850, ...
    country_ml_re = re.compile(r"^(OLD_)?COUNTRY_ML\s+(\d+)\s*,\s*(\d+)\s*,\s*(\d+)")

    errors = 0
    num_found = 0
    obsolete_entries_found = 0

    for line_no, line in enumerate(lines, start=1):
        # Strip comments and whitespace
        line_clean = line.split(';')[0].strip()

        found = country_re.match(line_clean)
        if found:
            old_prefix, numeric_country, codepage = found.groups()
            problem = (
                _country_code_problem(country_map, numeric_country, line_clean, "Numeric")
                or _codepage_problem(codepage, known_codepages)
            )
        else:
            found = country_ml_re.match(line_clean)
            if not found:
                continue
            old_prefix, base_cc, ml_idx, codepage = found.groups()
            # For ML, the base country code is used for the alpha2 lookup
            problem = (
                _country_code_problem(country_map, base_cc, line_clean, "Base numeric")
                or _multilang_problem(base_cc, ml_idx)
                or _codepage_problem(codepage, known_codepages)
            )

        if old_prefix:
            obsolete_entries_found += 1
        else:
            num_found += 1

        if problem:
            print(f"Line {line_no}: {problem}")
            errors += 1

    return (errors, num_found, obsolete_entries_found)
def main():
    """Validate country.asm and print the outcome.

    Reads COUNTRY_ASM, gathers the known codepages from its CODEPAGES
    comment block, and validates every country macro entry.

    Returns:
        int: 0 when validation passes, 2 when validation errors were
        found, 1 when country.asm could not be read.
    """
    try:
        lines = COUNTRY_ASM.read_text(encoding='utf-8').splitlines()
    except OSError as exc:
        # Report an unreadable/missing input file without a traceback
        print(f"Cannot read {COUNTRY_ASM}: {exc}", file=sys.stderr)
        return 1

    # gather codepage list from source comment instead of hard coding set
    known_codepages = extract_known_codepages(lines)

    # Country code validation
    errors, entries_found, obsolete_entries_found = check_master(lines, known_codepages)
    if errors:
        print(f"Errors = {errors}")
        return 2

    print(f"\n✅ Validation passed: {entries_found} entries found, with {obsolete_entries_found} obsolete entries; {len(known_codepages)} codepages")
    return 0


# Only run the validation when executed as a script, not on import
if __name__ == "__main__":
    sys.exit(main())