-
Notifications
You must be signed in to change notification settings - Fork 215
Expand file tree
/
Copy pathsync_db_to_json.py
More file actions
396 lines (332 loc) · 16.2 KB
/
sync_db_to_json.py
File metadata and controls
396 lines (332 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
#!/usr/bin/env python3
"""
HWID Analysis Script v2 - Improved duplicate detection
This script analyzes HWID prefixes from device-db.sh and compares them with devices.json.
It updates devices.json with any missing HWID/boardname entries in the correct platform section.
Version 2 improves duplicate detection by checking for similar boardnames.
"""
import json
import re
import sys
import argparse
from pathlib import Path
# Base board names that the sync deliberately leaves alone: groups whose
# base HWID appears here are reported as skipped and never added or merged.
SKIP_BOARDNAMES = {
    'CORAL',
    'FIZZ',
    'NAMI',
    'RAMMUS',
    'SKYRIM',
}
def parse_device_db(device_db_path="device-db.sh"):
    """Parse the device database script and index its entries by HWID prefix.

    Every entry of the form ``["HWID*"]="description|cpu|override|flags|"``
    found anywhere in the file is extracted.

    Args:
        device_db_path: Location of the device database script. Defaults to
            ``device-db.sh`` in the current working directory.

    Returns:
        A dict mapping each HWID (trailing ``*`` wildcards removed) to a dict
        with the keys ``description``, ``cpu``, ``override``, ``flags`` and
        ``boardname``. ``boardname`` is the cleaned HWID itself, not the
        override column. An empty dict is returned (after printing an error)
        when the file does not exist.
    """
    device_db_file = Path(device_db_path)
    if not device_db_file.exists():
        print(f"Error: {device_db_path} not found")
        return {}

    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding.
    content = device_db_file.read_text(encoding="utf-8")

    # Matches entries like ["HWID*"]="description|cpu|override|flags|".
    # Description and cpu must be non-empty; override and flags may be empty.
    pattern = r'\["([^"]+)"\]="([^|]+)\|([^|]+)\|([^|]*)\|([^|]*)\|"'

    devices = {}
    for hwid, description, cpu, override, flags in re.findall(pattern, content):
        # Drop the trailing wildcard so the HWID can be compared and used
        # directly as the board name.
        clean_hwid = hwid.rstrip('*')
        devices[clean_hwid] = {
            'description': description.strip(),
            'cpu': cpu.strip(),
            'override': override.strip(),
            'flags': flags.strip(),
            'boardname': clean_hwid,  # HWID is the board name, not the override
        }
    return devices
def get_cpu_platform_mapping():
    """Return a fresh dict translating CPU codes to devices.json platform names."""
    code_platform_pairs = (
        # Intel
        ('SNB', 'Intel Sandybridge/Ivybridge'),
        ('IVB', 'Intel Sandybridge/Ivybridge'),
        ('HSW', 'Intel Haswell'),
        ('BDW', 'Intel Broadwell'),
        ('BYT', 'Intel Baytrail'),
        ('BSW', 'Intel Braswell'),
        ('SKL', 'Intel Skylake'),
        ('APL', 'Intel Apollolake'),
        ('KBL', 'Intel Kabylake / Amberlake'),
        ('GLK', 'Intel Geminilake'),
        ('WHL', 'Intel Whiskeylake'),
        ('CML', 'Intel Cometlake'),
        ('JSL', 'Intel JasperLake'),
        ('TGL', 'Intel TigerLake'),
        ('ADL', 'Intel Alderlake'),
        ('ADN', 'Intel Alderlake-N'),
        ('MTL', 'Intel Meteorlake'),
        # AMD
        ('STR', 'AMD Stoneyridge'),
        ('PCO', 'AMD Picasso'),
        ('CZN', 'AMD Cezanne'),
        ('MDN', 'AMD Mendocino'),
    )
    return dict(code_platform_pairs)
def load_devices_json(devices_json_path="devices.json"):
    """Load and return the parsed contents of a devices.json file.

    Args:
        devices_json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, or an empty dict (after printing an
        error) when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    devices_json_file = Path(devices_json_path)
    if not devices_json_file.exists():
        print(f"Error: {devices_json_path} not found")
        return {}
    # Decode explicitly as UTF-8 instead of the locale default, so device
    # names with non-ASCII characters load the same way on every platform.
    with open(devices_json_file, 'r', encoding='utf-8') as f:
        return json.load(f)
def normalize_boardname(boardname):
    """Reduce a boardname to a canonical key for comparisons.

    Only the text before the first '/' is kept (so "AURON_PAINE / PAINE"
    becomes "AURON_PAINE"); it is upper-cased and every character that is not
    a letter, digit or underscore is dropped. Falsy input yields "".
    """
    if not boardname:
        return ""
    # Whitespace never survives the final filter, so trimming once and
    # cutting at the first slash is all the preparation that is needed.
    primary_name = boardname.strip().partition('/')[0]
    return re.sub(r'\W', '', primary_name.upper())
def is_similar_description(desc1, desc2, *, prefix_len=12, min_prefix_len=8):
    """Decide whether two device descriptions refer to the same device.

    The comparison ignores case and surrounding whitespace and applies, in
    order:

    1. identical text is similar;
    2. when both texts have at least ``prefix_len`` characters, they are
       similar exactly when their first ``prefix_len`` characters agree;
    3. otherwise the shorter text must have at least ``min_prefix_len``
       characters and be a prefix of the longer one.

    Args:
        desc1: First description (falsy values are never similar).
        desc2: Second description (falsy values are never similar).
        prefix_len: Number of leading characters compared when both
            descriptions are long enough. Defaults to 12.
        min_prefix_len: Minimum length the shorter description needs before
            a prefix comparison is attempted. Defaults to 8.

    Returns:
        True when the descriptions are considered similar, else False.
    """
    if not desc1 or not desc2:
        return False

    norm1 = desc1.strip().upper()
    norm2 = desc2.strip().upper()
    if norm1 == norm2:
        return True

    # Both long enough: the leading prefix alone decides.
    if len(norm1) >= prefix_len and len(norm2) >= prefix_len:
        return norm1[:prefix_len] == norm2[:prefix_len]

    # At least one is short: accept only if the shorter one is a meaningful
    # prefix of the longer one. Very short texts would match too eagerly.
    shorter, longer = sorted((norm1, norm2), key=len)
    if len(shorter) >= min_prefix_len:
        return longer.startswith(shorter)
    return False
def find_similar_description(description, existing_descriptions):
    """Return the first existing description similar to ``description``.

    Similarity is decided by ``is_similar_description``; ``None`` is returned
    when no entry of ``existing_descriptions`` qualifies.
    """
    similar_candidates = (
        candidate
        for candidate in existing_descriptions
        if is_similar_description(description, candidate)
    )
    return next(similar_candidates, None)
def _normalized_boardname_aliases(existing_boardname):
    """Return the normalized form of every '/'-separated alias of a boardname."""
    normalized = (normalize_boardname(alias) for alias in existing_boardname.split('/'))
    return [alias for alias in normalized if alias]


def _is_word_boundary_variant(first, second):
    """Tell whether one name equals the other plus an '_'-delimited prefix or suffix."""
    for part, whole in ((first, second), (second, first)):
        if whole.startswith(part + '_') or whole.endswith('_' + part):
            return True
    return False


def find_existing_boardname(devices_json, boardname):
    """Locate the devices.json entry that already represents ``boardname``.

    Matching is done in two tiers across all platform sections:

    1. Exact: the stored boardname equals ``boardname`` verbatim, or one of
       its '/'-separated aliases (e.g. "AURON_PAINE / PAINE") normalizes to
       the same key as ``boardname``. The first exact match wins.
    2. Word boundary: an alias and the target differ only by an
       underscore-delimited prefix or suffix ("AURON" vs "AURON_PAINE").
       Plain substrings such as "QUANDISO" vs "QUANDISO3602" do not match.
       The first such entry is returned only if no exact match exists
       anywhere.

    Args:
        devices_json: Parsed devices.json, mapping platform name to a dict
            that may contain a ``devices`` list of entry dicts.
        boardname: Board name to look for.

    Returns:
        ``(platform, device_entry)`` for the matching entry, or
        ``(None, None)`` when nothing matches.
    """
    target_normalized = normalize_boardname(boardname)
    fuzzy_match = (None, None)

    for platform, data in devices_json.items():
        # Tolerate top-level values that are not platform sections.
        if not isinstance(data, dict):
            continue
        for device_entry in data.get('devices') or []:
            existing_boardname = device_entry.get('boardname') or ''

            if existing_boardname == boardname:
                return platform, device_entry

            # An empty key would "match" any name that merely starts or ends
            # with an underscore, so normalized comparisons need a real target.
            if not target_normalized:
                continue

            # Normalization strips '/', so aliases have to be split out
            # before normalizing; otherwise only the first alias is visible.
            aliases = _normalized_boardname_aliases(existing_boardname)
            if target_normalized in aliases:
                return platform, device_entry

            # Remember the first fuzzy hit but keep scanning: an exact match
            # further down must take precedence over it.
            if fuzzy_match[1] is None and any(
                _is_word_boundary_variant(target_normalized, alias) for alias in aliases
            ):
                fuzzy_match = (platform, device_entry)

    return fuzzy_match
def add_missing_device(devices_json, platform, boardname, descriptions, cpu_info):
    """Insert a device into a platform section, keeping boardnames sorted.

    Args:
        devices_json: Parsed devices.json structure; modified in place.
        platform: Name of the platform section that receives the device.
        boardname: Board name of the device.
        descriptions: One description string or a list of them.
        cpu_info: Accepted for the caller's convenience; not used here.

    Returns:
        True when an entry was inserted or an existing entry with an empty
        description list was filled in; False when the platform is unknown
        or the board already exists with descriptions.
    """
    if platform not in devices_json:
        print(f"Warning: Platform '{platform}' not found in devices.json")
        return False

    section_devices = devices_json[platform].setdefault('devices', [])

    # A bare string is treated as a one-element description list.
    if isinstance(descriptions, str):
        descriptions = [descriptions]

    # An entry with this exact boardname is only touched when it carries no
    # usable description yet.
    for entry in section_devices:
        if entry.get('boardname', '') != boardname:
            continue
        lacks_description = not entry.get('device') or entry['device'] == ['']
        if lacks_description:
            entry['device'] = descriptions
        return lacks_description

    # Insert before the first entry that sorts after the new boardname
    # (case-insensitive); fall back to the end of the list.
    insert_at = next(
        (
            index
            for index, entry in enumerate(section_devices)
            if boardname.lower() < entry.get('boardname', '').lower()
        ),
        len(section_devices),
    )
    section_devices.insert(insert_at, {'device': descriptions, 'boardname': boardname})
    return True
def analyze_and_update(devices_json_path="devices.json"):
    """Sync the HWID entries of device-db.sh into a devices.json file.

    Steps:
      1. Parse device-db.sh and load ``devices_json_path``; stop early (with
         a message) if either yields nothing.
      2. Group HWIDs by (base HWID, platform), where the base HWID is the
         part before the first space, or before the first hyphen when there
         is no space. HWIDs starting with ``LIBREM`` and HWIDs whose CPU code
         has no platform mapping are left out.
      3. For every group not listed in ``SKIP_BOARDNAMES``: merge new
         descriptions into a matching existing entry, or add a new entry to
         the group's platform section.
      4. Rewrite the JSON file only if something was added or updated, then
         print a summary.

    Args:
        devices_json_path: Path of the devices.json file to read and, when
            changes were made, overwrite.

    Returns:
        None. Results are reported on stdout and through the rewritten file.
    """
    print(f"Analyzing HWID prefixes from device-db.sh...")
    print(f"Using devices.json path: {devices_json_path}")
    # Parse device database
    device_db = parse_device_db()
    if not device_db:
        print("No devices found in device-db.sh")
        return
    print(f"Found {len(device_db)} HWID entries in device-db.sh")
    # Load devices.json
    devices_json = load_devices_json(devices_json_path)
    if not devices_json:
        print(f"No data found in {devices_json_path}")
        return
    # Get CPU to platform mapping
    cpu_mapping = get_cpu_platform_mapping()
    # Bookkeeping lists; they drive the save decision and the final summary
    missing_entries = []
    updated_entries = []
    skipped_duplicates = []
    skipped_purism = []
    # First pass: group HWID variants (hyphen- or space-suffixed) by base HWID
    grouped_devices = {}
    for hwid, info in device_db.items():
        # Skip Purism boards (recorded only for the summary)
        if hwid.startswith('LIBREM'):
            skipped_purism.append((hwid, info['description']))
            continue
        platform = cpu_mapping.get(info['cpu'])
        if not platform:
            print(f"Warning: Unknown CPU platform '{info['cpu']}' for HWID '{hwid}'")
            continue
        # Extract base HWID (part before hyphen or space, remove * suffix)
        clean_hwid = hwid.rstrip('*')
        # Handle spaces: LARS [DE] -> LARS
        if ' ' in clean_hwid:
            base_hwid = clean_hwid.split(' ')[0]
        else:
            # Handle hyphens: BUJIA-FWVA -> BUJIA
            base_hwid = clean_hwid.split('-')[0]
        # Group by base HWID and platform; the same base HWID on two
        # platforms therefore forms two separate groups
        key = (base_hwid, platform)
        if key not in grouped_devices:
            # 'cpu' and 'flags' come from the first HWID seen for the group
            grouped_devices[key] = {
                'base_hwid': base_hwid,
                'platform': platform,
                'descriptions': [],
                'all_hwids': [],
                'cpu': info['cpu'],
                'flags': info['flags']
            }
        grouped_devices[key]['descriptions'].append(info['description'])
        grouped_devices[key]['all_hwids'].append(hwid)
    # Second pass: reconcile each group with devices.json
    for (base_hwid, platform), group_info in grouped_devices.items():
        # Skip board names in the skip list
        if base_hwid in SKIP_BOARDNAMES:
            print(f"Skipping boardname: {base_hwid}")
            continue
        # Combine descriptions, removing duplicates
        unique_descriptions = list(dict.fromkeys(group_info['descriptions']))  # Preserve order, remove duplicates
        # Look for the base HWID in every platform section of devices.json
        existing_platform, existing_device = find_existing_boardname(devices_json, base_hwid)
        if existing_device:
            # Device already exists - merge any missing descriptions from database
            existing_descriptions = existing_device.get('device', [])
            if not existing_descriptions or existing_descriptions == ['']:
                # No existing descriptions, use all from database
                existing_device['device'] = unique_descriptions
                updated_entries.append((base_hwid, f"{len(unique_descriptions)} variants", existing_device.get('boardname', base_hwid)))
                print(f"Updated descriptions for {existing_device.get('boardname', base_hwid)}: {len(unique_descriptions)} variants")
            else:
                # Merge missing descriptions from database into existing ones, avoiding similar duplicates
                truly_new_descriptions = []
                for desc in unique_descriptions:
                    # Check if this description is similar to any existing one
                    similar_existing = find_similar_description(desc, existing_descriptions)
                    if not similar_existing:
                        # Not similar to any existing description, add it
                        truly_new_descriptions.append(desc)
                    else:
                        # Similar to existing description, skip it
                        print(f" Skipping similar description: '{desc}' (similar to existing: '{similar_existing}')")
                if truly_new_descriptions:
                    # Existing descriptions stay first; new ones are appended after them
                    combined_descriptions = existing_descriptions + truly_new_descriptions
                    existing_device['device'] = combined_descriptions
                    updated_entries.append((base_hwid, f"added {len(truly_new_descriptions)} new descriptions", existing_device.get('boardname', base_hwid)))
                    print(f"Updated {existing_device.get('boardname', base_hwid)}: added {len(truly_new_descriptions)} new descriptions")
                    print(f" New descriptions: {', '.join(truly_new_descriptions[:2])}{'...' if len(truly_new_descriptions) > 2 else ''}")
                else:
                    # Every database description is already covered by the entry
                    skipped_duplicates.append((base_hwid, f"{len(unique_descriptions)} variants", existing_device.get('boardname', base_hwid)))
                    print(f"Skipped duplicate: {base_hwid} (exists as {existing_device.get('boardname', base_hwid)})")
        else:
            # Add new device with combined descriptions; the whole group dict
            # is handed over as the cpu_info argument
            if add_missing_device(devices_json, platform, base_hwid, unique_descriptions, group_info):
                missing_entries.append((base_hwid, f"{len(unique_descriptions)} variants", platform))
                print(f"Added: {base_hwid} -> {len(unique_descriptions)} variants to {platform}")
                print(f" Variants: {', '.join(group_info['all_hwids'])}")
                print(f" Descriptions: {'; '.join(unique_descriptions[:2])}{'...' if len(unique_descriptions) > 2 else ''}")
    # Save updated devices.json only when an entry was added or updated
    if missing_entries or updated_entries:
        with open(devices_json_path, 'w') as f:
            json.dump(devices_json, f, indent=4)
        print(f"\nSummary:")
        print(f"Added {len(missing_entries)} missing entries")
        print(f"Updated {len(updated_entries)} existing entries")
        print(f"Skipped {len(skipped_duplicates)} duplicates")
        print(f"Skipped {len(skipped_purism)} Purism boards")
        if missing_entries:
            print("\nMissing entries added:")
            for hwid, desc, platform in missing_entries:
                print(f" {hwid}: {desc} -> {platform}")
        if updated_entries:
            print("\nUpdated entries:")
            for hwid, desc, existing in updated_entries:
                print(f" {hwid} (as {existing}): {desc}")
        if skipped_duplicates:
            print("\nSkipped duplicates:")
            for hwid, desc, existing in skipped_duplicates:
                print(f" {hwid} (exists as {existing}): {desc}")
        if skipped_purism:
            print("\nSkipped Purism boards:")
            for hwid, desc in skipped_purism:
                print(f" {hwid}: {desc}")
    else:
        # Nothing added or updated: the file is left untouched and the
        # detailed summary is not printed
        print("No updates needed - all HWID entries are already present in devices.json")
def main(argv=None):
    """Parse command line arguments and run the devices.json sync.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``.
    """
    # %(prog)s is expanded by argparse to the actual script name, so the
    # usage examples stay correct however the file is named.
    parser = argparse.ArgumentParser(
        description="Analyze HWID prefixes from device-db.sh and update devices.json",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 %(prog)s                        # Use default devices.json
  python3 %(prog)s my_devices.json        # Use custom path
  python3 %(prog)s /path/to/devices.json  # Use absolute path
"""
    )
    parser.add_argument(
        'devices_json_path',
        nargs='?',
        default='devices.json',
        help='Path to devices.json file (default: devices.json)'
    )
    args = parser.parse_args(argv)
    analyze_and_update(args.devices_json_path)
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()