66
77import filecmp
88import json
9+ import logging
910import re
1011import tarfile
1112import tempfile
1213from os import PathLike
1314from pathlib import Path
14- from typing import Optional
15+ from typing import Any , Optional
16+
17+ import patoolib
1518
1619
1720class Comparator (object ):
@@ -35,12 +38,18 @@ class Comparator(object):
3538 r"/etc/shadow" ,
3639 r"/etc/update-motd\.d/05-logo" ,
3740 r"/var/lib/apt/lists/packages\.gardenlinux\.io_gardenlinux_dists_[0-9]*\.[0-9]*\.[0-9]*_.*" ,
38- r"/var/lib/apt/lists/packages\.gardenlinux\.io_gardenlinux_dists_[0-9]*\.[0-9]*\.[0-9]*_main_binary-(arm64|amd64)_Packages " ,
39- r"/efi/loader/entries/Default-[0-9]*\.[0-9]*\.[0-9]*-(cloud-)?(arm64|amd64) \.conf" ,
40- r"/efi/Default/[0-9]*\.[0-9]*\.[0-9]*-(cloud-)?(arm64|amd64) /initrd" ,
41- r"/boot/initrd\.img-[0-9]*\.[0-9]*\.[0-9]*-(cloud-)?(arm64|amd64) " ,
41+ r"/var/lib/apt/lists/packages\.gardenlinux\.io_gardenlinux_dists_[0-9]*\.[0-9]*\.[0-9]*_main_binary-ARCH_Packages " ,
42+ r"/efi/loader/entries/Default-[0-9]*\.[0-9]*\.[0-9]*-(cloud-)?ARCH \.conf" ,
43+ r"/efi/Default/[0-9]*\.[0-9]*\.[0-9]*-(cloud-)?ARCH /initrd" ,
44+ r"/boot/initrd\.img-[0-9]*\.[0-9]*\.[0-9]*-(cloud-)?ARCH " ,
4245 ]
4346
47+ _cname = re .compile (
48+ r"[a-zA-Z0-9]+([\\_\\-][a-zA-Z0-9]+)*-([0-9.]+|local)-([a-f0-9]{8}|today)"
49+ )
50+
51+ _arch = re .compile (r"(arm64|amd64)" )
52+
4453 def __init__ (
4554 self , nightly : bool = False , whitelist : list [str ] = _default_whitelist
4655 ):
@@ -56,6 +65,10 @@ def __init__(
5665 if nightly :
5766 self .whitelist += self ._nightly_whitelist
5867
68+ # Mute INFO logs from patool
69+ patool_logger = logging .getLogger ("patool" )
70+ patool_logger .setLevel ("WARNING" )
71+
5972 @staticmethod
6073 def _unpack (file : PathLike [str ]) -> tempfile .TemporaryDirectory [str ]:
6174 """
@@ -117,61 +130,128 @@ def _unpack(file: PathLike[str]) -> tempfile.TemporaryDirectory[str]:
117130 except tarfile .TarError as e :
118131 print (f"Skipping { member .name } due to error: { e } " )
119132 else :
120- with tarfile .open (file , "r" ) as tar :
121- tar .extractall (
122- path = output_dir .name ,
123- filter = "fully_trusted" ,
124- members = tar .getmembers (),
125- )
133+ patoolib .extract_archive (str (file ), outdir = output_dir .name , verbosity = - 2 )
126134
127135 return output_dir
128136
129137 def _diff_files (
130- self , cmp : filecmp .dircmp [str ], left_root : Optional [Path ] = None
131- ) -> list [str ]:
138+ self ,
139+ cmp : filecmp .dircmp [str ],
140+ left_root : Optional [Path ] = None ,
141+ right_root : Optional [Path ] = None ,
142+ ) -> dict [str , tuple [Optional [str ], Optional [str ]]]:
132143 """
133144 Recursively compare files
134145
135- :param cmp: Dircmp to recursively compare
136- :param left_root: Left root to obtain the archive relative path
146+ :param cmp: Dircmp to recursively compare
147+ :param left_root: Left root to obtain the archive relative path
137148
138- :return: list[Path] List of paths with different content
149+ :return: dict[str, tuple[Optional[str], Optional[str]]] Dict with general name, left name and right name of files with different content
139150 :since: 1.0.0
140151 """
141152
142- result = []
153+ # {general name: (actual name left, actual name right)}
154+ result : dict [str , tuple [Optional [str ], Optional [str ]]] = {}
143155 if not left_root :
144156 left_root = Path (cmp .left )
157+ if not right_root :
158+ right_root = Path (cmp .right )
159+ for name in cmp .left_only :
160+ if not (
161+ name .endswith (".log" )
162+ and Path (cmp .left ).joinpath (name .rstrip (".log" )).is_file ()
163+ ):
164+ actual_name = f"/{ Path (cmp .left ).relative_to (left_root ).joinpath (name )} "
165+ general_name = self ._arch .sub (
166+ "ARCH" , self ._cname .sub ("CNAME" , actual_name )
167+ )
168+ result [general_name ] = (actual_name , None )
169+ for name in cmp .right_only :
170+ if not (
171+ name .endswith (".log" )
172+ and Path (cmp .right ).joinpath (name .rstrip (".log" )).is_file ()
173+ ):
174+ actual_name = (
175+ f"/{ Path (cmp .right ).relative_to (right_root ).joinpath (name )} "
176+ )
177+ general_name = self ._arch .sub (
178+ "ARCH" , self ._cname .sub ("CNAME" , actual_name )
179+ )
180+ if general_name not in result :
181+ result [general_name ] = (None , actual_name )
182+ else :
183+ result [general_name ] = (result [general_name ][0 ], actual_name )
145184 for name in cmp .diff_files :
146- result .append (f"/{ Path (cmp .left ).relative_to (left_root ).joinpath (name )} " )
185+ # Ignore *.log files as the timestamp differs always
186+ if not (
187+ name .endswith (".log" )
188+ and Path (cmp .left ).joinpath (name .rstrip (".log" )).is_file ()
189+ ):
190+ actual_name = f"/{ Path (cmp .left ).relative_to (left_root ).joinpath (name )} "
191+ general_name = self ._arch .sub (
192+ "ARCH" , self ._cname .sub ("CNAME" , actual_name )
193+ )
194+
195+ result [general_name ] = (actual_name , actual_name )
196+
147197 for sub_cmp in cmp .subdirs .values ():
148- result += self ._diff_files (sub_cmp , left_root = left_root )
198+ result |= self ._diff_files (
199+ sub_cmp , left_root = left_root , right_root = right_root
200+ )
149201 return result
150202
151- def generate (self , a : PathLike [str ], b : PathLike [str ]) -> tuple [list [str ], bool ]:
203+ def generate (
204+ self , a : PathLike [str ], b : PathLike [str ]
205+ ) -> tuple [dict [str , Any ], bool ]:
152206 """
153207 Compare two .tar/.oci images with each other
154208
155209 :param a: First .tar/.oci file
156210 :param b: Second .tar/.oci file
157211
158- :return: list[Path ], bool Filtered list of paths with different content and flag indicating if whitelist was applied
212+ :return: dict[str, Any ], bool Filtered recursive dict of paths with different content and flag indicating if whitelist was applied
159213 :since: 1.0.0
160214 """
161215
162216 if filecmp .cmp (a , b , shallow = False ):
163- return [] , False
217+ return {} , False
164218
165219 with self ._unpack (a ) as unpacked_a , self ._unpack (b ) as unpacked_b :
166220 cmp = filecmp .dircmp (unpacked_a , unpacked_b , shallow = False )
167221
168222 diff_files = self ._diff_files (cmp )
169223
170- filtered = [
171- file
172- for file in diff_files
173- if not any (re .match (pattern , file ) for pattern in self .whitelist )
174- ]
175- whitelist = len (diff_files ) != len (filtered )
176-
177- return filtered , whitelist
224+ filtered : dict [tuple [str , Optional [str ], Optional [str ]], Any ] = {
225+ (
226+ general_name ,
227+ diff_files [general_name ][0 ],
228+ diff_files [general_name ][1 ],
229+ ): {}
230+ for general_name in diff_files
231+ if not any (
232+ re .match (pattern , general_name ) for pattern in self .whitelist
233+ )
234+ }
235+ whitelist = len (diff_files ) != len (filtered )
236+
237+ result : dict [str , Any ] = {}
238+ for general_name , left_name , right_name in filtered :
239+ result [general_name ] = {}
240+ if left_name and right_name :
241+ file_a = Path (unpacked_a ).joinpath (left_name [1 :])
242+ file_b = Path (unpacked_b ).joinpath (right_name [1 :])
243+ if (
244+ file_a .is_file ()
245+ and file_b .is_file ()
246+ and patoolib .is_archive (file_a )
247+ and patoolib .is_archive (file_b )
248+ ):
249+ filtered_rec , whitelist_rec = self .generate (file_a , file_b )
250+ whitelist = whitelist or whitelist_rec
251+ if filtered_rec != {}:
252+ result [general_name ] = filtered_rec
253+ else :
254+ # Remove if no files found in an archive to not count different timestamps inside the archives as a difference
255+ del result [general_name ]
256+
257+ return result , whitelist
0 commit comments