33import re
44import shutil
55import tempfile
6+ from hashlib import sha256
67
78from gettext import gettext as _
89from urllib .parse import quote , urlparse , urlunparse
3637metadata_files = []
3738
3839
39- def synchronize (remote_pk , repository_pk , mirror , url = None ):
def _get_sha256(file_path):
    """
    Compute the SHA256 hex digest of a file.

    The file is hashed in fixed-size chunks so that memory use stays bounded
    no matter how large the file is.

    Args:
        file_path (str): Path of the file to hash.

    Returns:
        str: The hexadecimal SHA256 digest of the file's content.

    """
    chunk_size = 1024 * 1024
    digest = sha256()
    with open(file_path, "rb") as f:
        # The two-argument form of iter() stops at the first empty read, i.e. at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
44+
45+
def _should_optimize_sync(sync_details, last_sync_details):
    """
    Decide whether the current sync is redundant with the previous one.

    Args:
        sync_details (dict): Details about the current sync configuration.
        last_sync_details (dict): Details about the previous sync configuration.

    Returns:
        bool: True if sync can be skipped; False otherwise.

    """
    # Without a record of a previous sync there is nothing to compare against.
    if not last_sync_details:
        return False

    previous = last_sync_details.get

    # Moving to the immediate policy may require downloading content.
    switched_to_immediate = (
        previous("download_policy") != "immediate"
        and sync_details["download_policy"] == "immediate"
    )
    if switched_to_immediate:
        return False

    # Moving to mirror mode requires a publication to be created.
    switched_to_mirror = not previous("mirror") and sync_details["mirror"]
    if switched_to_mirror:
        return False

    # Any difference in these values means the sync has to run.
    compared_keys = ("url", "most_recent_version", "manifest_checksum")
    return not any(previous(key) != sync_details[key] for key in compared_keys)
82+
83+
84+ def synchronize (remote_pk , repository_pk , mirror , optimize = False , url = None , ** kwargs ):
4085 """
4186 Sync content from the remote repository.
4287
@@ -46,6 +91,7 @@ def synchronize(remote_pk, repository_pk, mirror, url=None):
4691 remote_pk (str): The remote PK.
4792 repository_pk (str): The repository PK.
4893 mirror (bool): True for mirror mode, False for additive.
94+ optimize (bool): Whether to skip sync if nothing has changed.
4995 url (str): The url to synchronize. If omitted, the url of the remote is used.
5096
5197 Raises:
@@ -67,10 +113,42 @@ def synchronize(remote_pk, repository_pk, mirror, url=None):
67113 for stage in old_pipeline_stages (new_version )
68114 if not isinstance (stage , (RemoteArtifactSaver ))
69115 ]
116+ rv = dv .create ()
70117 else :
71- first_stage = FileFirstStage (remote , url )
118+ sync_url = url or remote .url
119+ version = repository .latest_version ()
120+
121+ # Download the manifest to compute its checksum for optimization
122+ downloader = remote .get_downloader (url = sync_url )
123+ manifest_result = downloader .fetch ()
124+
125+ sync_details = {
126+ "url" : remote .url ,
127+ "download_policy" : remote .policy ,
128+ "mirror" : mirror ,
129+ "most_recent_version" : version .number ,
130+ "manifest_checksum" : _get_sha256 (manifest_result .path ),
131+ }
132+
133+ if optimize and _should_optimize_sync (sync_details , repository .last_sync_details ):
134+ with ProgressReport (
135+ message = "Skipping Sync (no change from previous sync)" ,
136+ code = "sync.was_skipped" ,
137+ ) as pb :
138+ pb .total = 1
139+ pb .done = 1
140+ return
141+
142+ first_stage = FileFirstStage (remote , url , manifest_path = manifest_result .path )
72143 dv = DeclarativeVersion (first_stage , repository , mirror = mirror , acs = True )
73- rv = dv .create ()
144+ rv = dv .create ()
145+
146+ # Update last_sync_details after sync
147+ if rv :
148+ sync_details ["most_recent_version" ] = rv .number
149+ repository .last_sync_details = sync_details
150+ repository .save ()
151+
74152 if rv and mirror :
75153 # TODO: this is awful, we really should rewrite the DeclarativeVersion API to
76154 # accommodate this use case
@@ -98,18 +176,21 @@ class FileFirstStage(Stage):
98176 The first stage of a pulp_file sync pipeline.
99177 """
100178
def __init__(self, remote, url, manifest_path=None):
    """
    Set up the first stage of a pulp_file sync pipeline.

    Args:
        remote (FileRemote): The remote data to be used when syncing
        url (str): The base url of custom remote. When falsy, the url of the remote
            is used instead.
        manifest_path (str): Path to an already-downloaded manifest file. If provided,
            the manifest will not be downloaded again.

    """
    super().__init__()
    self.remote = remote
    # Fall back to the remote's own url when no override was given.
    self.url = url or remote.url
    self.manifest_path = manifest_path
113194
114195 async def run (self ):
115196 """
@@ -123,15 +204,19 @@ async def run(self):
123204 ) as pb :
124205 parsed_url = urlparse (self .url )
125206 root_dir = os .path .dirname (parsed_url .path )
126- downloader = self .remote .get_downloader (url = self .url )
127- result = await downloader .run ()
207+ if self .manifest_path :
208+ result_path = self .manifest_path
209+ else :
210+ downloader = self .remote .get_downloader (url = self .url )
211+ result = await downloader .run ()
212+ result_path = result .path
128213 await pb .aincrement ()
129- metadata_files .append ((result . path , self .url .split ("/" )[- 1 ]))
214+ metadata_files .append ((result_path , self .url .split ("/" )[- 1 ]))
130215
131216 async with ProgressReport (
132217 message = "Parsing Metadata Lines" , code = "sync.parsing.metadata"
133218 ) as pb :
134- manifest = Manifest (result . path )
219+ manifest = Manifest (result_path )
135220 entries = list (manifest .read ())
136221
137222 pb .total = len (entries )
0 commit comments