Skip to content

Commit 4715faf

Browse files
authored
[INFRA-6259] feat: implement reproducible zip (#10)
Alters the zip method to reset the permissions and timestamps of all files before they are zipped. The file modification time respects $SOURCE_DATE_EPOCH (according to the reproducible builds spec[1]) and defaults to 1980-01-01. The file permissions are also set to 644 to enforce standardization. More information on reproducible builds for zip files can be found here[2]. [1]: https://reproducible-builds.org/docs/timestamps/ [2]: https://reproducible-builds.org/docs/archives/
1 parent a89f3ee commit 4715faf

File tree

3 files changed

+57
-13
lines changed

3 files changed

+57
-13
lines changed

package_python_function/packager.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import os
5+
import shutil
6+
import time
7+
import zipfile
18
from pathlib import Path
29
from tempfile import NamedTemporaryFile
3-
import zipfile
4-
import shutil
5-
import logging
10+
from typing import TYPE_CHECKING
611

712
from .python_project import PythonProject
813

14+
if TYPE_CHECKING:
15+
from typing import Tuple
916

1017
logger = logging.getLogger(__name__)
1118

12-
1319
class Packager:
1420
AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000
1521

@@ -40,14 +46,34 @@ def package(self) -> None:
4046
def zip_all_dependencies(self, target_path: Path) -> None:
4147
logger.info(f"Zipping to {target_path}...")
4248

43-
with zipfile.ZipFile(target_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
49+
def date_time() -> Tuple[int, int, int, int, int, int]:
    """Return the timestamp that is forced onto every ZipInfo entry.

    Defaults to 1980-01-01 00:00:00. Set the environment variable
    SOURCE_DATE_EPOCH to an integer number of seconds since the Unix epoch
    to override it; the value is interpreted as UTC.

    The result is clamped to the range the ZIP date fields can encode
    (1980-01-01 00:00:00 through 2107-12-31 23:59:59), so a value such as
    ``SOURCE_DATE_EPOCH=0`` falls back to the minimum instead of yielding
    a timestamp that cannot be stored in the archive.

    Returns:
        A ``(year, month, day, hour, minute, second)`` tuple.

    Raises:
        ValueError: If SOURCE_DATE_EPOCH is set but is not an integer.
    """
    zip_min = (1980, 1, 1, 0, 0, 0)
    zip_max = (2107, 12, 31, 23, 59, 59)

    source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH")
    if source_date_epoch is None:
        return zip_min

    stamp = tuple(time.gmtime(int(source_date_epoch))[:6])
    # Tuples compare field by field, which matches chronological order here.
    return min(max(stamp, zip_min), zip_max)
58+
59+
with zipfile.ZipFile(target_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
60+
4461
def zip_dir(path: Path) -> None:
    """Recursively add every file below *path* to ``zip_file``.

    Each entry is written with normalized metadata so that the archive
    does not depend on the state of the files on disk: the timestamp comes
    from ``date_time()`` and the permission bits are set to 0o644. Archive
    names are relative to ``self.input_path``. The size of every file is
    added to ``self._uncompressed_bytes``.

    Args:
        path: Directory whose contents are added to the archive.
    """
    entry_time = date_time()

    # iterdir() yields entries in filesystem order; sort so the order of
    # entries in the archive is the same on every run and machine.
    for item in sorted(path.iterdir()):
        if item.is_dir():
            zip_dir(item)
            continue

        # strict_timestamps=False: the on-disk mtime is discarded below, so
        # a file dated before 1980 must not abort the build.
        zinfo = zipfile.ZipInfo.from_file(
            item,
            item.relative_to(self.input_path),
            strict_timestamps=False,
        )
        zinfo.date_time = entry_time
        zinfo.external_attr = 0o644 << 16
        # A ZipInfo from from_file() defaults to ZIP_STORED, and
        # ZipFile.open() honours the ZipInfo rather than the archive-wide
        # setting, so request deflate explicitly.
        zinfo.compress_type = zipfile.ZIP_DEFLATED

        self._uncompressed_bytes += item.stat().st_size
        with (
            open(item, "rb") as src,
            zip_file.open(zinfo, "w") as dest,
        ):
            shutil.copyfileobj(src, dest, 1024 * 8)
5177

5278
zip_dir(self.input_path)
5379

@@ -61,7 +87,7 @@ def zip_dir(path: Path) -> None:
6187
logger.info(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.")
6288
self.generate_nested_zip(target_path)
6389
else:
64-
print(f"TODO Error. The unzipped size it too large for AWS Lambda.")
90+
print("TODO Error. The unzipped size it too large for AWS Lambda.")
6591
else:
6692
logger.info(f"Copying '{target_path}' to '{self.output_file}'")
6793
shutil.copy(str(target_path), str(self.output_file))
@@ -80,4 +106,4 @@ def generate_nested_zip(self, inner_zip_path: Path) -> None:
80106
str(entrypoint_dir / "__init__.py"),
81107
Path(__file__).parent.joinpath("nested_zip_loader.py").read_text(),
82108
compresslevel=zipfile.ZIP_DEFLATED
83-
)
109+
)

tests/projects/project-1/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description = "project-1"
55
authors = [{ name = "Brandon White", email = "brandonlwhite@gmail.com" }]
66
license = "MIT"
77
readme = "README.md"
8-
requires-python = "^3.10"
8+
requires-python = ">=3.10,<4.0"
99

1010

1111
[build-system]

tests/test_package_python_function.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1-
from pathlib import Path
21
import sys
3-
from package_python_function.main import main
2+
import zipfile
3+
from pathlib import Path
44

5+
from package_python_function.main import main
56

67
PROJECTS_DIR_PATH = Path(__file__).parent / 'projects'
78

8-
99
def test_package_python_function(tmp_path: Path) -> None:
10+
EXPECTED_FILE_MODE = 0o644
11+
EXPECTED_FILE_DATE_TIME = (1980, 1, 1, 0, 0, 0)
12+
1013
project_file_path = PROJECTS_DIR_PATH / 'project-1' / 'pyproject.toml'
1114

1215
venv_dir_path = tmp_path / 'venv'
@@ -34,4 +37,19 @@ def test_package_python_function(tmp_path: Path) -> None:
3437
]
3538
main()
3639

37-
assert (output_dir_path / 'project_1.zip').exists()
40+
zip_file = output_dir_path / "project_1.zip"
41+
assert zip_file.exists()
42+
43+
verify_dir = tmp_path / "verify"
44+
verify_dir.mkdir()
45+
with zipfile.ZipFile(zip_file, "r") as zip:
46+
zip.extractall(verify_dir)
47+
for file_info in zip.infolist():
48+
mode = (file_info.external_attr >> 16) & 0xFFFF
49+
assert mode == EXPECTED_FILE_MODE
50+
assert file_info.date_time == EXPECTED_FILE_DATE_TIME
51+
52+
assert (verify_dir / "project_1" / "__init__.py").exists()
53+
assert (verify_dir / "project_1" / "project1.py").exists()
54+
assert (verify_dir / "small_dependency" / "__init__.py").exists()
55+
assert (verify_dir / "small_dependency" / "small_dependency.py").exists()

0 commit comments

Comments
 (0)