Skip to content

Commit 5aec296

Browse files
POC 1 (#1)
* WIP * Implement nested_zip_loader * Setup GHA * Add pytest-cov * Disable code-coverage action (for now)
1 parent 93c9624 commit 5aec296

File tree

8 files changed

+429
-52
lines changed

8 files changed

+429
-52
lines changed

.github/workflows/pull_request.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Pull Request
2+
on:
3+
pull_request
4+
5+
jobs:
6+
build:
7+
permissions:
8+
pull-requests: write
9+
runs-on: ubuntu-latest
10+
11+
steps:
12+
- uses: actions/checkout@v4
13+
14+
- uses: actions/setup-python@v5
15+
with:
16+
python-version-file: pyproject.toml
17+
18+
- uses: BrandonLWhite/pipx-install-action@v1.0.1
19+
20+
- run: poetry install
21+
- run: poe test
22+
- run: poetry build
23+
24+
# - uses: irongut/CodeCoverageSummary@v1.3.0
25+
# with:
26+
# filename: coverage.xml
27+
# badge: true
28+
# format: markdown
29+
# output: both
30+
31+
# - uses: marocchino/sticky-pull-request-comment@v2
32+
# with:
33+
# path: code-coverage-results.md

.github/workflows/release.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: Release
2+
on:
3+
release:
4+
types: [created]
5+
6+
jobs:
7+
build:
8+
permissions:
9+
pull-requests: write
10+
id-token: write # Needed for pypi trusted publishing
11+
runs-on: ubuntu-latest
12+
13+
steps:
14+
- uses: actions/checkout@v4
15+
16+
- uses: actions/setup-python@v5
17+
with:
18+
python-version-file: pyproject.toml
19+
20+
- uses: BrandonLWhite/pipx-install-action@v1.0.1
21+
22+
- run: poetry install
23+
- run: poe test
24+
- run: poetry version ${{ github.ref_name }}
25+
- run: poetry build
26+
27+
- name: Publish distribution 📦 to PyPI
28+
uses: pypa/gh-action-pypi-publish@release/v1

package_python_function/main.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import argparse
2+
from pathlib import Path
3+
4+
from .packager import Packager
5+
6+
7+
def main() -> None:
    """Command-line entry point.

    Parses the command line, turns the virtual-environment, project and
    output arguments into absolute paths, and hands them to a ``Packager``
    whose ``package()`` method does the actual work.
    """
    cli = parse_args()
    # Same resolution order as the arguments are consumed: project, venv, output.
    project_file, venv_dir, destination = (
        Path(raw).resolve() for raw in (cli.project, cli.venv, cli.output)
    )
    Packager(venv_dir, project_file, destination).package()
14+
15+
16+
def parse_args() -> argparse.Namespace:
17+
arg_parser = argparse.ArgumentParser()
18+
arg_parser.add_argument("venv", type=str)
19+
arg_parser.add_argument("--project", type=str, default='pyproject.toml')
20+
arg_parser.add_argument("--output", type=str, default='.')
21+
return arg_parser.parse_args()
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# AWS imposes a 10 second limit on the INIT sequence of a Lambda function. If this time limit is reached, the process
2+
# is terminated and the INIT is performed again as part of the function's billable invocation.
3+
# Reference: https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtime-environment.html
4+
#
5+
# For this reason, we can be left with an incomplete extraction and so care is taken to avoid inadvertently using it.
6+
#
7+
# From https://docs.python.org/3/reference/import.html
8+
# "The module will exist in sys.modules before the loader executes the module code. This is crucial because the module
9+
# code may (directly or indirectly) import itself"
10+
11+
# TODO: Inspired by serverless-python-requirements.
12+
13+
def load_nested_zip() -> None:
    """Extract the bundled dependency archive and make it importable.

    The archive ``.dependencies.zip`` is expected next to this file. It is
    extracted once into ``<tempdir>/package-python-function``; later calls
    that find that directory skip the extraction. The directory is then
    inserted into ``sys.path`` and this module is reloaded.

    Raises:
        FileNotFoundError: If the nested archive is missing.
        OSError: If the extraction or the final rename fails.
    """
    from pathlib import Path
    import sys
    import tempfile
    import importlib

    temp_path = Path(tempfile.gettempdir())

    target_package_path = temp_path / "package-python-function"

    if not target_package_path.exists():
        import zipfile
        import shutil
        import os

        staging_package_path = temp_path / ".stage.package-python-function"

        # A leftover staging directory means an earlier extraction did not
        # finish; discard it and start again from scratch.
        if staging_package_path.exists():
            shutil.rmtree(str(staging_package_path))

        # Must match the archive name the packager writes beside __init__.py.
        nested_zip_path = Path(__file__).parent / '.dependencies.zip'

        # Context manager so the archive handle is closed even on failure.
        with zipfile.ZipFile(str(nested_zip_path), 'r') as nested_zip:
            nested_zip.extractall(str(staging_package_path))

        # Extract into the staging directory first and rename last, so the
        # target path only ever appears once the extraction is complete
        # (a successful rename is atomic on POSIX).
        os.rename(str(staging_package_path), str(target_package_path))

    # sys.path entries must be strings; other types are ignored by the import
    # system. Index 1 keeps the existing first entry ahead of the extracted
    # packages: [<first entry>, <extracted packages>, ...].
    sys.path.insert(1, str(target_package_path))
    # NOTE(review): reload re-resolves this module through sys.path; confirm
    # that the extracted copy (not this loader) is what gets found.
    importlib.reload(sys.modules[__name__])
43+
44+
# Module-level call: the extraction and sys.path setup run whenever this module is imported.
load_nested_zip()
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from pathlib import Path
2+
from tempfile import NamedTemporaryFile
3+
import zipfile
4+
import shutil
5+
6+
from .python_project import PythonProject
7+
8+
9+
class Packager:
    """Build a deployment ZIP from the site-packages of a virtual environment.

    Everything under ``<venv>/lib/python*/site-packages`` is zipped. When the
    uncompressed size fits within ``AWS_LAMBDA_MAX_UNZIP_SIZE`` that archive is
    the output. Otherwise, if the compressed archive fits, a "nested" archive
    is produced: the dependency archive is stored inside an outer ZIP next to
    a loader script that becomes the entrypoint package's ``__init__.py``.
    """

    # 250 * 1024 * 1024 bytes.
    AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000

    def __init__(self, venv_path: Path, project_path: Path, output_path: Path):
        """
        Args:
            venv_path: Root directory of the virtual environment to package.
            project_path: Path handed to ``PythonProject`` to read project metadata.
            output_path: Output ZIP file, or an existing directory to create it in.
        """
        self.project = PythonProject(project_path)
        self.venv_path = venv_path
        self.output_path = output_path
        self._uncompressed_bytes = 0

    @property
    def output_file_path(self) -> Path:
        """Path of the ZIP to write: ``<output_path>/<project name>.zip`` when
        ``output_path`` is an existing directory, else ``output_path`` itself."""
        if self.output_path.is_dir():
            return self.output_path / f'{self.project.name}.zip'
        return self.output_path

    @property
    def input_path(self) -> Path:
        """The ``site-packages`` directory inside the virtual environment.

        Raises:
            FileNotFoundError: If ``<venv>/lib`` contains no ``python*`` entry.
        """
        # Sorted so the choice is deterministic if several python* dirs exist.
        python_paths = sorted((self.venv_path / 'lib').glob('python*'))
        if not python_paths:
            raise FileNotFoundError(
                f"No 'lib/python*' directory found in virtual environment: {self.venv_path}"
            )
        return python_paths[0] / 'site-packages'

    def package(self) -> None:
        """Print a summary of the inputs and build the output archive."""
        print("Packaging:", self.project.path)
        print("Output:", self.output_file_path)
        print("Input:", self.input_path)
        print("Entrypoint Package name:", self.project.entrypoint_package_name)

        # The intermediate archive lives in a temporary file that is removed
        # when the ``with`` block exits; the real output is written elsewhere.
        with NamedTemporaryFile() as dependencies_zip:
            self.zip_all_dependencies(Path(dependencies_zip.name))

    def zip_all_dependencies(self, target_path: Path) -> None:
        """Zip the whole of ``input_path`` into ``target_path`` and emit the output.

        Args:
            target_path: Where the intermediate dependency archive is written.

        Raises:
            RuntimeError: If even the compressed archive is not smaller than
                ``AWS_LAMBDA_MAX_UNZIP_SIZE``.
        """
        print(f"Zipping to {target_path} ...")

        # Resolve once: the property globs the venv on every access.
        site_packages = self.input_path
        # Start from zero so calling this method twice does not double-count.
        self._uncompressed_bytes = 0

        with zipfile.ZipFile(target_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            def zip_dir(path: Path) -> None:
                for item in path.iterdir():
                    if item.is_dir():
                        zip_dir(item)
                    else:
                        self._uncompressed_bytes += item.stat().st_size
                        zip_file.write(item, item.relative_to(site_packages))

            zip_dir(site_packages)

        compressed_bytes = target_path.stat().st_size

        print(f"Uncompressed size: {self._uncompressed_bytes:,} bytes")
        print(f"Compressed size: {compressed_bytes:,} bytes")

        if self._uncompressed_bytes <= self.AWS_LAMBDA_MAX_UNZIP_SIZE:
            shutil.copy(str(target_path), str(self.output_file_path))
            return

        print(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.")
        if compressed_bytes >= self.AWS_LAMBDA_MAX_UNZIP_SIZE:
            # Fail loudly instead of printing and exiting without any output file.
            raise RuntimeError(
                f"The compressed size ({compressed_bytes:,} bytes) is not below the AWS Lambda limit of "
                f"{self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes; the package is too large for AWS Lambda."
            )

        print(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.")
        self.generate_nested_zip(target_path)

    def generate_nested_zip(self, inner_zip_path: Path) -> None:
        """Write the outer archive containing the dependency archive and loader.

        The outer ZIP holds two members under the entrypoint package directory:
        ``.dependencies.zip`` (a copy of ``inner_zip_path``) and ``__init__.py``
        (the text of ``nested_zip_loader.py`` located beside this module).

        Args:
            inner_zip_path: The already-built dependency archive.
        """
        # Read the loader first so a missing file fails before the output is created.
        loader_source = Path(__file__).parent.joinpath("nested_zip_loader.py").read_text(encoding="utf-8")
        entrypoint_dir = Path(self.project.entrypoint_package_name)

        with zipfile.ZipFile(self.output_file_path, 'w') as outer_zip_file:
            # ZIP_STORED / ZIP_DEFLATED are compression *methods* and belong in
            # ``compress_type``; ``compresslevel`` expects a numeric level.
            # The inner archive is already deflated, so store it as-is.
            outer_zip_file.write(
                inner_zip_path,
                arcname=str(entrypoint_dir / ".dependencies.zip"),
                compress_type=zipfile.ZIP_STORED,
            )
            outer_zip_file.writestr(
                str(entrypoint_dir / "__init__.py"),
                loader_source,
                compress_type=zipfile.ZIP_DEFLATED,
            )
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from functools import cached_property
2+
from pathlib import Path
3+
from typing import Optional
4+
import tomllib
5+
6+
7+
class PythonProject:
8+
def __init__(self, path: Path):
9+
self.path = path
10+
self.toml = tomllib.loads(path.read_text())
11+
12+
@cached_property
13+
def name(self) -> str:
14+
return self.find_value((
15+
('project', 'name'),
16+
('tool', 'poetry', 'name'),
17+
))
18+
19+
@cached_property
20+
def entrypoint_package_name(self) -> str:
21+
"""
22+
The subdirectory name in the source virtual environment's site-packages that contains the function's entrypoint
23+
code.
24+
"""
25+
# TODO : Parse out the project's package dir(s). Use the first one if there are multiple.
26+
return self.name.replace('-', '_')
27+
28+
def find_value(self, paths: tuple[tuple[str]]) -> str:
29+
for path in paths:
30+
value = self.get_value(path)
31+
if value is not None:
32+
return value
33+
raise Exception("TODO Exception find_value")
34+
35+
def get_value(self, path: tuple[str]) -> Optional[str]:
36+
node = self.toml
37+
for name in path:
38+
node = node.get(name)
39+
if node is None:
40+
return None
41+
return node

0 commit comments

Comments
 (0)