Skip to content

Commit 7a9be72

Browse files
authored
replace path with elf/header checks (#62)
* replace path with elf/header checks * version update
1 parent 8d18b2d commit 7a9be72

3 files changed

Lines changed: 166 additions & 69 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
99

1010
[project]
1111
name = "PyPcre"
12-
version = "0.2.8"
12+
version = "0.2.9"
1313
description = "Modern, GIL-friendly, Fast Python bindings for PCRE2 with auto caching and JIT of compiled patterns."
1414
readme = "README.md"
1515
requires-python = ">=3.9"

setup_utils.py

Lines changed: 121 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import platform
1010
import shlex
1111
import shutil
12+
import struct
1213
import subprocess
1314
import sys
1415
import sysconfig
@@ -870,43 +871,134 @@ def _linux_multiarch_dirs() -> list[str]:
870871
return mapping.get(arch, [])
871872

872873

873-
_KNOWN_MULTIARCH_TOKENS = {
874-
"x86_64-linux-gnu",
875-
"i386-linux-gnu",
876-
"i486-linux-gnu",
877-
"i586-linux-gnu",
878-
"i686-linux-gnu",
879-
"aarch64-linux-gnu",
880-
"arm-linux-gnueabihf",
881-
"powerpc64le-linux-gnu",
882-
"s390x-linux-gnu",
883-
}
874+
def _host_pointer_width() -> int:
    """Return the size of a native pointer for this interpreter, in bits."""
    pointer_bytes = struct.calcsize("P")
    return 8 * pointer_bytes
884876

885-
_UNSUPPORTED_MULTIARCH_TOKENS = {
886-
"i386-linux-gnu",
887-
"i486-linux-gnu",
888-
"i586-linux-gnu",
889-
"i686-linux-gnu",
890-
"arm-linux-gnueabihf",
891-
}
877+
878+
# Mach-O magic numbers, expressed as the value obtained when a file's first
# four bytes are decoded as a big-endian unsigned int.  Every set pairs a magic
# with its byte-swapped twin so headers written in either byte order match.
_MACHO_MAGIC_32 = {0xFEEDFACE, 0xCEFAEDFE}  # thin 32-bit image
_MACHO_MAGIC_64 = {0xFEEDFACF, 0xCFFAEDFE}  # thin 64-bit image
_MACHO_FAT_MAGIC = {0xCAFEBABE, 0xBEBAFECA}  # fat (multi-architecture) container
_MACHO_FAT_MAGIC_64 = {0xCAFEBABF, 0xBFBAFECA}  # fat container, 64-bit variant
# Bit that is set in a fat-arch ``cputype`` when the slice targets a 64-bit ABI.
_MACHO_ABI64_FLAG = 0x01000000
# The same magics as raw 4-byte prefixes, ready to compare against file bytes.
_MACHO_MAGIC_BYTES = {
    magic.to_bytes(4, "big")
    for magic in set().union(
        _MACHO_MAGIC_32,
        _MACHO_MAGIC_64,
        _MACHO_FAT_MAGIC,
        _MACHO_FAT_MAGIC_64,
    )
}
884+
885+
886+
def _elf_class_bits(path: Path) -> int | None:
    """Report the word size recorded in an ELF file's identification bytes.

    Returns 32 or 64 according to the fifth byte of the file.  Returns
    ``None`` when the file cannot be opened or read, holds fewer than five
    bytes, does not start with the ELF magic, or has any other class value.
    """
    try:
        with path.open("rb") as stream:
            ident = stream.read(5)
    except OSError:
        return None
    if len(ident) >= 5 and ident.startswith(b"\x7fELF"):
        # Byte 4 is the class field: 1 means 32-bit, 2 means 64-bit.
        return {1: 32, 2: 64}.get(ident[4])
    return None
899+
900+
901+
def _macho_class_bits(path: Path, host_bits: int) -> int | None:
    """Return the word size (32 or 64) advertised by a Mach-O file.

    Thin images are classified directly from their magic number.  For fat
    (multi-architecture) containers the architecture table is scanned: if any
    slice matches ``host_bits`` that value is returned, otherwise the width of
    the first slice is returned.

    Args:
        path: File to inspect.
        host_bits: Pointer width of the host (32 or 64), used to pick the
            preferred slice of a fat container.

    Returns:
        32 or 64, or ``None`` when the file cannot be read, is not Mach-O,
        is truncated, or is a fat container with no architecture entries.
    """
    try:
        with path.open("rb") as handle:
            header = handle.read(8)
            if len(header) < 4:
                return None
            magic = struct.unpack(">I", header[:4])[0]
            if magic in _MACHO_MAGIC_32:
                return 32
            if magic in _MACHO_MAGIC_64:
                return 64

            is_fat64 = magic in _MACHO_FAT_MAGIC_64
            if not is_fat64 and magic not in _MACHO_FAT_MAGIC:
                return None
            # A fat header is magic + nfat_arch; without the count there is
            # nothing to parse (and unpacking a short slice would raise).
            if len(header) < 8:
                return None

            endian = ">" if magic in (0xCAFEBABE, 0xCAFEBABF) else "<"
            nfat_arch = struct.unpack_from(f"{endian}I", header, 4)[0]
            # fat_arch is five 32-bit fields (20 bytes); fat_arch_64 widens
            # offset/size to 64 bits and adds a reserved word (32 bytes).
            arch_entry_size = 32 if is_fat64 else 20
            table_size = nfat_arch * arch_entry_size

            # nfat_arch is untrusted: make sure the table fits in the file
            # before asking read() for a buffer of that size.
            file_size = handle.seek(0, 2)  # 2 == os.SEEK_END
            if table_size > file_size - len(header):
                return None
            handle.seek(len(header))
            arch_data = handle.read(table_size)
            if len(arch_data) < table_size:
                return None

            first_bits = None
            for offset in range(0, table_size, arch_entry_size):
                cputype = struct.unpack_from(f"{endian}I", arch_data, offset)[0]
                bits = 64 if (cputype & _MACHO_ABI64_FLAG) else 32
                if bits == host_bits:
                    return host_bits
                if first_bits is None:
                    first_bits = bits
            # No slice matched the host: fall back to the first slice's width
            # (None when the table is empty).
            return first_bits
    except OSError:
        return None
934+
935+
936+
def _pe_class_bits(path: Path) -> int | None:
    """Report whether a PE file carries a 32-bit or 64-bit optional header.

    Follows the offset stored at 0x3C of the 64-byte DOS header to the PE
    signature, skips the 20-byte file header, and reads the optional-header
    magic: 0x20B yields 64 and 0x10B yields 32.

    Returns ``None`` when the file cannot be read, is truncated at any of
    those steps, lacks the ``MZ``/``PE`` signatures, or has another magic.
    """
    try:
        with path.open("rb") as stream:
            dos_header = stream.read(64)
            if len(dos_header) < 64 or dos_header[:2] != b"MZ":
                return None
            (pe_offset,) = struct.unpack_from("<I", dos_header, 0x3C)
            stream.seek(pe_offset)
            if stream.read(4) != b"PE\x00\x00":
                return None
            coff_header = stream.read(20)
            if len(coff_header) < 20:
                return None
            magic_field = stream.read(2)
            if len(magic_field) < 2:
                return None
            (optional_magic,) = struct.unpack("<H", magic_field)
            return {0x20B: 64, 0x10B: 32}.get(optional_magic)
    except OSError:
        return None
961+
962+
963+
def _binary_matches_host(path: Path) -> bool:
    """Decide whether a binary's word size is compatible with the host.

    The first four bytes select a format-specific probe (ELF, Mach-O or PE).
    The file is rejected, with a message printed, only when a probe reports a
    width that differs from the host pointer width.  Unreadable files,
    unrecognised formats and inconclusive probes are all accepted.
    """
    expected_bits = _host_pointer_width()
    try:
        with path.open("rb") as stream:
            prefix = stream.read(4)
    except OSError:
        # Cannot inspect the file, so do not filter it out.
        return True

    if prefix.startswith(b"\x7fELF"):
        detected = _elf_class_bits(path)
    elif prefix in _MACHO_MAGIC_BYTES:
        detected = _macho_class_bits(path, expected_bits)
    elif prefix.startswith(b"MZ"):
        detected = _pe_class_bits(path)
    else:
        detected = None

    if detected is None or detected == expected_bits:
        return True
    print(f"Skipping lib (binary class mismatch): {path}")
    return False
892984

893985

894986
def _host_multiarch_names() -> set[str]:
    """Return the entries of ``_linux_multiarch_dirs()`` as a set."""
    names = _linux_multiarch_dirs()
    return set(names)
896988

897989

898990
def _path_matches_host_multiarch(path: str, host_multiarch: set[str]) -> bool:
899-
lower = path.lower()
900-
for token in _UNSUPPORTED_MULTIARCH_TOKENS:
901-
if token in lower:
902-
print(f"Skipping lib: {lower}")
903-
return False
904-
if not host_multiarch:
905-
return True
906-
for token in _KNOWN_MULTIARCH_TOKENS:
907-
if token in lower and token not in host_multiarch:
908-
print(f"Skipping lib: {lower}")
909-
return False
991+
_ = host_multiarch # retained for signature compatibility
992+
path_obj = Path(path)
993+
if path_obj.is_file():
994+
return _binary_matches_host(path_obj)
995+
if path_obj.is_dir():
996+
try:
997+
candidate = _locate_library_file(path_obj)
998+
except RuntimeError:
999+
return True
1000+
if candidate is not None:
1001+
return _binary_matches_host(candidate)
9101002
return True
9111003

9121004

Lines changed: 44 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,60 @@
1-
import setup_utils
2-
1+
import struct
2+
import sys
3+
from pathlib import Path
34

4-
def test_filter_incompatible_multiarch_skips_foreign_arch(monkeypatch):
5-
monkeypatch.setattr(setup_utils, "_linux_multiarch_dirs", lambda: ["x86_64-linux-gnu"])
6-
paths = [
7-
"/usr/lib/x86_64-linux-gnu/libpcre2-8.so",
8-
"/usr/lib/i386-linux-gnu/libpcre2-8.so.0",
9-
"/opt/lib/libpcre2-8.so",
10-
]
5+
import setup_utils
116

12-
result = setup_utils.filter_incompatible_multiarch(paths)
137

14-
assert result == [
15-
"/usr/lib/x86_64-linux-gnu/libpcre2-8.so",
16-
"/opt/lib/libpcre2-8.so",
17-
]
8+
def _write_pe_binary(path: Path, optional_magic: int) -> None:
    """Write a minimal PE-shaped file whose optional-header magic is given.

    Layout: ``MZ`` zero-padded to 0x3C, a little-endian 32-bit PE offset
    (0x80), zero padding up to that offset, the ``PE\\0\\0`` signature, twenty
    zero bytes standing in for the file header, then ``optional_magic`` as a
    little-endian 16-bit value.
    """
    pe_offset = 0x80
    dos_stub = b"MZ".ljust(0x3C, b"\x00") + struct.pack("<I", pe_offset)
    image = b"".join(
        (
            dos_stub.ljust(pe_offset, b"\x00"),
            b"PE\x00\x00",
            bytes(20),
            struct.pack("<H", optional_magic),
        )
    )
    path.write_bytes(image)
1820

1921

20-
def test_filter_incompatible_multiarch_keeps_host_arch(monkeypatch):
21-
monkeypatch.setattr(setup_utils, "_linux_multiarch_dirs", lambda: ["aarch64-linux-gnu"])
22-
paths = [
23-
"/usr/lib/x86_64-linux-gnu/libpcre2-8.so",
24-
"/usr/lib/aarch64-linux-gnu/libpcre2-8.so",
25-
"/usr/lib/i386-linux-gnu/libpcre2-8.so.0",
26-
]
22+
def test_filter_incompatible_multiarch_filters_elf_32bit(monkeypatch, tmp_path):
23+
monkeypatch.setattr(setup_utils, "_linux_multiarch_dirs", lambda: ["x86_64-linux-gnu"])
24+
monkeypatch.setattr(setup_utils, "_host_pointer_width", lambda: 64)
25+
lib64 = tmp_path / "libpcre2-8.so"
26+
lib64.write_bytes(b"\x7fELF\x02" + b"\x00" * 16)
27+
lib32 = tmp_path / "libpcre2-8-compat.so"
28+
lib32.write_bytes(b"\x7fELF\x01" + b"\x00" * 16)
2729

28-
result = setup_utils.filter_incompatible_multiarch(paths)
30+
result = setup_utils.filter_incompatible_multiarch([str(lib64), str(lib32)])
2931

30-
assert result == ["/usr/lib/aarch64-linux-gnu/libpcre2-8.so"]
32+
assert result == [str(lib64)]
3133

3234

33-
def test_filter_incompatible_multiarch_drops_32bit_when_unknown_host(monkeypatch):
35+
def test_filter_incompatible_multiarch_filters_macho_32bit(monkeypatch, tmp_path):
36+
monkeypatch.setattr(sys, "platform", "darwin")
3437
monkeypatch.setattr(setup_utils, "_linux_multiarch_dirs", lambda: [])
35-
paths = [
36-
"/usr/lib/i386-linux-gnu/libpcre2-8.so.0",
37-
"/usr/lib/i686-linux-gnu/libpcre2-8.so.0",
38-
"/usr/lib/x86_64-linux-gnu/libpcre2-8.so",
39-
]
38+
monkeypatch.setattr(setup_utils, "_host_pointer_width", lambda: 64)
39+
macho64 = tmp_path / "libpcre2-8.dylib"
40+
macho32 = tmp_path / "libpcre2-8-compat.dylib"
41+
macho64.write_bytes(struct.pack(">I", 0xCFFAEDFE))
42+
macho32.write_bytes(struct.pack(">I", 0xCEFAEDFE))
4043

41-
result = setup_utils.filter_incompatible_multiarch(paths)
44+
result = setup_utils.filter_incompatible_multiarch([str(macho64), str(macho32)])
4245

43-
assert result == ["/usr/lib/x86_64-linux-gnu/libpcre2-8.so"]
46+
assert result == [str(macho64)]
4447

4548

46-
def test_filter_incompatible_multiarch_drops_32bit_arm(monkeypatch):
47-
monkeypatch.setattr(setup_utils, "_linux_multiarch_dirs", lambda: ["aarch64-linux-gnu"])
48-
paths = [
49-
"/usr/lib/arm-linux-gnueabihf/libpcre2-8.so.0",
50-
"/usr/lib/aarch64-linux-gnu/libpcre2-8.so",
51-
]
49+
def test_filter_incompatible_multiarch_filters_pe32(monkeypatch, tmp_path):
50+
monkeypatch.setattr(sys, "platform", "win32")
51+
monkeypatch.setattr(setup_utils, "_linux_multiarch_dirs", lambda: [])
52+
monkeypatch.setattr(setup_utils, "_host_pointer_width", lambda: 64)
53+
pe64 = tmp_path / "pcre2-8.dll"
54+
pe32 = tmp_path / "pcre2-8-compat.dll"
55+
_write_pe_binary(pe64, 0x20B)
56+
_write_pe_binary(pe32, 0x10B)
5257

53-
result = setup_utils.filter_incompatible_multiarch(paths)
58+
result = setup_utils.filter_incompatible_multiarch([str(pe64), str(pe32)])
5459

55-
assert result == ["/usr/lib/aarch64-linux-gnu/libpcre2-8.so"]
60+
assert result == [str(pe64)]

0 commit comments

Comments
 (0)