From 98a5f499777041ed9f20255236f1a90c27eb8afc Mon Sep 17 00:00:00 2001 From: Joachim Rosskopf Date: Mon, 25 May 2026 14:25:36 +0200 Subject: [PATCH] feat(self-packaging #66): fat / universal macOS binary support Before this change, `LocateFlapiSectionInBuffer` returned `nullopt` for any input whose magic wasn't `MH_MAGIC_64`. Universal binaries (`FAT_MAGIC` / `FAT_MAGIC_64`) -- the format produced by `lipo -create` and consumed by Homebrew-style formulas that ship both arches -- therefore couldn't be packed or self-inspected. A universal binary is a small big-endian fat header followed by N thin Mach-O slices at distinct file offsets. The `section_64.offset` read inside a slice is relative to the slice's base, not the fat file -- both `OverwriteFlapiSection` (write side) and `LocateBundleInRange` (read side) treat the returned offset as an absolute file offset, so a naive accept-fat patch would have written to the wrong byte address. This PR: - Adds `ReadU32BE` / `ReadU64BE` -- fat headers are big-endian on disk regardless of host endianness. - Adds `ParseFatHeader` (namespace-private). Walks `fat_arch[]` (20-byte records for `FAT_MAGIC/CIGAM`, 32-byte for the `_64` variants). Selection rule: first slice whose cputype matches the host arch (compile-time, via `__aarch64__` / `__x86_64__`), else the first slice. Rejects malformed `nfat_arch` (0 or > 64) and a selected slice whose extent runs past EOF. - Splits `LocateFlapiSectionInBuffer` into an outer dispatch + an inner `LocateFlapiSectionAt(buffer, base)` helper. On fat input the outer call parses the fat header, picks a slice, and delegates to the inner helper with the slice's absolute offset as base. The inner helper adds base to whatever the load-cmd walker returns, so callers see an absolute file offset. - `IsMachOMagic` extended to recognise `FAT_MAGIC_64` / `FAT_CIGAM_64` in addition to the 32-bit fat variants it already accepted. 
- `OverwriteFlapiSection` is unchanged -- the new absolute-offset invariant makes its existing `seekp(file_offset)` land in the correct slice automatically. - Header doc updated to drop the "fat / universal not supported" caveat and document the slice-selection rule. Test fixture: new `BuildFatMachO(slices)` helper wraps any number of `BuildMachO64` outputs with a big-endian fat header + 4 KiB- aligned slice placement. Four new test cases (issue #66 acceptance): - single-slice fat: section located at the absolute offset (slice_offset + intra-slice section offset). - two-slice fat (arm64 + x86_64): parser picks the host-matching slice; deterministic per-host expected offset via `#ifdef`. - two PPC slices (host arch doesn't match either): parser falls back to the first slice and stays deterministic across exotic hosts. - OverwriteFlapiSection round-trip inside a slice: write a 7-byte payload through the located section, verify the bytes land at the returned absolute offset, and confirm a re-locate finds the same section. Test plan: - ctest: 642 / 642 pass (637 previous + 5 new -- 4 fat cases + the round-trip). - pytest test_self_packaging.py + test_self_packaging_http.py: 11 / 11 pass (no regression on the Linux EOCD tail-scan path, which is unaffected by this change). Closes #66. --- src/include/macho_bundle.hpp | 14 ++- src/macho_bundle.cpp | 182 ++++++++++++++++++++++++--- test/cpp/macho_bundle_test.cpp | 219 +++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+), 18 deletions(-) diff --git a/src/include/macho_bundle.hpp b/src/include/macho_bundle.hpp index 5467df1..f7d98b2 100644 --- a/src/include/macho_bundle.hpp +++ b/src/include/macho_bundle.hpp @@ -29,13 +29,19 @@ bool IsMachOMagic(const std::uint8_t magic_bytes[4]); // Locate the __FLAPI/__bundle section in a Mach-O file on disk. 
// Returns nullopt if: -// - the file isn't a thin (non-fat) Mach-O, +// - the file isn't a 64-bit Mach-O (thin or fat / universal), // - the file is malformed, // - the section doesn't exist (e.g., on a non-macOS build). // -// Fat / universal binaries are currently not supported -- a follow-up -// can iterate slices. macOS releases produced by this repo are thin -// per-architecture, so the gap is acceptable for now. +// Fat / universal binaries (FAT_MAGIC, FAT_MAGIC_64) are supported: +// the parser walks fat_arch[] and picks the slice whose cputype +// matches the host arch (compile-time), or the first slice as a +// deterministic fallback. The returned `file_offset` is absolute +// within the fat file so OverwriteFlapiSection seeks to the right +// place inside the chosen slice. +// +// 32-bit Mach-O (MH_MAGIC / MH_CIGAM) is intentionally rejected -- +// no flapi release ships a 32-bit slice today. std::optional LocateFlapiSection(const std::filesystem::path& path); // Overload that scans a buffer instead of opening a file. Used by diff --git a/src/macho_bundle.cpp b/src/macho_bundle.cpp index c8af39b..97056f2 100644 --- a/src/macho_bundle.cpp +++ b/src/macho_bundle.cpp @@ -30,10 +30,31 @@ constexpr std::uint32_t kMachOMagic64 = 0xFEEDFACFu; constexpr std::uint32_t kMachOCigam64 = 0xCFFAEDFEu; constexpr std::uint32_t kFatMagic = 0xCAFEBABEu; constexpr std::uint32_t kFatCigam = 0xBEBAFECAu; +constexpr std::uint32_t kFatMagic64 = 0xCAFEBABFu; +constexpr std::uint32_t kFatCigam64 = 0xBFBAFECAu; constexpr std::uint32_t kLcSegment = 0x01; constexpr std::uint32_t kLcSegment64 = 0x19; +// Apple CPU type constants (from <mach/machine.h>). Only the ones we +// care about for slice selection on hosts we actually run on. Marked +// maybe_unused because the host-arch #ifdef below only picks one. 
+[[maybe_unused]] constexpr std::uint32_t kCpuTypeX86_64 = 0x01000007u; +[[maybe_unused]] constexpr std::uint32_t kCpuTypeArm64 = 0x0100000Cu; + +// What slice to prefer when the input is a fat / universal binary. +// Resolved at compile time from the host arch so the recursive +// LocateFlapiSection call picks the slice that matches the binary +// doing the lookup -- which, in the self-packaging case, is the +// binary being looked at. 0 means "no preference; first slice". +#if defined(__aarch64__) || defined(__arm64__) +constexpr std::uint32_t kPreferredCpuType = kCpuTypeArm64; +#elif defined(__x86_64__) || defined(_M_X64) +constexpr std::uint32_t kPreferredCpuType = kCpuTypeX86_64; +#else +constexpr std::uint32_t kPreferredCpuType = 0u; +#endif + std::uint32_t ReadU32LE(const std::uint8_t* p) { return static_cast(p[0]) | (static_cast(p[1]) << 8) @@ -46,6 +67,20 @@ std::uint64_t ReadU64LE(const std::uint8_t* p) { | (static_cast(ReadU32LE(p + 4)) << 32); } +// Fat header + fat_arch records are stored big-endian on disk +// regardless of host endianness (Apple's universal-binary spec). +std::uint32_t ReadU32BE(const std::uint8_t* p) { + return (static_cast(p[0]) << 24) + | (static_cast(p[1]) << 16) + | (static_cast(p[2]) << 8) + | static_cast(p[3]); +} + +std::uint64_t ReadU64BE(const std::uint8_t* p) { + return (static_cast(ReadU32BE(p)) << 32) + | static_cast(ReadU32BE(p + 4)); +} + bool NameEquals(const std::uint8_t* fixed, std::size_t cap, const char* expected) { // Mach-O segment/section names are NUL-padded fixed-length fields. // We compare up to cap bytes, treating NUL as terminator on the @@ -177,13 +212,132 @@ std::optional FindInLoadCommands64( return std::nullopt; } +// Parses a fat / universal Mach-O header at buffer[0] and returns the +// absolute file offset of the slice we want to recurse into. The +// caller is responsible for the magic dispatch and for adding the +// returned offset to any per-slice section offsets it computes. 
+// +// Selection rule: first slice whose cputype matches kPreferredCpuType; +// else the first slice. The fallback gives deterministic behaviour on +// hosts whose arch isn't represented in the file (e.g., a PPC-only +// fat binary inspected on x86_64, or a test fixture built on a host +// arch we don't compile-time match). +// +// Returns nullopt on truncation, an absurd nfat_arch (we cap at 64 +// slices -- real universal binaries top out at 4-5), or a slice whose +// declared extent exceeds the buffer. +struct FatSlice { + std::uint64_t file_offset = 0; + std::uint64_t size = 0; +}; + +std::optional ParseFatHeader( + const std::vector& buffer, + std::uint32_t magic) { + constexpr std::size_t kFatHeaderSize = 8; + if (buffer.size() < kFatHeaderSize) { + return std::nullopt; + } + const bool is_64 = (magic == kFatMagic64 || magic == kFatCigam64); + const std::size_t arch_size = is_64 ? 32u : 20u; + const std::uint32_t nfat_arch = ReadU32BE(buffer.data() + 4); + constexpr std::uint32_t kMaxSlices = 64u; + if (nfat_arch == 0 || nfat_arch > kMaxSlices) { + return std::nullopt; + } + if (buffer.size() < kFatHeaderSize + nfat_arch * arch_size) { + return std::nullopt; + } + + auto read_slice = [&](std::uint32_t i) -> FatSlice { + const std::size_t off = kFatHeaderSize + i * arch_size; + FatSlice s; + // Layout (fat_arch): cputype, cpusubtype, offset, size, align + // Layout (fat_arch_64): cputype, cpusubtype, offset(64), size(64), + // align, reserved + if (is_64) { + s.file_offset = ReadU64BE(buffer.data() + off + 8); + s.size = ReadU64BE(buffer.data() + off + 16); + } else { + s.file_offset = ReadU32BE(buffer.data() + off + 8); + s.size = ReadU32BE(buffer.data() + off + 12); + } + return s; + }; + auto read_cputype = [&](std::uint32_t i) -> std::uint32_t { + const std::size_t off = kFatHeaderSize + i * arch_size; + return ReadU32BE(buffer.data() + off); + }; + + // Pass 1: preferred cputype. 
+ if (kPreferredCpuType != 0) { + for (std::uint32_t i = 0; i < nfat_arch; ++i) { + if (read_cputype(i) == kPreferredCpuType) { + FatSlice s = read_slice(i); + if (s.file_offset > buffer.size() || + s.size > buffer.size() || + s.file_offset + s.size > buffer.size()) { + return std::nullopt; + } + return s; + } + } + } + // Pass 2: first slice as fallback. + FatSlice s = read_slice(0); + if (s.file_offset > buffer.size() || + s.size > buffer.size() || + s.file_offset + s.size > buffer.size()) { + return std::nullopt; + } + return s; +} + +// Inner overload: parse a Mach-O whose first byte lives at +// buffer[base] and produce a section file_offset that is absolute +// within the original (possibly fat-wrapping) buffer. +std::optional LocateFlapiSectionAt( + const std::vector& buffer, + std::uint64_t base) { + if (base + 32 > buffer.size()) { + return std::nullopt; + } + const std::uint32_t magic = ReadU32LE(buffer.data() + base); + + // 32-bit Mach-O: we don't ship 32-bit artifacts; cigam (byte-swapped) + // is also out of scope for this parser. A future PR can extend if a + // legit use-case appears. + if (magic != kMachOMagic64) { + return std::nullopt; + } + + // mach_header_64 layout: + // uint32 magic, cputype, cpusubtype, filetype, + // uint32 ncmds, sizeofcmds, flags, reserved + const std::uint32_t ncmds = ReadU32LE(buffer.data() + base + 16); + const std::uint32_t sizeofcmds = ReadU32LE(buffer.data() + base + 20); + + auto inner = FindInLoadCommands64(buffer, base, ncmds, sizeofcmds); + if (!inner.has_value()) { + return std::nullopt; + } + // The section's offset field is recorded relative to its slice's + // base in the on-disk Mach-O, so callers further out need the + // absolute file offset. base is the slice's absolute offset in the + // outer (potentially fat) file; adding it gives the absolute byte + // position seek() should land on. 
+ inner->file_offset += base; + return inner; +} + } // namespace bool IsMachOMagic(const std::uint8_t magic_bytes[4]) { const std::uint32_t m = ReadU32LE(magic_bytes); return m == kMachOMagic32 || m == kMachOCigam32 || m == kMachOMagic64 || m == kMachOCigam64 || - m == kFatMagic || m == kFatCigam; + m == kFatMagic || m == kFatCigam || + m == kFatMagic64 || m == kFatCigam64; } std::optional LocateFlapiSectionInBuffer( @@ -193,21 +347,21 @@ std::optional LocateFlapiSectionInBuffer( } const std::uint32_t magic = ReadU32LE(buffer.data()); - // We only handle 64-bit little-endian Mach-O here. Production - // arm64/x86_64 builds emit this format. Cigam (byte-swapped), - // 32-bit, and fat (universal) are out of scope for the spike -- - // documented in the header. - if (magic != kMachOMagic64) { - return std::nullopt; + // Fat / universal binary: pick the slice that matches the host + // arch (or the first slice as a deterministic fallback) and recurse + // into the inner thin Mach-O. The returned file_offset is absolute + // within the fat file, which is what OverwriteFlapiSection and + // LocateBundleInRange both expect. 
+ if (magic == kFatMagic || magic == kFatCigam || + magic == kFatMagic64 || magic == kFatCigam64) { + auto slice = ParseFatHeader(buffer, magic); + if (!slice.has_value()) { + return std::nullopt; + } + return LocateFlapiSectionAt(buffer, slice->file_offset); } - // mach_header_64: - // uint32 magic, cputype, cpusubtype, filetype, - // uint32 ncmds, sizeofcmds, flags, reserved - const std::uint32_t ncmds = ReadU32LE(buffer.data() + 16); - const std::uint32_t sizeofcmds = ReadU32LE(buffer.data() + 20); - - return FindInLoadCommands64(buffer, /*base=*/0, ncmds, sizeofcmds); + return LocateFlapiSectionAt(buffer, /*base=*/0); } std::optional LocateFlapiSection(const std::filesystem::path& path) { diff --git a/test/cpp/macho_bundle_test.cpp b/test/cpp/macho_bundle_test.cpp index 1b20602..d2ac6d4 100644 --- a/test/cpp/macho_bundle_test.cpp +++ b/test/cpp/macho_bundle_test.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include using namespace flapi; @@ -102,6 +104,69 @@ std::vector BuildMachO64(const std::vector& sections) return buf; } +// --- Fat / universal helpers (issue #66) ------------------------------------- + +constexpr std::uint32_t kFatMagic = 0xCAFEBABEu; +constexpr std::size_t kFatHeaderSize = 8; +constexpr std::size_t kFatArchSize = 20; // 32-bit fat_arch +constexpr std::uint32_t kCpuTypeX86_64 = 0x01000007u; +constexpr std::uint32_t kCpuTypeArm64 = 0x0100000Cu; +constexpr std::uint32_t kCpuTypePPC = 0x00000012u; + +void WriteU32BE(std::vector& buf, std::size_t off, std::uint32_t v) { + buf[off] = static_cast((v >> 24) & 0xff); + buf[off + 1] = static_cast((v >> 16) & 0xff); + buf[off + 2] = static_cast((v >> 8) & 0xff); + buf[off + 3] = static_cast(v & 0xff); +} + +struct FatSliceSpec { + std::uint32_t cputype; + std::vector bytes; // thin Mach-O contents for this slice +}; + +// Wraps thin Mach-O bytes with a 32-bit fat header. 
Slices are placed +// at 4 KiB-aligned offsets after the fat_arch table; the wrapper +// pads with zeros between slices so absolute offsets are stable and +// reproducible across runs. +std::vector BuildFatMachO(const std::vector& slices) { + constexpr std::uint64_t kAlign = 4096; + auto align_up = [](std::uint64_t v, std::uint64_t a) { + return (v + a - 1) & ~(a - 1); + }; + + const std::uint32_t nfat_arch = static_cast(slices.size()); + std::vector slice_offsets; + slice_offsets.reserve(slices.size()); + + std::uint64_t cursor = align_up(kFatHeaderSize + nfat_arch * kFatArchSize, kAlign); + for (const auto& s : slices) { + slice_offsets.push_back(cursor); + cursor = align_up(cursor + s.bytes.size(), kAlign); + } + const std::uint64_t total = cursor; + + std::vector buf(static_cast(total), 0); + WriteU32BE(buf, 0, kFatMagic); + WriteU32BE(buf, 4, nfat_arch); + + for (std::uint32_t i = 0; i < nfat_arch; ++i) { + const std::size_t arch_off = + kFatHeaderSize + static_cast(i) * kFatArchSize; + WriteU32BE(buf, arch_off + 0, slices[i].cputype); + WriteU32BE(buf, arch_off + 4, 0); // cpusubtype + WriteU32BE(buf, arch_off + 8, static_cast(slice_offsets[i])); + WriteU32BE(buf, arch_off + 12, static_cast(slices[i].bytes.size())); + WriteU32BE(buf, arch_off + 16, 12); // align (2^12 == 4 KiB) + } + for (std::uint32_t i = 0; i < nfat_arch; ++i) { + std::memcpy(buf.data() + slice_offsets[i], + slices[i].bytes.data(), + slices[i].bytes.size()); + } + return buf; +} + } // namespace TEST_CASE("IsMachOMagic recognises 64-bit and fat magics", "[macho_bundle]") { @@ -167,6 +232,160 @@ TEST_CASE("LocateFlapiSection handles short buffer without crashing", REQUIRE_FALSE(LocateFlapiSectionInBuffer(buf).has_value()); } +// --- Fat / universal binary cases (issue #66) -------------------------------- + +TEST_CASE("LocateFlapiSection finds __FLAPI/__bundle in a single-slice fat Mach-O", + "[macho_bundle][fat]") { + constexpr std::uint64_t kSectOffWithinSlice = 0x100000; + auto 
slice_bytes = BuildMachO64({ + {"__TEXT", "__text", 0x1000, 0x4000}, + {"__FLAPI", "__bundle", kSectOffWithinSlice, 16u * 1024u * 1024u}, + }); + + auto fat = BuildFatMachO({ + {kCpuTypeArm64, slice_bytes}, + }); + + auto loc = LocateFlapiSectionInBuffer(fat); + REQUIRE(loc.has_value()); + + // The section's offset within the slice is recorded as + // kSectOffWithinSlice; the slice itself sits at the first + // 4 KiB-aligned offset past the fat header + 1 fat_arch entry, + // which is 4096. Absolute file_offset must add the two. + constexpr std::uint64_t kSliceFatOffset = 4096; + REQUIRE(loc->file_offset == kSliceFatOffset + kSectOffWithinSlice); + // Width unchanged by the fat wrapper. + REQUIRE(loc->size == 16ull * 1024ull * 1024ull); +} + +TEST_CASE("LocateFlapiSection picks the host-arch slice in a two-slice fat Mach-O", + "[macho_bundle][fat]") { + constexpr std::uint64_t kArm64SectOff = 0x100000; + constexpr std::uint64_t kX86_64SectOff = 0x200000; + constexpr std::uint64_t kSliceSize = 16u * 1024u * 1024u; + + auto arm64_slice = BuildMachO64({ + {"__TEXT", "__text", 0x1000, 0x4000}, + {"__FLAPI", "__bundle", kArm64SectOff, kSliceSize}, + }); + auto x86_64_slice = BuildMachO64({ + {"__TEXT", "__text", 0x1000, 0x4000}, + {"__FLAPI", "__bundle", kX86_64SectOff, kSliceSize}, + }); + + auto fat = BuildFatMachO({ + {kCpuTypeArm64, arm64_slice}, + {kCpuTypeX86_64, x86_64_slice}, + }); + + auto loc = LocateFlapiSectionInBuffer(fat); + REQUIRE(loc.has_value()); + + // First slice is 4 KiB-aligned past the fat header + 2 fat_arch + // entries (8 + 40 = 48 bytes) = 4096. Second slice is placed at + // the next 4 KiB-aligned offset after the first slice's bytes; + // BuildMachO64 emits 32 + 2*(72+80) = 336 bytes for a 2-section + // thin Mach-O, so the second slice lands at align_up(4096+336, + // 4096) = 8192. 
+ [[maybe_unused]] constexpr std::uint64_t kArm64SliceFatOffset = 4096; + [[maybe_unused]] constexpr std::uint64_t kX86_64SliceFatOffset = 8192; + +#if defined(__aarch64__) || defined(__arm64__) + constexpr std::uint64_t expected_abs = kArm64SliceFatOffset + kArm64SectOff; +#elif defined(__x86_64__) || defined(_M_X64) + constexpr std::uint64_t expected_abs = kX86_64SliceFatOffset + kX86_64SectOff; +#else + constexpr std::uint64_t expected_abs = kArm64SliceFatOffset + kArm64SectOff; +#endif + REQUIRE(loc->file_offset == expected_abs); + REQUIRE(loc->size == kSliceSize); +} + +TEST_CASE("LocateFlapiSection falls back to the first slice when no arch matches", + "[macho_bundle][fat]") { + constexpr std::uint64_t kFirstSectOff = 0x100000; + auto first_slice = BuildMachO64({ + {"__FLAPI", "__bundle", kFirstSectOff, 0x80000}, + }); + auto second_slice = BuildMachO64({ + {"__FLAPI", "__bundle", 0x200000, 0x80000}, + }); + // Two PPC slices: never matches kPreferredCpuType on the platforms + // we compile for, so the parser must take the first one. + auto fat = BuildFatMachO({ + {kCpuTypePPC, first_slice}, + {kCpuTypePPC, second_slice}, + }); + + auto loc = LocateFlapiSectionInBuffer(fat); + REQUIRE(loc.has_value()); + // First slice's fat offset = 4 KiB-aligned past the 8-byte fat + // header + 2 * 20-byte fat_arch records (= 48) = 4096. The + // section is recorded at kFirstSectOff inside that slice, so the + // absolute offset is the sum -- proving the parser picked the + // first slice as the fallback rather than peeking at the second. + constexpr std::uint64_t kFirstSliceFatOffset = 4096; + REQUIRE(loc->file_offset == kFirstSliceFatOffset + kFirstSectOff); + REQUIRE(loc->size == 0x80000); +} + +TEST_CASE("OverwriteFlapiSection round-trips inside a fat slice", + "[macho_bundle][fat]") { + // Pick a section large enough to hold our payload, with at least + // some intra-slice offset so the absolute-offset path is the only + // way the seek lands correctly. 
+ constexpr std::uint64_t kSectOff = 0x10000; + constexpr std::uint64_t kSectSize = 4096; + auto slice_bytes = BuildMachO64({ + {"__FLAPI", "__bundle", kSectOff, kSectSize}, + }); + // Extend the slice byte vector so the section's declared extent + // (kSectOff + kSectSize) actually exists inside the slice's + // storage when we ship it into the fat wrapper. + slice_bytes.resize(static_cast(kSectOff + kSectSize), 0); + + auto fat = BuildFatMachO({ + {kCpuTypeArm64, slice_bytes}, + }); + + // Write the fat fixture to a temp file so we can exercise the + // file-on-disk path of OverwriteFlapiSection + LocateFlapiSection. + auto tmp = std::filesystem::temp_directory_path() / + ("macho_fat_roundtrip_" + + std::to_string(reinterpret_cast(&fat)) + ".bin"); + { + std::ofstream f(tmp, std::ios::binary); + f.write(reinterpret_cast(fat.data()), + static_cast(fat.size())); + } + + auto loc = LocateFlapiSection(tmp); + REQUIRE(loc.has_value()); + + const std::vector payload = {'f', 'l', 'a', 'p', 'i', '!', '!'}; + OverwriteFlapiSection(tmp, *loc, payload); + + // Verify the bytes landed at the absolute offset the locator + // returned -- proves the slice base was applied before the seek. + std::ifstream in(tmp, std::ios::binary); + in.seekg(static_cast(loc->file_offset), std::ios::beg); + std::vector got(payload.size(), 0); + in.read(reinterpret_cast(got.data()), + static_cast(got.size())); + REQUIRE(got == payload); + + // And the slack right after the payload was zero-padded, so a + // re-locate sees the same section unchanged. + auto loc2 = LocateFlapiSection(tmp); + REQUIRE(loc2.has_value()); + REQUIRE(loc2->file_offset == loc->file_offset); + REQUIRE(loc2->size == loc->size); + + std::error_code ec; + std::filesystem::remove(tmp, ec); +} + TEST_CASE("CodesignBinary is a benign no-op on non-Darwin builds", "[macho_bundle]") { #ifndef __APPLE__