From 46b42d127b48a6665062475fcfd95f46a2aedefb Mon Sep 17 00:00:00 2001 From: Joachim Rosskopf Date: Mon, 25 May 2026 11:03:16 +0200 Subject: [PATCH] feat(self-packaging #65): ZIP64-sentinel WARN log in bundle_locator When a ZIP exceeds 4 GiB or 65535 entries, the EOCD's cd_size / cd_offset / entry-count fields are pinned to 0xffffffff / 0xffff and the real values live in a ZIP64 EOCD record that we don't parse. The existing range-check rejection silently returned nullopt for these, making the failure indistinguishable from "no bundle present" in operator logs. - bundle_locator: explicit ZIP64 sentinel check inside the EOCD scan loop; on match, emit a single-line CROW_LOG_WARNING naming the mismatch + offending field values, and return nullopt - bundle_locator_test: rename the existing 0xffffffff fixture to call out the ZIP64 origin (issue #65); add a second case for the 16-bit entry-count sentinel (0xffff) that previously slipped past the range checks since cd_size/cd_offset can still be plausible --- src/bundle_locator.cpp | 22 +++++++++ test/cpp/bundle_locator_test.cpp | 77 +++++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 21 deletions(-) diff --git a/src/bundle_locator.cpp b/src/bundle_locator.cpp index 3c48675..0b57942 100644 --- a/src/bundle_locator.cpp +++ b/src/bundle_locator.cpp @@ -2,6 +2,8 @@ #include "macho_bundle.hpp" #include "selfpath.hpp" +#include + #include #include #include @@ -64,6 +66,26 @@ std::optional ScanBufferForEocd( if (this_disk != 0 || cd_start_disk != 0) { continue; } + // ZIP64 sentinels (#65): when a ZIP exceeds 4 GiB or 65535 entries, + // the EOCD's size / offset / entry-count fields are pinned to + // 0xffffffff / 0xffff and the real values live in a ZIP64 EOCD + // record we don't parse. Surface this explicitly -- the implicit + // range-check rejection below would otherwise return nullopt with + // no log line, making the failure indistinguishable from "no + // bundle present" in operator logs. + constexpr std::uint16_t kZip64SentinelU16 = 0xffffu; + constexpr std::uint32_t kZip64SentinelU32 = 0xffffffffu; + if (cd_size == kZip64SentinelU32 || cd_offset_arch == kZip64SentinelU32 || + entries_this == kZip64SentinelU16 || entries_total == kZip64SentinelU16) { + CROW_LOG_WARNING + << "bundle_locator: ZIP64-shaped EOCD detected at file offset " + << (buf_start_in_file + i) + << " (cd_size=0x" << std::hex << cd_size + << " cd_offset=0x" << cd_offset_arch + << " entries=" << std::dec << entries_this + << "); ZIP64 is not supported -- treating as no bundle present"; + return std::nullopt; + } if (entries_this != entries_total) { continue; } diff --git a/test/cpp/bundle_locator_test.cpp b/test/cpp/bundle_locator_test.cpp index 323997e..abc6792 100644 --- a/test/cpp/bundle_locator_test.cpp +++ b/test/cpp/bundle_locator_test.cpp @@ -119,31 +119,66 @@ TEST_CASE("bundle_locator returns nullopt when no EOCD signature is present", REQUIRE_FALSE(loc.has_value()); } -TEST_CASE("bundle_locator rejects an EOCD with implausible fields", "[bundle_locator]") { +namespace { + +// Helpers for building a synthetic EOCD record at the tail of a buffer. +// Used by the ZIP64 sentinel tests; the EOCD layout is constant across +// them and the existing tests would benefit from the same shorthand if +// they ever need to grow. +void PutU16(std::vector& bytes, std::size_t off, std::uint16_t v) { + bytes[off] = static_cast(v & 0xff); + bytes[off + 1] = static_cast((v >> 8) & 0xff); +} +void PutU32(std::vector& bytes, std::size_t off, std::uint32_t v) { + bytes[off] = static_cast(v & 0xff); + bytes[off + 1] = static_cast((v >> 8) & 0xff); + bytes[off + 2] = static_cast((v >> 16) & 0xff); + bytes[off + 3] = static_cast((v >> 24) & 0xff); +} + +} // namespace + +TEST_CASE("bundle_locator rejects ZIP64-sentinel cd_size / cd_offset (issue #65)", + "[bundle_locator][zip64]") { // 2 KiB of filler with a fake EOCD signature at the very end whose - // central-directory size/offset can't possibly fit in the file. + // central-directory size/offset carry the ZIP64 sentinel 0xffffffff + // -- the marker that says "the real values live in a ZIP64 EOCD + // record". flapi does not parse ZIP64 records, so it must reject + // these as "no bundle present" rather than masquerading as malformed. + std::vector bytes(2048, std::uint8_t{0xab}); + const std::size_t eocd_off = bytes.size() - 22; + + PutU32(bytes, eocd_off + 0, 0x06054b50); // signature + PutU16(bytes, eocd_off + 4, 0); // disk number + PutU16(bytes, eocd_off + 6, 0); // disk where CD starts + PutU16(bytes, eocd_off + 8, 1); // entries on this disk + PutU16(bytes, eocd_off + 10, 1); // total entries + PutU32(bytes, eocd_off + 12, 0xffffffffu); // central directory size (ZIP64 sentinel) + PutU32(bytes, eocd_off + 16, 0xffffffffu); // central directory offset (ZIP64 sentinel) + PutU16(bytes, eocd_off + 20, 0); // comment length + + TempBinary tb{bytes}; + auto loc = LocateBundle(tb.path()); + REQUIRE_FALSE(loc.has_value()); +} + +TEST_CASE("bundle_locator rejects ZIP64-sentinel entry counts (issue #65)", + "[bundle_locator][zip64]") { + // Same shape as above but the sentinel sits on the 16-bit entry-count + // fields (signals > 65535 entries). cd_size/cd_offset are kept + // plausible so they'd pass the range checks; the ZIP64 detection + // must catch the entry-count sentinel before then. std::vector bytes(2048, std::uint8_t{0xab}); const std::size_t eocd_off = bytes.size() - 22; - auto put_u16 = [&](std::size_t off, std::uint16_t v) { - bytes[off] = static_cast(v & 0xff); - bytes[off + 1] = static_cast((v >> 8) & 0xff); - }; - auto put_u32 = [&](std::size_t off, std::uint32_t v) { - bytes[off] = static_cast(v & 0xff); - bytes[off + 1] = static_cast((v >> 8) & 0xff); - bytes[off + 2] = static_cast((v >> 16) & 0xff); - bytes[off + 3] = static_cast((v >> 24) & 0xff); - }; - - put_u32(eocd_off + 0, 0x06054b50); // signature - put_u16(eocd_off + 4, 0); // disk number - put_u16(eocd_off + 6, 0); // disk where CD starts - put_u16(eocd_off + 8, 1); // entries on this disk - put_u16(eocd_off + 10, 1); // total entries - put_u32(eocd_off + 12, 0xffffffffu); // central directory size - put_u32(eocd_off + 16, 0xffffffffu); // central directory offset - put_u16(eocd_off + 20, 0); // comment length + PutU32(bytes, eocd_off + 0, 0x06054b50); // signature + PutU16(bytes, eocd_off + 4, 0); // disk number + PutU16(bytes, eocd_off + 6, 0); // disk where CD starts + PutU16(bytes, eocd_off + 8, 0xffffu); // entries on this disk (ZIP64 sentinel) + PutU16(bytes, eocd_off + 10, 0xffffu); // total entries (ZIP64 sentinel) + PutU32(bytes, eocd_off + 12, 64); // central directory size (plausible) + PutU32(bytes, eocd_off + 16, 256); // central directory offset (plausible) + PutU16(bytes, eocd_off + 20, 0); // comment length TempBinary tb{bytes}; auto loc = LocateBundle(tb.path());