From 2e075df27fe1d781a638da6eabbf0bfb0d4291fb Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Sat, 14 Mar 2026 13:56:53 -0700 Subject: [PATCH] fix(ustring): protect against idempotent rehash upon collision Some fuzzing stumbled across a very interesting edge case in the rehashing logic of ustring internals: If the initial hash location of a newly ustring-ized string is already occupied, it rehashes. But we never considered what happens if it ends up rehashing to the same slot! And in fact, that can happen if the true hash is 0 (I'm not sure if it can/does for any other value). And we may have thought that the only string that would ever hash to 0 is the empty string (which is 0 hash by design, and doesn't need to rehash because the table is seeded with that entry already in its correct slot). So anyway, if there was some other string that happened to hash to 0, it would get caught in an infinite loop of rehashing but never finding a free slot, because it keeps rehashing to the very same slot. And would you believe that there IS such a string, and it is this one: "\t\n\n\t\301\350`O\217c85?\202\200\251r|}\304\351\2517\337K'\217C\240" (unprintable chars expressed as escaped octal values) The crux of this patch is to notice when the old and new hash bit pattern has not changed, and bump it by 7 (a prime number, so it should not land on the same location again until every slot in the bin has been exhausted). Solution entirely from my own human brain, in case you're wondering. Signed-off-by: Larry Gritz --- src/libutil/ustring.cpp | 23 +++++++++++++++++++---- src/libutil/ustring_test.cpp | 8 ++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/libutil/ustring.cpp b/src/libutil/ustring.cpp index 3da39ccf65..043ce43dfb 100644 --- a/src/libutil/ustring.cpp +++ b/src/libutil/ustring.cpp @@ -522,7 +522,7 @@ ustring::make_unique(string_view strref) size_t bin = rm.lock_bin(hash); hash_t orighash = hash; - size_t binmask = orighash & (~rm.nobin_mask()); + size_t binbits = orighash & (~rm.nobin_mask()); size_t num_rehashes = 0; while (1) { @@ -546,9 +546,23 @@ ustring::make_unique(string_view strref) break; } // Rehash, but keep the bin bits identical so we always rehash into - // the same (locked) bin. - hash = (hash & binmask) - | (farmhash::Fingerprint(hash) & rm.nobin_mask()); + // the same (locked) bin. But watch out for rehashing that returns the + // identical non-bin part as before -- that will enter an infinite + // loop if we're not careful! + hash_t old_nonbin_bits = hash & rm.nobin_mask(); + hash_t new_nonbin_bits = farmhash::Fingerprint(hash) & rm.nobin_mask(); + if (OIIO_UNLIKELY(old_nonbin_bits == new_nonbin_bits)) { + new_nonbin_bits = (new_nonbin_bits + 7) & rm.nobin_mask(); +# ifndef NDEBUG + std::string s = Strutil::escape_chars(strref); + print(stderr, "IDEMPOTENT RE-HASH! |{}|\n", s); + for (auto c : s) + print(stderr, c > 0 ? "{:c}" : "\\{:03o}", + static_cast(c)); + print(stderr, "\n"); +# endif + } + hash = binbits | new_nonbin_bits; ++num_rehashes; // Strutil::print("COLLISION \"{}\" {:08x} vs \"{}\"\n", // strref, orighash, rev->second); @@ -556,6 +570,7 @@ ustring::make_unique(string_view strref) std::lock_guard lock(collision_mutex); all_hash_collisions.emplace_back(rev->second, rev->first); } + OIIO_ASSERT(num_rehashes < 100000); // Something is very wrong } rm.unlock_bin(bin); diff --git a/src/libutil/ustring_test.cpp b/src/libutil/ustring_test.cpp index 464419d1f3..2a04f905b7 100644 --- a/src/libutil/ustring_test.cpp +++ b/src/libutil/ustring_test.cpp @@ -173,6 +173,14 @@ test_ustring() Strutil::print("Test print default initialized ustring: '{}'\n", uninit); OIIO_CHECK_EQUAL(Strutil::format("{}", empty), Strutil::format("{}", uninit)); + + // Regression test: This specific string hashes to 0! This once caused a + // lot of trouble, because it couldn't rehash properly (every rehash also + // ended up at 0). When broken, this made an infinite loop inside + // ustring::make_unique(). + const char* hash0 + = "\t\n\n\t\301\350`O\217c85?\202\200\251r|}\304\351\2517\337K'\217C\240"; + OIIO_CHECK_EQUAL(ustring(hash0).hash(), 13226272983328805811ULL); }