From 7c9f6b59e4f8a2d3e89e840336e6fa340c7e718d Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:11:58 +0000 Subject: [PATCH 01/19] Some outstanding tests. --- src/test/perf/combininglock/combininglock.cc | 37 ++++++++++++++++ src/test/perf/realloc/realloc.cc | 46 ++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 src/test/perf/combininglock/combininglock.cc create mode 100644 src/test/perf/realloc/realloc.cc diff --git a/src/test/perf/combininglock/combininglock.cc b/src/test/perf/combininglock/combininglock.cc new file mode 100644 index 000000000..6a9437c70 --- /dev/null +++ b/src/test/perf/combininglock/combininglock.cc @@ -0,0 +1,37 @@ +#include +#include +#include + +snmalloc::CombiningLock cl; + +std::atomic run{true}; + +void loop() +{ + size_t j = 0; + size_t i = 0; + while (run) + { + i++; + snmalloc::with(cl, [&]() { j++; }); + if (i != j) + snmalloc::error("i != j"); + } +} + +int main() +{ + std::vector threads; + for (size_t i = 0; i < 8; i++) + { + threads.emplace_back(std::thread(loop)); + } + + std::this_thread::sleep_for(std::chrono::seconds(100)); + run = false; + + for (auto& t : threads) + { + t.join(); + } +} \ No newline at end of file diff --git a/src/test/perf/realloc/realloc.cc b/src/test/perf/realloc/realloc.cc new file mode 100644 index 000000000..5efcfbaeb --- /dev/null +++ b/src/test/perf/realloc/realloc.cc @@ -0,0 +1,46 @@ +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +#include +#include +#include +#include +#include + +using namespace snmalloc; + +NOINLINE +void* myrealloc(void* p, size_t size) +{ + return snmalloc::libc::realloc(p, size); +} + +void grow() +{ + void* base = nullptr; + for (size_t i = 1; i < 1000; i++) + { + base = myrealloc(base, i * 8); + } + snmalloc::libc::free(base); +} + +int main() +{ + auto start = Aal::tick(); + + for (size_t i = 0; i < 10000; i++) + { + grow(); + if (i % 10 == 0) + { + std::cout << "." 
<< std::flush; + } + } + + auto end = Aal::tick(); + + std::cout << "Taken: " << end - start << std::endl; +} \ No newline at end of file From d15c1bc4504f614efebd5db23d110930d09b61ab Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:13:53 +0000 Subject: [PATCH 02/19] Add statistics to snmalloc. This adds a collection of per-sizeclass statistics for tracking how many allocations have occurred on each thread. These are racily combined to provide basic tracking information. --- src/snmalloc/backend_helpers/statsrange.h | 25 +-- src/snmalloc/ds_core/array.h | 185 ++++++++++++++++++++++ src/snmalloc/ds_core/ds_core.h | 3 +- src/snmalloc/ds_core/stats.h | 97 ++++++++++++ src/snmalloc/global/globalalloc.h | 78 +++++++++ src/snmalloc/mem/allocstats.h | 44 +++++ src/snmalloc/mem/corealloc.h | 44 ++++- src/snmalloc/mem/pool.h | 8 + src/snmalloc/mem/remotecache.h | 29 ++-- src/test/func/alloc_churn/alloc_churn.cc | 32 ++++ src/test/func/cleanup/cleanup.cc | 61 +++++++ src/test/func/statistics/stats.cc | 17 +- src/test/perf/batchblitz/batchblitz.cc | 94 +++++++++++ src/test/perf/churn/churn.cc | 94 +++++++++++ 14 files changed, 771 insertions(+), 40 deletions(-) create mode 100644 src/snmalloc/ds_core/array.h create mode 100644 src/snmalloc/ds_core/stats.h create mode 100644 src/snmalloc/mem/allocstats.h create mode 100644 src/test/func/alloc_churn/alloc_churn.cc create mode 100644 src/test/func/cleanup/cleanup.cc create mode 100644 src/test/perf/batchblitz/batchblitz.cc create mode 100644 src/test/perf/churn/churn.cc diff --git a/src/snmalloc/backend_helpers/statsrange.h b/src/snmalloc/backend_helpers/statsrange.h index d1e213777..8fe676fb6 100644 --- a/src/snmalloc/backend_helpers/statsrange.h +++ b/src/snmalloc/backend_helpers/statsrange.h @@ -16,8 +16,7 @@ namespace snmalloc { using ContainsParent::parent; - static inline stl::Atomic current_usage{}; - static inline stl::Atomic peak_usage{}; + static inline Stat usage{}; public: static constexpr 
bool Aligned = ParentRange::Aligned; @@ -30,34 +29,26 @@ namespace snmalloc CapPtr alloc_range(size_t size) { - auto result = parent.alloc_range(size); - if (result != nullptr) - { - auto prev = current_usage.fetch_add(size); - auto curr = peak_usage.load(); - while (curr < prev + size) - { - if (peak_usage.compare_exchange_weak(curr, prev + size)) - break; - } - } - return result; + auto r = parent.alloc_range(size); + if (r != nullptr) + usage += size; + return r; } void dealloc_range(CapPtr base, size_t size) { - current_usage -= size; + usage -= size; parent.dealloc_range(base, size); } size_t get_current_usage() { - return current_usage.load(); + return usage.get_curr(); } size_t get_peak_usage() { - return peak_usage.load(); + return usage.get_peak(); } }; }; diff --git a/src/snmalloc/ds_core/array.h b/src/snmalloc/ds_core/array.h new file mode 100644 index 000000000..f473c73cd --- /dev/null +++ b/src/snmalloc/ds_core/array.h @@ -0,0 +1,185 @@ +#pragma once + +#include "defines.h" + +#include + +namespace snmalloc +{ + /** + * A simple fixed-size array container. + * + * This provides a std::array-like interface without depending on the + * standard library. The array supports aggregate initialization and + * provides iterators for range-based for loops. + * + * @tparam T The element type + * @tparam N The number of elements + */ + template + struct Array + { + // Expose this to public to allow aggregate initialization. 
+ T storage_[N]; + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH size_t size() const + { + return N; + } + + constexpr T& operator[](size_t i) + { + return storage_[i]; + } + + constexpr const T& operator[](size_t i) const + { + return storage_[i]; + } + + using value_type = T; + using size_type = size_t; + using iterator = T*; + using const_iterator = const T*; + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH iterator begin() + { + return &storage_[0]; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH const_iterator begin() const + { + return &storage_[0]; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH iterator end() + { + return &storage_[N]; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH const_iterator end() const + { + return &storage_[N]; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH T* data() + { + return &storage_[0]; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH const T* data() const + { + return &storage_[0]; + } + }; + + /** + * Specialization for zero-length arrays. + * + * Zero-length arrays are not valid in standard C++, so we provide + * a specialization that has no storage but maintains the same interface. 
+ */ + template + struct Array + { + [[nodiscard]] constexpr SNMALLOC_FAST_PATH size_t size() const + { + return 0; + } + + constexpr T& operator[](size_t) + { + SNMALLOC_FAST_FAIL(); + } + + constexpr const T& operator[](size_t) const + { + SNMALLOC_FAST_FAIL(); + } + + using value_type = T; + using size_type = size_t; + using iterator = T*; + using const_iterator = const T*; + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH iterator begin() + { + return nullptr; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH const_iterator begin() const + { + return nullptr; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH iterator end() + { + return nullptr; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH const_iterator end() const + { + return nullptr; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH T* data() + { + return nullptr; + } + + [[nodiscard]] constexpr SNMALLOC_FAST_PATH const T* data() const + { + return nullptr; + } + }; + + // Free function begin/end for Array + template + constexpr T* begin(Array& a) + { + return a.begin(); + } + + template + constexpr T* end(Array& a) + { + return a.end(); + } + + template + constexpr const T* begin(const Array& a) + { + return a.begin(); + } + + template + constexpr const T* end(const Array& a) + { + return a.end(); + } + + // Free function begin/end for C-style arrays + template + constexpr T* begin(T (&a)[N]) + { + return &a[0]; + } + + template + constexpr T* end(T (&a)[N]) + { + return &a[N]; + } + + template + constexpr const T* begin(const T (&a)[N]) + { + return &a[0]; + } + + template + constexpr const T* end(const T (&a)[N]) + { + return &a[N]; + } +} // namespace snmalloc diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h index cc395127b..2292b8118 100644 --- a/src/snmalloc/ds_core/ds_core.h +++ b/src/snmalloc/ds_core/ds_core.h @@ -15,4 +15,5 @@ #include "mitigations.h" #include "ptrwrap.h" #include "redblacktree.h" -#include "tid.h" \ No newline at end of file +#include "stats.h" 
+#include "tid.h" diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h new file mode 100644 index 000000000..55bdd2dd6 --- /dev/null +++ b/src/snmalloc/ds_core/stats.h @@ -0,0 +1,97 @@ +#include "defines.h" + +#include +#include + +namespace snmalloc +{ + /** + * Very basic statistic that tracks current and peak values. + */ + class Stat + { + private: + stl::Atomic curr{0}; + stl::Atomic peak{0}; + + public: + void increase(size_t amount) + { + size_t c = (curr += amount); + size_t p = peak.load(std::memory_order_relaxed); + while (c > p) + { + if (peak.compare_exchange_strong(p, c)) + break; + } + } + + void decrease(size_t amount) + { + size_t prev = curr.fetch_sub(amount); + SNMALLOC_ASSERT_MSG( + prev >= amount, "prev = {}, amount = {}", prev, amount); + UNUSED(prev); + } + + size_t get_curr() + { + return curr.load(std::memory_order_relaxed); + } + + size_t get_peak() + { + return peak.load(std::memory_order_relaxed); + } + + void operator+=(size_t amount) + { + increase(amount); + } + + void operator-=(size_t amount) + { + decrease(amount); + } + + void operator++() + { + increase(1); + } + + void operator--() + { + decrease(1); + } + }; + + /** + * Very basic statistic that can only grow. Not thread-safe. 
+ */ + class MonotoneLocalStat + { + std::atomic value{0}; + + public: + void operator++(int) + { + value.fetch_add(1, std::memory_order_relaxed); + } + + void operator+=(const MonotoneLocalStat& other) + { + auto v = other.value.load(std::memory_order_relaxed); + value.fetch_add(v, std::memory_order_relaxed); + } + + void operator+=(size_t v) + { + value.fetch_add(v, std::memory_order_relaxed); + } + + size_t operator*() + { + return value.load(std::memory_order_relaxed); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index 110051e2f..a977d2d14 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -84,6 +84,9 @@ namespace snmalloc } } + if (result == nullptr) + SNMALLOC_CHECK(RemoteDeallocCache::remote_inflight.get_curr() == 0); + if (result != nullptr) { *result = okay; @@ -128,6 +131,81 @@ namespace snmalloc } } + template + inline static void get_stats(AllocStats& stats) + { + auto alloc = AllocPool::iterate(); + while (alloc != nullptr) + { + stats += alloc->get_stats(); + alloc = AllocPool::iterate(alloc); + } + } + + template + inline static void print_alloc_stats() + { + static std::atomic dump{0}; + + auto l_dump = dump++; + if (l_dump == 0) + { + message<1024>( + "snmalloc_allocs,dumpid,sizeclass,size,allocated,deallocated,in_use," + "bytes,slabs allocated,slabs deallocated,slabs in_use,slabs bytes"); + message<1024>( + "snmalloc_totals,dumpid,backend bytes,peak backend " + "bytes,requested,slabs requested bytes,remote inflight bytes,allocator " + "count"); + } + + AllocStats stats; + snmalloc::get_stats(stats); + size_t total_live{0}; + size_t total_live_slabs{0}; + for (size_t i = 0; i < snmalloc::SIZECLASS_REP_SIZE; i++) + { + auto sc = snmalloc::sizeclass_t::from_raw(i); + auto allocated = *stats[sc].objects_allocated; + auto deallocated = *stats[sc].objects_deallocated; + auto slabs_allocated = *stats[sc].slabs_allocated; + auto slabs_deallocated 
= *stats[sc].slabs_deallocated; + if (allocated == 0 && deallocated == 0) + continue; + auto size = snmalloc::sizeclass_full_to_size(sc); + auto slab_size = snmalloc::sizeclass_full_to_slab_size(sc); + auto in_use = allocated - deallocated; + auto amount = in_use * size; + total_live += amount; + auto in_use_slabs = slabs_allocated - slabs_deallocated; + auto amount_slabs = in_use_slabs * slab_size; + total_live_slabs += amount_slabs; + + snmalloc::message<1024>( + "snmalloc_allocs,{},{},{},{},{},{},{},{},{},{},{}", + l_dump, + i, + size, + allocated, + deallocated, + in_use, + amount, + slabs_allocated, + slabs_deallocated, + in_use_slabs, + amount_slabs); + } + snmalloc::message<1024>( + "snmalloc_totals,{},{},{},{},{},{},{}", + l_dump, + Config::Backend::get_current_usage(), + Config::Backend::get_peak_usage(), + total_live, + total_live_slabs, + RemoteDeallocCache::remote_inflight.get_curr(), + Config::pool().get_count()); + } + /** * Returns the number of remaining bytes in an object. 
* diff --git a/src/snmalloc/mem/allocstats.h b/src/snmalloc/mem/allocstats.h new file mode 100644 index 000000000..bfa789c36 --- /dev/null +++ b/src/snmalloc/mem/allocstats.h @@ -0,0 +1,44 @@ +#include "../ds_core/ds_core.h" +#include "sizeclasstable.h" + +#include + +namespace snmalloc +{ + struct AllocStat + { + MonotoneLocalStat objects_allocated{}; + MonotoneLocalStat objects_deallocated{}; + MonotoneLocalStat slabs_allocated{}; + MonotoneLocalStat slabs_deallocated{}; + }; + + class AllocStats + { + std::array sizeclass{}; + + public: + AllocStat& operator[](sizeclass_t index) + { + auto i = index.raw(); + return sizeclass[i]; + } + + AllocStat& operator[](smallsizeclass_t index) + { + return sizeclass[sizeclass_t::from_small_class(index).raw()]; + } + + void operator+=(const AllocStats& other) + { + for (size_t i = 0; i < SIZECLASS_REP_SIZE; i++) + { + sizeclass[i].objects_allocated += other.sizeclass[i].objects_allocated; + sizeclass[i].objects_deallocated += + other.sizeclass[i].objects_deallocated; + sizeclass[i].slabs_allocated += other.sizeclass[i].slabs_allocated; + sizeclass[i].slabs_deallocated += other.sizeclass[i].slabs_deallocated; + } + } + }; +} // namespace snmalloc \ No newline at end of file diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index 5ec7bf1f3..526cc14f7 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -1,6 +1,7 @@ #pragma once #include "../ds/ds.h" +#include "allocstats.h" #include "check_init.h" #include "freelist.h" #include "metadata.h" @@ -47,6 +48,9 @@ namespace snmalloc return ::memset(p, 0, round_size(size)); } + auto sc = size_to_sizeclass_full(size); + self->stats[sc].objects_allocated++; + return p; } @@ -181,6 +185,11 @@ namespace snmalloc */ Ticker ticker; + /** + * Tracks this allocators memory usage + */ + AllocStats stats; + /** * The message queue needs to be accessible from other threads * @@ -462,6 +471,9 @@ namespace snmalloc post(); } + // Push size to 
global statistics + RemoteDeallocCache::remote_inflight -= bytes_freed; + return action(args...); } @@ -513,16 +525,15 @@ namespace snmalloc freelist::Object::key_root, entry.get_slab_metadata()->as_key_tweak(), domesticate); - if (!need_post && !remote_dealloc_cache.reserve_space(entry, nelem)) - { - need_post = true; - } + + need_post |= remote_dealloc_cache.reserve_space(entry, nelem); + remote_dealloc_cache.template forward( entry.get_remote()->trunc_id(), msg); } template - SNMALLOC_FAST_PATH static auto dealloc_local_objects_fast( + SNMALLOC_FAST_PATH auto dealloc_local_objects_fast( capptr::Alloc msg, const PagemapEntry& entry, BackendSlabMetadata* meta, @@ -548,6 +559,8 @@ namespace snmalloc bytes_freed += objsize * length; + stats[entry.get_sizeclass()].objects_deallocated += static_cast(length); + // Update the head and the next pointer in the free list. meta->free_queue.append_segment( curr, @@ -636,6 +649,7 @@ namespace snmalloc if (SNMALLOC_LIKELY(!fl->empty())) { auto p = fl->take(key, domesticate); + stats[sizeclass].objects_allocated++; return finish_alloc(p, size); } @@ -721,6 +735,9 @@ namespace snmalloc address_cast(chunk), freelist::Object::key_root); self->laden.insert(meta); + auto sc = size_to_sizeclass_full(size); + self->stats[sc].slabs_allocated++; + // Make capptr system happy we are allowed to pass this to // `success`. auto p = capptr_reveal( @@ -866,6 +883,7 @@ namespace snmalloc } auto r = finish_alloc(p, size); + stats[sizeclass].slabs_allocated++; return ticker.check_tick(r); }, [](Allocator* a, size_t size) SNMALLOC_FAST_PATH_LAMBDA { @@ -1039,6 +1057,7 @@ namespace snmalloc */ if (SNMALLOC_LIKELY(public_state() == entry.get_remote())) { + stats[entry.get_sizeclass()].objects_deallocated++; dealloc_cheri_checks(p_tame.unsafe_ptr()); dealloc_local_object(p_tame, entry); return; @@ -1107,6 +1126,8 @@ namespace snmalloc // Remove from set of fully used slabs. 
meta->node.remove(); + stats[entry.get_sizeclass()].slabs_deallocated++; + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, p, size, entry.get_sizeclass()); @@ -1207,6 +1228,8 @@ namespace snmalloc // don't touch the cache lines at this point in snmalloc_check_client. auto start = clear_slab(meta, sizeclass); + stats[sizeclass].slabs_deallocated++; + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, @@ -1375,7 +1398,7 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; - size_t bytes_flushed = 0; // Not currently used. + size_t bytes_flushed = 0; if (destroy_queue) { @@ -1388,6 +1411,8 @@ namespace snmalloc entry, m, need_post, domesticate, bytes_flushed); }; + RemoteDeallocCache::remote_inflight -= bytes_flushed; + message_queue().destroy_and_iterate(domesticate, cb); } else @@ -1438,8 +1463,6 @@ namespace snmalloc } }); } - // Set the remote_dealloc_cache to immediately slow path. - remote_dealloc_cache.capacity = 0; return posted; } @@ -1508,6 +1531,11 @@ namespace snmalloc #endif return sent_something; } + + const AllocStats& get_stats() + { + return stats; + } }; template diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h index 2a94ec440..ebb2f85ff 100644 --- a/src/snmalloc/mem/pool.h +++ b/src/snmalloc/mem/pool.h @@ -32,9 +32,15 @@ namespace snmalloc FlagWord lock{}; capptr::Alloc list{nullptr}; + std::atomic count{0}; public: constexpr PoolState() = default; + + size_t get_count() + { + return count.load(std::memory_order_relaxed); + } }; /** @@ -124,6 +130,8 @@ namespace snmalloc p->list_next = pool.list; pool.list = p; + pool.count++; + p->set_in_use(); }); return p.unsafe_ptr(); diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index 045ab141b..5e75615bf 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -194,14 +194,17 @@ namespace snmalloc RemoteDeallocCacheBatchingImpl batching; + static inline Stat remote_inflight; + /** - * The 
total amount of memory we are waiting for before we will dispatch - * to other allocators. Zero can mean we have not initialised the allocator - * yet. This is initialised to the 0 so that we always hit a slow path to - * start with, when we hit the slow path and need to dispatch everything, we - * can check if we are a real allocator and lazily provide a real allocator. + * The total amount of bytes of memory in the cache. + * + * REMOTE_CACHE is used as the initial value, so that we always hit a slow + * path to start with, when we hit the slow path and need to dispatch + * everything, we can check if we are a real allocator and lazily provide a + * real allocator. */ - int64_t capacity{0}; + size_t cache_bytes{REMOTE_CACHE}; #ifndef NDEBUG bool initialised = false; @@ -236,10 +239,8 @@ namespace snmalloc auto size = n * static_cast(sizeclass_full_to_size(entry.get_sizeclass())); - bool result = capacity > size; - if (result) - capacity -= size; - return result; + cache_bytes += size; + return cache_bytes < REMOTE_CACHE; } template @@ -288,6 +289,9 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; + // We are about to post cache_bytes bytes to other allocators. + remote_inflight += cache_bytes; + batching.close_all([this]( RemoteAllocator::alloc_id_t target_id, capptr::Alloc msg) { @@ -357,7 +361,7 @@ namespace snmalloc } // Reset capacity as we have emptied everything - capacity = REMOTE_CACHE; + cache_bytes = 0; return sent_something; } @@ -382,9 +386,8 @@ namespace snmalloc // a null address. 
l.init(0, RemoteAllocator::key_global, NO_KEY_TWEAK); } - capacity = REMOTE_CACHE; - batching.init(); + cache_bytes = 0; } }; } // namespace snmalloc diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc new file mode 100644 index 000000000..a2d6dd436 --- /dev/null +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -0,0 +1,32 @@ +#include "snmalloc/snmalloc.h" + +#include + +void test_step() +{ + auto b = snmalloc::get_scoped_allocator(); + auto a = snmalloc::get_scoped_allocator(); + + for (size_t j = 0; j < 32; j++) + for (size_t i = 0; i < 20; i++) + { + auto p = a->alloc(snmalloc::bits::one_at_bit(i)); + if (p != nullptr) + b->dealloc(p); + p = b->alloc(snmalloc::bits::one_at_bit(i)); + if (p != nullptr) + a->dealloc(p); + } +} + +int main() +{ + for (size_t i = 0; i < 10000; i++) + { + if (i % 1000 == 0) { + std::cout << "Step " << i << std::endl; + snmalloc::print_alloc_stats(); + } + test_step(); + } +} \ No newline at end of file diff --git a/src/test/func/cleanup/cleanup.cc b/src/test/func/cleanup/cleanup.cc new file mode 100644 index 000000000..5e3666dc8 --- /dev/null +++ b/src/test/func/cleanup/cleanup.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include + +void ecall() +{ + auto a = snmalloc::get_scoped_allocator(); + std::vector allocs; + for (size_t j = 0; j < 1000; j++) + { + allocs.push_back(a->alloc(j % 1024)); + } + auto p = a->alloc(1 * 1024 * 1024); + memset(p, 0, 1 * 1024 * 1024); + + for (size_t j = 0; j < allocs.size(); j++) + a->dealloc(allocs[j]); + + a->dealloc(p); +} + +void thread_body() +{ + for (int i = 0; i < 1000; i++) + { + ecall(); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } +} + +void monitor_body() +{ + for (int i = 0; i < 60; i++) + { + std::cout << "Current: " + << snmalloc::Alloc::Config::Backend::get_current_usage() + << std::endl; + std::cout << "Peak : " + << snmalloc::Alloc::Config::Backend::get_peak_usage() + << std::endl; + std::cout << "Allocs : " << 
snmalloc::Alloc::Config::pool().get_count() + << std::endl; + std::cout << "--------------------------------------------" << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(1)); + } +} + +int main() +{ + std::vector threads; + for (int i = 0; i < 8; i++) + { + threads.push_back(std::thread(thread_body)); + } + threads.push_back(std::thread(monitor_body)); + + for (auto& t : threads) + t.join(); + return 0; +} \ No newline at end of file diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc index d66f060a1..032884a0d 100644 --- a/src/test/func/statistics/stats.cc +++ b/src/test/func/statistics/stats.cc @@ -11,6 +11,7 @@ void debug_check_empty_1() auto r = snmalloc::alloc(size); snmalloc::debug_check_empty(&result); + snmalloc::print_alloc_stats(); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -18,7 +19,11 @@ void debug_check_empty_1() abort(); } - snmalloc::dealloc(r); +snmalloc::print_alloc_stats(); + +snmalloc::dealloc(r); + + snmalloc::print_alloc_stats(); snmalloc::debug_check_empty(&result); if (result != true) @@ -27,8 +32,12 @@ void debug_check_empty_1() abort(); } + snmalloc::print_alloc_stats(); + r = snmalloc::alloc(size); + snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(&result); if (result != false) { @@ -37,14 +46,20 @@ void debug_check_empty_1() abort(); } + snmalloc::print_alloc_stats(); + snmalloc::dealloc(r); + snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; abort(); } + + snmalloc::print_alloc_stats(); } template diff --git a/src/test/perf/batchblitz/batchblitz.cc b/src/test/perf/batchblitz/batchblitz.cc new file mode 100644 index 000000000..496bc5a5d --- /dev/null +++ b/src/test/perf/batchblitz/batchblitz.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +size_t threads{0}; +size_t memory{0}; +size_t 
iterations{0}; + +// Global barrier for synchronising threads. +std::atomic barrier{0}; +std::atomic incarnation{0}; + +std::atomic stop{false}; + +std::vector> allocations; + +NOINLINE bool wait() +{ + auto old_incarnation = incarnation.load(); + // Register we have arrived at the barrier. + if (--barrier == 0) + { + printf("."); + fflush(stdout); + barrier = threads; + incarnation++; + return stop; + } + + while (incarnation.load() == old_incarnation) + { + if (stop) + return true; + snmalloc::Aal::pause(); + } + + return stop; +} + +void thread_func(size_t tid) +{ + size_t size = 4097; + size_t mem = memory / size; + for (size_t j = 0; j < iterations; j++) + { + if (wait()) + return; + std::vector& allocs = allocations[tid]; + for (size_t i = 0; i < mem; i++) + { + allocs.push_back(snmalloc::alloc(4097)); + } + if (wait()) + return; + std::vector& deallocs = allocations[(tid + 1) % threads]; + for (auto p : deallocs) + { + snmalloc::dealloc(p); + } + deallocs.clear(); + } +} + +int main() +{ + threads = std::thread::hardware_concurrency(); + barrier = threads; + + if (snmalloc::DefaultPal::address_bits == 32) + memory = snmalloc::bits::one_at_bit(30) / threads; + else + memory = snmalloc::bits::one_at_bit(32) / threads; + iterations = 1000; + + for (size_t i = 0; i < threads; i++) + allocations.emplace_back(); + + std::vector thread_pool; + for (size_t i = 0; i < threads; i++) + thread_pool.emplace_back(thread_func, i); + + for (size_t i = 0; i < 30; i++) + { + std::this_thread::sleep_for(std::chrono::seconds(1)); + snmalloc::print_alloc_stats(); + } + stop = true; + + + + for (auto& t : thread_pool) + t.join(); +} diff --git a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc new file mode 100644 index 000000000..1a540c0d9 --- /dev/null +++ b/src/test/perf/churn/churn.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include + +int main() +{ + std::vector threads; + std::atomic running; + snmalloc::Stat requests; + std::atomic done{false}; 
+ + for (size_t i = 0; i < 16; i++) + { + threads.push_back(std::thread([&running, &requests, &done]() { + std::queue q; + while (!done) + { + snmalloc::ScopedAllocator alloc; + running++; + + if (rand() % 1000 == 0) + { + // Deallocate everything in the queue + while (q.size() > 0) + { + auto p = q.front(); + requests -= *p; + alloc->dealloc(p); + q.pop(); + } + } + + for (size_t j = 0; j < 1000; j++) + { + if (q.size() >= 20000 || (q.size() > 0 && (rand() % 10 == 0))) + { + auto p = q.front(); + requests -= *p; + alloc->dealloc(p); + q.pop(); + } + else + { + size_t size = + (rand() % 1024 == 0) ? 16 * 1024 * (1 << (rand() % 3)) : 48; + requests += size; + auto p = (size_t*)alloc->alloc(size); + *p = size; + q.push(p); + } + } + + running--; + std::this_thread::sleep_for(std::chrono::microseconds(rand() % 2000)); + } + })); + } + + std::thread([&requests]() { + size_t count = 0; + while (count < 60) + { + count++; + std::this_thread::sleep_for(std::chrono::seconds(1)); + // std::cout << "Inflight: " << + // snmalloc::RemoteDeallocCache::remote_inflight << std::endl; std::cout + // << "Current reservation: " << snmalloc::Globals::get_current_usage() << + // std::endl; std::cout << "Peak reservation: " << + // snmalloc::Globals::get_peak_usage() << std::endl; std::cout << + // "Allocator count: " << snmalloc::Globals::pool().get_count() << + // std::endl; std::cout << "Running threads: " << running << + // std::endl; std::cout << "Index: " << count << std::endl; + // std::cout << "------------------------------------------" << std::endl; + std::cout << count << "," + << snmalloc::Alloc::Config::Backend::get_peak_usage() << "," + << snmalloc::Alloc::Config::Backend::get_current_usage() << "," + << requests.get_curr() << "," << requests.get_peak() << "," + << snmalloc::RemoteDeallocCache::remote_inflight.get_peak() + << "," + << snmalloc::RemoteDeallocCache::remote_inflight.get_curr() + << std::endl; + snmalloc::print_alloc_stats(); + } + }).join(); + + done = true; 
+ + for (auto& t : threads) + t.join(); + + return 0; +} \ No newline at end of file From efb4263a4cf3fb57cfda8b189c5d913e946a46e4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:26:26 +0000 Subject: [PATCH 03/19] conversion fix. --- src/snmalloc/mem/remotecache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index 5e75615bf..799ed4f5e 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -236,8 +236,7 @@ namespace snmalloc { static_assert(sizeof(n) * 8 > MAX_CAPACITY_BITS); - auto size = - n * static_cast(sizeclass_full_to_size(entry.get_sizeclass())); + size_t size = n * sizeclass_full_to_size(entry.get_sizeclass()); cache_bytes += size; return cache_bytes < REMOTE_CACHE; From fcfbd25f74f93216555c25c448c2c6c472571300 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:32:19 +0000 Subject: [PATCH 04/19] Fix header --- src/snmalloc/ds_core/stats.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index 55bdd2dd6..57ffcc9a4 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -1,7 +1,8 @@ -#include "defines.h" +#pragma once -#include -#include +#include "defines.h" +#include "snmalloc/stl/atomic.h" +#include "stddef.h" namespace snmalloc { From 229a3308415fd5a10186abadc4dd04f8667e0f52 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:32:39 +0000 Subject: [PATCH 05/19] Clangformat --- src/snmalloc/global/globalalloc.h | 3 ++- src/snmalloc/mem/corealloc.h | 3 ++- src/test/func/alloc_churn/alloc_churn.cc | 3 ++- src/test/func/statistics/stats.cc | 4 ++-- src/test/perf/batchblitz/batchblitz.cc | 2 -- src/test/perf/churn/churn.cc | 21 ++++++++++++--------- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/snmalloc/global/globalalloc.h 
b/src/snmalloc/global/globalalloc.h index a977d2d14..b06ce8bd1 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -85,7 +85,8 @@ namespace snmalloc } if (result == nullptr) - SNMALLOC_CHECK(RemoteDeallocCache::remote_inflight.get_curr() == 0); + SNMALLOC_CHECK( + RemoteDeallocCache::remote_inflight.get_curr() == 0); if (result != nullptr) { diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index 526cc14f7..c50b3262d 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -559,7 +559,8 @@ namespace snmalloc bytes_freed += objsize * length; - stats[entry.get_sizeclass()].objects_deallocated += static_cast(length); + stats[entry.get_sizeclass()].objects_deallocated += + static_cast(length); // Update the head and the next pointer in the free list. meta->free_queue.append_segment( diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc index a2d6dd436..23d3c3015 100644 --- a/src/test/func/alloc_churn/alloc_churn.cc +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -23,7 +23,8 @@ int main() { for (size_t i = 0; i < 10000; i++) { - if (i % 1000 == 0) { + if (i % 1000 == 0) + { std::cout << "Step " << i << std::endl; snmalloc::print_alloc_stats(); } diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc index 032884a0d..3bedcd55b 100644 --- a/src/test/func/statistics/stats.cc +++ b/src/test/func/statistics/stats.cc @@ -19,9 +19,9 @@ void debug_check_empty_1() abort(); } -snmalloc::print_alloc_stats(); + snmalloc::print_alloc_stats(); -snmalloc::dealloc(r); + snmalloc::dealloc(r); snmalloc::print_alloc_stats(); diff --git a/src/test/perf/batchblitz/batchblitz.cc b/src/test/perf/batchblitz/batchblitz.cc index 496bc5a5d..3dce75353 100644 --- a/src/test/perf/batchblitz/batchblitz.cc +++ b/src/test/perf/batchblitz/batchblitz.cc @@ -87,8 +87,6 @@ int main() } stop = true; - - for (auto& t : thread_pool) t.join(); } diff --git 
a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc index 1a540c0d9..910204a1d 100644 --- a/src/test/perf/churn/churn.cc +++ b/src/test/perf/churn/churn.cc @@ -65,7 +65,8 @@ int main() count++; std::this_thread::sleep_for(std::chrono::seconds(1)); // std::cout << "Inflight: " << - // snmalloc::RemoteDeallocCache::remote_inflight << std::endl; std::cout + // snmalloc::RemoteDeallocCache::remote_inflight << + // std::endl; std::cout // << "Current reservation: " << snmalloc::Globals::get_current_usage() << // std::endl; std::cout << "Peak reservation: " << // snmalloc::Globals::get_peak_usage() << std::endl; std::cout << @@ -73,14 +74,16 @@ int main() // std::endl; std::cout << "Running threads: " << running << // std::endl; std::cout << "Index: " << count << std::endl; // std::cout << "------------------------------------------" << std::endl; - std::cout << count << "," - << snmalloc::Alloc::Config::Backend::get_peak_usage() << "," - << snmalloc::Alloc::Config::Backend::get_current_usage() << "," - << requests.get_curr() << "," << requests.get_peak() << "," - << snmalloc::RemoteDeallocCache::remote_inflight.get_peak() - << "," - << snmalloc::RemoteDeallocCache::remote_inflight.get_curr() - << std::endl; + std::cout + << count << "," << snmalloc::Alloc::Config::Backend::get_peak_usage() + << "," << snmalloc::Alloc::Config::Backend::get_current_usage() << "," + << requests.get_curr() << "," << requests.get_peak() << "," + << snmalloc::RemoteDeallocCache::remote_inflight + .get_peak() + << "," + << snmalloc::RemoteDeallocCache::remote_inflight + .get_curr() + << std::endl; snmalloc::print_alloc_stats(); } }).join(); From ee46a3ffbfc2e4f51a7c43da9f8c9d82b4e4100b Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:58:26 +0000 Subject: [PATCH 06/19] Do not write to the default allocators state. 
--- src/snmalloc/mem/remotecache.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index 799ed4f5e..bd05e7a20 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -206,9 +206,7 @@ namespace snmalloc */ size_t cache_bytes{REMOTE_CACHE}; -#ifndef NDEBUG bool initialised = false; -#endif /// Used to find the index into the array of queues for remote /// deallocation @@ -238,8 +236,21 @@ namespace snmalloc size_t size = n * sizeclass_full_to_size(entry.get_sizeclass()); - cache_bytes += size; - return cache_bytes < REMOTE_CACHE; + size_t new_cache_bytes = cache_bytes + size; + if (SNMALLOC_UNLIKELY(new_cache_bytes > REMOTE_CACHE)) + { + // Check if this is the default allocator, and if not, we + // can update the state. + if (initialised) + { + cache_bytes = new_cache_bytes; + } + + return false; + } + + cache_bytes = new_cache_bytes; + return true; } template @@ -376,9 +387,8 @@ namespace snmalloc */ void init() { -#ifndef NDEBUG initialised = true; -#endif + for (auto& l : list) { // We do not need to initialise with a particular slab, so pass From 40e84f580233eec4439fcb23b78fe0961566eab3 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 21:13:12 +0000 Subject: [PATCH 07/19] temporarily disable test to get a cleaner CI run --- src/snmalloc/global/globalalloc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index b06ce8bd1..dfb679d89 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -84,9 +84,10 @@ namespace snmalloc } } - if (result == nullptr) - SNMALLOC_CHECK( - RemoteDeallocCache::remote_inflight.get_curr() == 0); + // Check why this doesn't hold. 
+ // if (result == nullptr) + // SNMALLOC_CHECK( + // RemoteDeallocCache::remote_inflight.get_curr() == 0); if (result != nullptr) { From 5dca6922327ecd77d787c1ec3f5c4bfa1d5c7a11 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 21:43:00 +0000 Subject: [PATCH 08/19] Change headers slightly. --- src/snmalloc/ds_aal/seqset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snmalloc/ds_aal/seqset.h b/src/snmalloc/ds_aal/seqset.h index 6046bca70..d59c53530 100644 --- a/src/snmalloc/ds_aal/seqset.h +++ b/src/snmalloc/ds_aal/seqset.h @@ -1,7 +1,7 @@ #pragma once #include "../aal/aal.h" -#include "../ds_core/ds_core.h" +#include "defines.h" #include "snmalloc/stl/type_traits.h" #include "snmalloc/stl/utility.h" From b8f369c55207f8c03a8bcd06a99d202a1d59c3bb Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 21:54:25 +0000 Subject: [PATCH 09/19] Move seqset as it uses pointeroffset and that is aal. --- src/snmalloc/ds_aal/seqset.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/snmalloc/ds_aal/seqset.h b/src/snmalloc/ds_aal/seqset.h index d59c53530..0ad18fb4d 100644 --- a/src/snmalloc/ds_aal/seqset.h +++ b/src/snmalloc/ds_aal/seqset.h @@ -1,7 +1,6 @@ #pragma once -#include "../aal/aal.h" -#include "defines.h" +#include "../ds_core/ds_core.h" #include "snmalloc/stl/type_traits.h" #include "snmalloc/stl/utility.h" From 1ed9ab2b40faaee74d22893f2eea881a747f756d Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 22:14:30 +0000 Subject: [PATCH 10/19] Fix stl --- src/snmalloc/ds_core/stats.h | 21 +++++++++++---------- src/snmalloc/global/globalalloc.h | 2 +- src/snmalloc/mem/pool.h | 4 ++-- src/snmalloc/stl/gnu/atomic.h | 2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index 57ffcc9a4..f949dda5d 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -18,8 +18,9 @@ 
namespace snmalloc public: void increase(size_t amount) { - size_t c = (curr += amount); - size_t p = peak.load(std::memory_order_relaxed); + size_t old = curr.fetch_add(amount); + size_t c = old + amount; + size_t p = peak.load(stl::memory_order_relaxed); while (c > p) { if (peak.compare_exchange_strong(p, c)) @@ -37,12 +38,12 @@ namespace snmalloc size_t get_curr() { - return curr.load(std::memory_order_relaxed); + return curr.load(stl::memory_order_relaxed); } size_t get_peak() { - return peak.load(std::memory_order_relaxed); + return peak.load(stl::memory_order_relaxed); } void operator+=(size_t amount) @@ -71,28 +72,28 @@ namespace snmalloc */ class MonotoneLocalStat { - std::atomic value{0}; + stl::Atomic value{0}; public: void operator++(int) { - value.fetch_add(1, std::memory_order_relaxed); + value.fetch_add(1, stl::memory_order_relaxed); } void operator+=(const MonotoneLocalStat& other) { - auto v = other.value.load(std::memory_order_relaxed); - value.fetch_add(v, std::memory_order_relaxed); + auto v = other.value.load(stl::memory_order_relaxed); + value.fetch_add(v, stl::memory_order_relaxed); } void operator+=(size_t v) { - value.fetch_add(v, std::memory_order_relaxed); + value.fetch_add(v, stl::memory_order_relaxed); } size_t operator*() { - return value.load(std::memory_order_relaxed); + return value.load(stl::memory_order_relaxed); } }; } // namespace snmalloc diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index dfb679d89..66ebb125c 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -147,7 +147,7 @@ namespace snmalloc template inline static void print_alloc_stats() { - static std::atomic dump{0}; + static stl::Atomic dump{0}; auto l_dump = dump++; if (l_dump == 0) diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h index ebb2f85ff..6e6354734 100644 --- a/src/snmalloc/mem/pool.h +++ b/src/snmalloc/mem/pool.h @@ -32,14 +32,14 @@ namespace snmalloc FlagWord lock{}; 
capptr::Alloc list{nullptr}; - std::atomic count{0}; + stl::Atomic count{0}; public: constexpr PoolState() = default; size_t get_count() { - return count.load(std::memory_order_relaxed); + return count.load(stl::memory_order_relaxed); } }; diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h index 7a193972e..82fff30d9 100644 --- a/src/snmalloc/stl/gnu/atomic.h +++ b/src/snmalloc/stl/gnu/atomic.h @@ -89,7 +89,7 @@ namespace snmalloc return load(); } - SNMALLOC_FAST_PATH T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) + SNMALLOC_FAST_PATH T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const { T res; __atomic_load(addressof(val), addressof(res), order(mem_ord)); From be43d4cd9eac002f572cdc8c07e4aa4d705eb84a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sun, 23 Mar 2025 08:50:19 +0000 Subject: [PATCH 11/19] stl const? --- src/snmalloc/stl/gnu/atomic.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h index 82fff30d9..83fb8ce57 100644 --- a/src/snmalloc/stl/gnu/atomic.h +++ b/src/snmalloc/stl/gnu/atomic.h @@ -63,6 +63,11 @@ namespace snmalloc return __builtin_addressof(ref); } + SNMALLOC_FAST_PATH static const T* addressof(const T& ref) + { + return __builtin_addressof(ref); + } + // From libc++: // require types that are 1, 2, 4, 8, or 16 bytes in length to be aligned // to at least their size to be potentially From b3d9ff8e79d7e07f086b213f42414fcbc1fbc896 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sun, 23 Mar 2025 09:04:12 +0000 Subject: [PATCH 12/19] Fixing CI --- src/snmalloc/stl/gnu/atomic.h | 3 ++- src/test/func/alloc_churn/alloc_churn.cc | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h index 83fb8ce57..bb7145f65 100644 --- a/src/snmalloc/stl/gnu/atomic.h +++ b/src/snmalloc/stl/gnu/atomic.h @@ -94,7 +94,8 @@ namespace snmalloc return load(); } - SNMALLOC_FAST_PATH T 
load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const + SNMALLOC_FAST_PATH T + load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const { T res; __atomic_load(addressof(val), addressof(res), order(mem_ord)); diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc index 23d3c3015..1cc320ea8 100644 --- a/src/test/func/alloc_churn/alloc_churn.cc +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -21,9 +21,9 @@ void test_step() int main() { - for (size_t i = 0; i < 10000; i++) + for (size_t i = 0; i < 1000; i++) { - if (i % 1000 == 0) + if (i % 100 == 0) { std::cout << "Step " << i << std::endl; snmalloc::print_alloc_stats(); From e395751357156d556403898d47fa04fb732ace6d Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 16:21:24 +0000 Subject: [PATCH 13/19] Fix inflight check --- src/snmalloc/global/globalalloc.h | 12 ++++++++---- src/snmalloc/mem/corealloc.h | 4 ++-- src/test/func/alloc_churn/alloc_churn.cc | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index 66ebb125c..582454470 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -84,10 +84,14 @@ namespace snmalloc } } - // Check why this doesn't hold. 
- // if (result == nullptr) - // SNMALLOC_CHECK( - // RemoteDeallocCache::remote_inflight.get_curr() == 0); + if ( + result == nullptr && + RemoteDeallocCache::remote_inflight.get_curr() != 0) + { + report_fatal_error( + "debug_check_empty: remote inflight deallocations left {}}", + RemoteDeallocCache::remote_inflight.get_curr()); + } if (result != nullptr) { diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index c50b3262d..fe5e66a2b 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -1412,9 +1412,9 @@ namespace snmalloc entry, m, need_post, domesticate, bytes_flushed); }; - RemoteDeallocCache::remote_inflight -= bytes_flushed; - message_queue().destroy_and_iterate(domesticate, cb); + + RemoteDeallocCache::remote_inflight -= bytes_flushed; } else { diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc index 1cc320ea8..ebfe87774 100644 --- a/src/test/func/alloc_churn/alloc_churn.cc +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -27,6 +27,7 @@ int main() { std::cout << "Step " << i << std::endl; snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(); } test_step(); } From 5b4e542b6564505fd8891a0f2b8105c0ffbe8749 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 20:30:18 +0000 Subject: [PATCH 14/19] Fix inflight statistic --- src/snmalloc/mem/corealloc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index fe5e66a2b..c9321b424 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -526,6 +526,10 @@ namespace snmalloc entry.get_slab_metadata()->as_key_tweak(), domesticate); + // Need to account for forwarded bytes. 
+ size_t size = nelem * sizeclass_full_to_size(entry.get_sizeclass()); + bytes_returned += size; + need_post |= remote_dealloc_cache.reserve_space(entry, nelem); remote_dealloc_cache.template forward( @@ -1399,10 +1403,10 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; - size_t bytes_flushed = 0; - if (destroy_queue) { + size_t bytes_flushed = 0; + auto cb = [this, domesticate, &bytes_flushed](capptr::Alloc m) { bool need_post = true; // Always going to post, so ignore. From f97e74183c8762f6314e0f7afd3ad4fe341dd530 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 21:22:14 +0000 Subject: [PATCH 15/19] Remove interlocked from fast path. --- src/snmalloc/ds_core/stats.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index f949dda5d..29d4c9a42 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -77,7 +77,8 @@ namespace snmalloc public: void operator++(int) { - value.fetch_add(1, stl::memory_order_relaxed); + auto old = value.load(stl::memory_order_relaxed); + value.store(old + 1, stl::memory_order_relaxed); } void operator+=(const MonotoneLocalStat& other) @@ -88,7 +89,8 @@ namespace snmalloc void operator+=(size_t v) { - value.fetch_add(v, stl::memory_order_relaxed); + auto old = value.load(stl::memory_order_relaxed); + value.store(old + v, stl::memory_order_relaxed); } size_t operator*() From 28c7f4b29c2a3926e1d27def1e46eee467878a45 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 25 Mar 2025 12:28:37 +0000 Subject: [PATCH 16/19] Fix comment --- src/snmalloc/mem/corealloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index c9321b424..edb9d0fe0 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -589,7 +589,7 @@ namespace snmalloc * - alloc(size_t) * - small_alloc(size_t) * - gets allocation from 
a fast free list and is done. - * - if no fast free list, + * - otherwise no fast free list and calls small_alloc_slow * - check for message queue * - small_refill(size_t) * - If another free list is available, use it. From fa8814e342b82c369dbc3cda1d4f63165c7505c0 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 19 Feb 2026 15:22:07 +0000 Subject: [PATCH 17/19] WIP: Rebasing to latest snmalloc. --- src/snmalloc/ds_core/stats.h | 8 +++---- src/snmalloc/global/threadalloc.h | 2 +- src/snmalloc/mem/corealloc.h | 34 +++++++++++++++++------------- src/snmalloc/mem/freelist_queue.h | 2 +- src/snmalloc/mem/remoteallocator.h | 2 +- src/snmalloc/override/new.cc | 12 +++++++---- 6 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index 29d4c9a42..37c5f5609 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -75,25 +75,25 @@ namespace snmalloc stl::Atomic value{0}; public: - void operator++(int) + SNMALLOC_FAST_PATH void operator++(int) { auto old = value.load(stl::memory_order_relaxed); value.store(old + 1, stl::memory_order_relaxed); } - void operator+=(const MonotoneLocalStat& other) + SNMALLOC_FAST_PATH void operator+=(const MonotoneLocalStat& other) { auto v = other.value.load(stl::memory_order_relaxed); value.fetch_add(v, stl::memory_order_relaxed); } - void operator+=(size_t v) + SNMALLOC_FAST_PATH void operator+=(size_t v) { auto old = value.load(stl::memory_order_relaxed); value.store(old + v, stl::memory_order_relaxed); } - size_t operator*() + SNMALLOC_FAST_PATH size_t operator*() { return value.load(stl::memory_order_relaxed); } diff --git a/src/snmalloc/global/threadalloc.h b/src/snmalloc/global/threadalloc.h index d037995e5..d170c631d 100644 --- a/src/snmalloc/global/threadalloc.h +++ b/src/snmalloc/global/threadalloc.h @@ -173,7 +173,7 @@ namespace snmalloc return s(); } - return check_init_slow(r, args...); + return check_init_slow(r, args...); } }; # 
ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index edb9d0fe0..241b6bab9 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -33,7 +33,8 @@ namespace snmalloc class DefaultConts { public: - static void* success(void* p, size_t size, bool secondary_allocator = false) + template + SNMALLOC_FAST_PATH static void* success(Alloc* self, void* p, size_t size, bool secondary_allocator = false) { UNUSED(secondary_allocator); SNMALLOC_ASSERT(p != nullptr); @@ -54,7 +55,7 @@ namespace snmalloc return p; } - static void* failure(size_t size) noexcept + SNMALLOC_FAST_PATH static void* failure(size_t size) noexcept { UNUSED(size); // If we are here, then the allocation failed. @@ -68,11 +69,11 @@ namespace snmalloc using Zero = DefaultConts; using Uninit = DefaultConts; - template + template inline static SNMALLOC_FAST_PATH void* - finish_alloc(freelist::HeadPtr p, size_t size) + finish_alloc(Alloc* self, freelist::HeadPtr p, size_t size) { - return Conts::success(capptr_reveal(p.as_void()), size, false); + return Conts::success(self, capptr_reveal(p.as_void()), size, false); } struct FastFreeLists @@ -185,11 +186,13 @@ namespace snmalloc */ Ticker ticker; + public: /** * Tracks this allocators memory usage */ AllocStats stats; + private: /** * The message queue needs to be accessible from other threads * @@ -655,7 +658,7 @@ namespace snmalloc { auto p = fl->take(key, domesticate); stats[sizeclass].objects_allocated++; - return finish_alloc(p, size); + return finish_alloc(this, p, size); } return handle_message_queue( @@ -711,7 +714,7 @@ namespace snmalloc }); if (result != nullptr) { - return Conts::success(result, size, true); + return Conts::success(self, result, size, true); } // Grab slab of correct size @@ -747,7 +750,7 @@ namespace snmalloc // `success`. 
auto p = capptr_reveal( capptr_chunk_is_alloc(capptr_to_user_address_control(chunk))); - return Conts::success(p, size); + return Conts::success(self, p, size); } return Conts::failure(size); @@ -774,17 +777,18 @@ namespace snmalloc if (result != nullptr) { - result = Conts::success(result, size, true); - // We need to check for initialisation here in the case where // this is the first allocation in the system, so snmalloc has // not initialised the pagemap. If this allocation is subsequently // deallocated, before snmalloc is initialised, then it will fail // to access the pagemap. return CheckInit::check_init( - [result]() { return result; }, - [](Allocator*, void* result) { return result; }, - result); + [result, this, size]() { return Conts::success(this, result, size, true); }, + [](Allocator* self, void* result, size_t size) { + return Conts::success(self, result, size, true); + }, + result, + size); } // Look to see if we can grab a free list. @@ -826,7 +830,7 @@ namespace snmalloc laden.insert(meta); } - auto r = finish_alloc(p, size); + auto r = finish_alloc(this, p, size); return ticker.check_tick(r); } return small_refill_slow( @@ -887,7 +891,7 @@ namespace snmalloc laden.insert(meta); } - auto r = finish_alloc(p, size); + auto r = finish_alloc(this, p, size); stats[sizeclass].slabs_allocated++; return ticker.check_tick(r); }, diff --git a/src/snmalloc/mem/freelist_queue.h b/src/snmalloc/mem/freelist_queue.h index 452033249..b3fa5a2d9 100644 --- a/src/snmalloc/mem/freelist_queue.h +++ b/src/snmalloc/mem/freelist_queue.h @@ -82,7 +82,7 @@ namespace snmalloc } } - inline bool can_dequeue() + SNMALLOC_FAST_PATH bool can_dequeue() { return front.load(stl::memory_order_relaxed) != back.load(stl::memory_order_relaxed); diff --git a/src/snmalloc/mem/remoteallocator.h b/src/snmalloc/mem/remoteallocator.h index 57d7c31e0..97f9d0cfc 100644 --- a/src/snmalloc/mem/remoteallocator.h +++ b/src/snmalloc/mem/remoteallocator.h @@ -327,7 +327,7 @@ namespace snmalloc 
return list.destroy_and_iterate(domesticate, cbwrap); } - inline bool can_dequeue() + SNMALLOC_FAST_PATH bool can_dequeue() { return list.can_dequeue(); } diff --git a/src/snmalloc/override/new.cc b/src/snmalloc/override/new.cc index 7a97c6651..68d234303 100644 --- a/src/snmalloc/override/new.cc +++ b/src/snmalloc/override/new.cc @@ -28,12 +28,16 @@ namespace snmalloc class Base { public: - static void* - success(void* p, size_t size, bool secondary_allocator = false) + template + SNMALLOC_FAST_PATH static void* + success(Alloc* self, void* p, size_t size, bool secondary_allocator = false) { UNUSED(secondary_allocator, size); SNMALLOC_ASSERT(p != nullptr); + auto sc = size_to_sizeclass_full(size); + self->stats[sc].objects_allocated++; + SNMALLOC_ASSERT( secondary_allocator || is_start_of_object(size_to_sizeclass_full(size), address_cast(p))); @@ -45,7 +49,7 @@ namespace snmalloc class Throw : public Base { public: - static void* failure(size_t size) + SNMALLOC_FAST_PATH static void* failure(size_t size) { // Throw std::bad_alloc on failure. 
auto new_handler = std::get_new_handler(); @@ -67,7 +71,7 @@ namespace snmalloc class NoThrow : public Base { public: - static void* failure(size_t size) noexcept + SNMALLOC_FAST_PATH static void* failure(size_t size) noexcept { auto new_handler = std::get_new_handler(); if (new_handler != nullptr) From 3af40d2a06b60545e3e003881b722d1f78116343 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 19 Feb 2026 15:24:45 +0000 Subject: [PATCH 18/19] CF --- src/snmalloc/mem/corealloc.h | 7 +++++-- src/snmalloc/override/new.cc | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index 241b6bab9..9e51d1bac 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -34,7 +34,8 @@ namespace snmalloc { public: template - SNMALLOC_FAST_PATH static void* success(Alloc* self, void* p, size_t size, bool secondary_allocator = false) + SNMALLOC_FAST_PATH static void* + success(Alloc* self, void* p, size_t size, bool secondary_allocator = false) { UNUSED(secondary_allocator); SNMALLOC_ASSERT(p != nullptr); @@ -783,7 +784,9 @@ namespace snmalloc // deallocated, before snmalloc is initialised, then it will fail // to access the pagemap. 
return CheckInit::check_init( - [result, this, size]() { return Conts::success(this, result, size, true); }, + [result, this, size]() { + return Conts::success(this, result, size, true); + }, [](Allocator* self, void* result, size_t size) { return Conts::success(self, result, size, true); }, diff --git a/src/snmalloc/override/new.cc b/src/snmalloc/override/new.cc index 68d234303..396d95940 100644 --- a/src/snmalloc/override/new.cc +++ b/src/snmalloc/override/new.cc @@ -29,8 +29,8 @@ namespace snmalloc { public: template - SNMALLOC_FAST_PATH static void* - success(Alloc* self, void* p, size_t size, bool secondary_allocator = false) + SNMALLOC_FAST_PATH static void* success( + Alloc* self, void* p, size_t size, bool secondary_allocator = false) { UNUSED(secondary_allocator, size); SNMALLOC_ASSERT(p != nullptr); From 9a7d04b33b7e90c606eb84bf719843d0be8e74f9 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 19 Feb 2026 15:37:09 +0000 Subject: [PATCH 19/19] Fix inlining for failure. --- src/snmalloc/mem/corealloc.h | 2 +- src/snmalloc/override/new.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index 9e51d1bac..68888f3ec 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -56,7 +56,7 @@ namespace snmalloc return p; } - SNMALLOC_FAST_PATH static void* failure(size_t size) noexcept + static void* failure(size_t size) noexcept { UNUSED(size); // If we are here, then the allocation failed. diff --git a/src/snmalloc/override/new.cc b/src/snmalloc/override/new.cc index 396d95940..bf0437dec 100644 --- a/src/snmalloc/override/new.cc +++ b/src/snmalloc/override/new.cc @@ -49,7 +49,7 @@ namespace snmalloc class Throw : public Base { public: - SNMALLOC_FAST_PATH static void* failure(size_t size) + static void* failure(size_t size) { // Throw std::bad_alloc on failure. 
auto new_handler = std::get_new_handler(); @@ -71,7 +71,7 @@ namespace snmalloc class NoThrow : public Base { public: - SNMALLOC_FAST_PATH static void* failure(size_t size) noexcept + static void* failure(size_t size) noexcept { auto new_handler = std::get_new_handler(); if (new_handler != nullptr)