Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 28 additions & 23 deletions include/iso_alloc_ds.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ typedef int64_t bitmap_index_t;
typedef uint16_t zone_lookup_table_t;
typedef uint16_t chunk_lookup_table_t;

#if ZONE_FREE_LIST_SZ > 255
#if ZONE_FREE_LIST_SZ >= 255
typedef uint16_t free_bit_slot_t;
#define FREE_LIST_SHF 16
#else
Expand All @@ -28,31 +28,36 @@ typedef uint8_t free_bit_slot_t;
#endif

typedef struct {
void *user_pages_start; /* Start of the pages backing this zone */
void *bitmap_start; /* Start of the bitmap */
int64_t next_free_bit_slot; /* The last bit slot returned by get_next_free_bit_slot */
bit_slot_t free_bit_slots[ZONE_FREE_LIST_SZ]; /* A cache of bit slots that point to freed chunks */
uint64_t canary_secret; /* Each zone has its own canary secret */
uint64_t pointer_mask; /* Each zone has its own pointer protection secret */
bitmap_index_t max_bitmap_idx; /* Max bitmap index for this bitmap */
uint32_t chunk_size; /* Size of chunks managed by this zone */
uint32_t bitmap_size; /* Size of the bitmap in bytes */
uint32_t af_count; /* Increment/Decrement with each alloc/free operation */
uint32_t chunk_count; /* Total number of chunks in this zone */
uint32_t alloc_count; /* Total number of lifetime allocations */
uint16_t index; /* Zone index */
uint16_t next_sz_index; /* What is the index of the next zone of this size */
free_bit_slot_t free_bit_slots_index; /* Tracks how many entries in the cache are filled */
free_bit_slot_t free_bit_slots_usable; /* The oldest members of the free cache are served first */
int8_t preallocated_bitmap_idx; /* The bitmap is preallocated and its index */
#if CPU_PIN
uint8_t cpu_core; /* What CPU core this zone is pinned to */
#endif
bool internal; /* Zones can be managed by iso_alloc or private */
bool is_full; /* Flags whether this zone is full to avoid bit slot searches */
/* Hot fields: all fit within the first 64-byte cache line.
* These are accessed on every alloc/free operation, so keeping
* them colocated avoids extra cache misses */
void *user_pages_start; /* Start of the pages backing this zone */
void *bitmap_start; /* Start of the bitmap */
int64_t next_free_bit_slot; /* The last bit slot returned by get_next_free_bit_slot */
uint64_t canary_secret; /* Each zone has its own canary secret */
uint64_t pointer_mask; /* Each zone has its own pointer protection secret */
bitmap_index_t max_bitmap_idx; /* Max bitmap index for this bitmap */
uint32_t chunk_size; /* Size of chunks managed by this zone */
free_bit_slot_t free_bit_slots_usable; /* The oldest members of the free cache are served first */
free_bit_slot_t free_bit_slots_index; /* Tracks how many entries in the cache are filled */
bool is_full; /* Flags whether this zone is full to avoid bit slot searches */
bool internal; /* Zones can be managed by iso_alloc or private */
#if MEMORY_TAGGING
bool tagged; /* Zone supports memory tagging */
#endif
int8_t preallocated_bitmap_idx; /* The bitmap is preallocated and its index */
#if CPU_PIN
uint8_t cpu_core; /* What CPU core this zone is pinned to */
#endif
/* Warm/cold fields: accessed less frequently */
uint32_t bitmap_size; /* Size of the bitmap in bytes */
uint32_t af_count; /* Increment/Decrement with each alloc/free operation */
uint32_t chunk_count; /* Total number of chunks in this zone */
uint32_t alloc_count; /* Total number of lifetime allocations */
uint16_t index; /* Zone index */
uint16_t next_sz_index; /* What is the index of the next zone of this size */
/* Large cold array: only accessed when refilling the free list */
bit_slot_t free_bit_slots[ZONE_FREE_LIST_SZ]; /* A cache of bit slots that point to freed chunks */
} __attribute__((packed, aligned(sizeof(int64_t)))) iso_alloc_zone_t;

/* Meta data for big allocations are allocated near the
Expand Down
93 changes: 46 additions & 47 deletions src/iso_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -603,15 +603,19 @@ INTERNAL_HIDDEN void fill_free_bit_slots(iso_alloc_zone_t *zone) {
}
}
} else {
for(uint64_t j = 0; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
if((GET_BIT(bts, j)) == 0) {
free_bit_slots[free_bit_slots_index] = (bm_idx_shf + j);
free_bit_slots_index++;
/* Use ctzll to skip directly to each free slot instead
* of iterating all 32 even-bit positions */
uint64_t free_mask = ~(uint64_t) bts & USED_BIT_VECTOR;

if(UNLIKELY(free_bit_slots_index >= ZONE_FREE_LIST_SZ)) {
break;
}
while(free_mask) {
free_bit_slots[free_bit_slots_index] = (bm_idx_shf + __builtin_ctzll(free_mask));
free_bit_slots_index++;

if(UNLIKELY(free_bit_slots_index >= ZONE_FREE_LIST_SZ)) {
break;
}

free_mask &= free_mask - 1; /* clear lowest set bit */
}
}
}
Expand Down Expand Up @@ -729,24 +733,19 @@ INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone)

for(bitmap_index_t i = 0; i < max; i += 2) {
int64x2_t im = vld1q_s64(&bm[i]);
int64_t i0 = vgetq_lane_s64(im, 0);

if(i0 != USED_BIT_VECTOR) {
for(int64_t j = 0; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
if((GET_BIT(i0, j)) == 0) {
return ((i << BITS_PER_QWORD_SHIFT) + j);
}
}
/* Use ctzll to find the first free slot in O(1) instead of
* scanning bit-by-bit. USED_BIT_VECTOR selects even-position
* bits (one per chunk); inverting gives 1s where chunks are
* free (use bitwise ops on multiple values) */
uint64_t free_mask0 = ~(uint64_t) vgetq_lane_s64(im, 0) & USED_BIT_VECTOR;
if(free_mask0) {
return ((i << BITS_PER_QWORD_SHIFT) + __builtin_ctzll(free_mask0));
}

int64_t i1 = vgetq_lane_s64(im, 1);

if(i1 != USED_BIT_VECTOR) {
for(int64_t j = 0; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
if((GET_BIT(i1, j)) == 0) {
return (((i + 1) << BITS_PER_QWORD_SHIFT) + j);
}
}
uint64_t free_mask1 = ~(uint64_t) vgetq_lane_s64(im, 1) & USED_BIT_VECTOR;
if(free_mask1) {
return (((i + 1) << BITS_PER_QWORD_SHIFT) + __builtin_ctzll(free_mask1));
}
}

Expand All @@ -758,28 +757,26 @@ INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone)
max = (zone->max_bitmap_idx >> 1);

for(size_t i = 0; i < max; i++) {
__int128_t bts = ebm[i];
unsigned __int128 bts = (unsigned __int128) ebm[i];

for(int64_t j = 0; j < BITS_PER_ODWORD; j += BITS_PER_CHUNK) {
if((GET_BIT(bts, j)) == 0) {
return ((i << BITS_PER_ODWORD_SHIFT) + j);
}
/* Split 128-bit word into two 64-bit halves and use ctzll */
uint64_t free_lo = ~(uint64_t) bts & USED_BIT_VECTOR;
if(free_lo) {
return ((i << BITS_PER_ODWORD_SHIFT) + __builtin_ctzll(free_lo));
}

uint64_t free_hi = ~(uint64_t) (bts >> 64) & USED_BIT_VECTOR;
if(free_hi) {
return ((i << BITS_PER_ODWORD_SHIFT) + 64 + __builtin_ctzll(free_hi));
}
}
#endif
bm = (bitmap_index_t *) zone->bitmap_start;

for(bitmap_index_t i = max; i < zone->max_bitmap_idx; i++) {
bit_slot_t bts = bm[i];

if(bts != USED_BIT_VECTOR) {
continue;
}

for(int64_t j = 0; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
if((GET_BIT(bts, j)) == 0) {
return ((i << BITS_PER_QWORD_SHIFT) + j);
}
uint64_t free_mask = ~(uint64_t) bm[i] & USED_BIT_VECTOR;
if(free_mask) {
return ((i << BITS_PER_QWORD_SHIFT) + __builtin_ctzll(free_mask));
}
}

Expand Down Expand Up @@ -1130,11 +1127,13 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t s
/* Hot Path: Check the zone cache for a zone this
* thread recently used for an alloc/free operation.
* It's likely we are allocating a similar size chunk
* and this will speed up that operation */
* and this will speed up that operation.
* Scan newest-to-oldest (LIFO) since the most recently
* used zone is most likely to have free slots available. */
size_t _zone_cache_count = zone_cache_count;
_tzc *tzc = zone_cache;

for(size_t i = 0; i < _zone_cache_count; i++) {
for(size_t i = _zone_cache_count; i-- > 0;) {
if(tzc[i].chunk_size >= size) {
iso_alloc_zone_t *_zone = tzc[i].zone;
if(is_zone_usable(_zone, size) != NULL) {
Expand Down Expand Up @@ -1415,15 +1414,15 @@ INTERNAL_HIDDEN void iso_free_chunk_from_zone(iso_alloc_zone_t *zone, void *rest
#else
const uint64_t chunk_offset = (uint64_t) (p - UNMASK_USER_PTR(zone));
#endif

const size_t chunk_number = (chunk_offset / zone->chunk_size);
const size_t chunk_size = zone->chunk_size;
const size_t chunk_number = (chunk_offset / chunk_size);
const bit_slot_t bit_slot = (chunk_number << BITS_PER_CHUNK_SHIFT);
const bit_slot_t dwords_to_bit_slot = (bit_slot >> BITS_PER_QWORD_SHIFT);

/* Ensure the pointer is a multiple of chunk size. */
if(UNLIKELY((chunk_offset % zone->chunk_size) != 0)) {
if(UNLIKELY((chunk_offset % chunk_size) != 0)) {
LOG_AND_ABORT("Chunk %d at 0x%p is not a multiple of zone[%d] chunk size %d. Off by %lu bits",
chunk_offset, p, zone->index, zone->chunk_size, (chunk_offset & (zone->chunk_size - 1)));
chunk_offset, p, zone->index, chunk_size, (chunk_offset & (chunk_size - 1)));
}

if(UNLIKELY(dwords_to_bit_slot > zone->max_bitmap_idx)) {
Expand Down Expand Up @@ -1455,10 +1454,10 @@ INTERNAL_HIDDEN void iso_free_chunk_from_zone(iso_alloc_zone_t *zone, void *rest
UNSET_BIT(b, which_bit);
insert_free_bit_slot(zone, bit_slot);
#if !ENABLE_ASAN && SANITIZE_CHUNKS
__iso_memset(p, POISON_BYTE, zone->chunk_size);
__iso_memset(p, POISON_BYTE, chunk_size);
#endif
} else {
__iso_memset(p, POISON_BYTE, zone->chunk_size);
__iso_memset(p, POISON_BYTE, chunk_size);
}

bm[dwords_to_bit_slot] = b;
Expand All @@ -1475,14 +1474,14 @@ INTERNAL_HIDDEN void iso_free_chunk_from_zone(iso_alloc_zone_t *zone, void *rest
if((chunk_number + 1) != zone->chunk_count) {
const bit_slot_t bit_slot_over = ((chunk_number + 1) << BITS_PER_CHUNK_SHIFT);
if((GET_BIT(bm[(bit_slot_over >> BITS_PER_QWORD_SHIFT)], (WHICH_BIT(bit_slot_over) + 1))) == 1) {
check_canary(zone, p + zone->chunk_size);
check_canary(zone, p + chunk_size);
}
}

if(chunk_number != 0) {
const bit_slot_t bit_slot_under = ((chunk_number - 1) << BITS_PER_CHUNK_SHIFT);
if((GET_BIT(bm[(bit_slot_under >> BITS_PER_QWORD_SHIFT)], (WHICH_BIT(bit_slot_under) + 1))) == 1) {
check_canary(zone, p - zone->chunk_size);
check_canary(zone, p - chunk_size);
}
}
#endif
Expand Down