Skip to content

Commit 6577d87

Browse files
authored
gh-144438: Fix false sharing between QSBR and tlbc_index (gh-144554)
Align the QSBR thread state array to a 64-byte cache line boundary and add padding at the end of _PyThreadStateImpl. Depending on heap layout, the QSBR array could end up sharing a cache line with a thread's tlbc_index, causing QSBR quiescent state updates to contend with reads of tlbc_index in RESUME_CHECK. Whether this overlap occurs depends on earlier allocations during interpreter init, so the problem can appear or disappear with seemingly unrelated changes. Either change alone is sufficient to fix the specific issue, but both are worthwhile to avoid similar problems in the future.
1 parent 1bd8cf9 commit 6577d87

File tree

4 files changed

+24
-7
lines changed

4 files changed

+24
-7
lines changed

Include/internal/pycore_qsbr.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,9 @@ struct _qsbr_shared {
8383
// Minimum observed read sequence of all QSBR thread states
8484
uint64_t rd_seq;
8585

86-
// Array of QSBR thread states.
86+
// Array of QSBR thread states (aligned to 64 bytes).
8787
struct _qsbr_pad *array;
88+
void *array_raw; // raw allocation pointer (for free)
8889
Py_ssize_t size;
8990

9091
// Freelist of unused _qsbr_thread_states (protected by mutex)

Include/internal/pycore_tstate.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ typedef struct _PyThreadStateImpl {
102102
#if _Py_TIER2
103103
struct _PyJitTracerState *jit_tracer_state;
104104
#endif
105+
106+
#ifdef Py_GIL_DISABLED
107+
// gh-144438: Add padding to ensure that the fields above don't share a
108+
// cache line with other allocations.
109+
char __padding[64];
110+
#endif
105111
} _PyThreadStateImpl;
106112

107113
#ifdef __cplusplus
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Align the QSBR thread state array to a 64-byte cache line boundary to
2+
avoid false sharing in the :term:`free-threaded build`.

Python/qsbr.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,29 @@ grow_thread_array(struct _qsbr_shared *shared)
8585
new_size = MIN_ARRAY_SIZE;
8686
}
8787

88-
struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
89-
if (array == NULL) {
88+
// Overallocate by 63 bytes so we can align to a 64-byte boundary.
89+
// This avoids potential false sharing between the first entry and other
90+
// allocations.
91+
size_t alignment = 64;
92+
size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + alignment - 1;
93+
void *raw = PyMem_RawCalloc(1, alloc_size);
94+
if (raw == NULL) {
9095
return -1;
9196
}
97+
struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment);
9298

93-
struct _qsbr_pad *old = shared->array;
94-
if (old != NULL) {
99+
void *old_raw = shared->array_raw;
100+
if (shared->array != NULL) {
95101
memcpy(array, shared->array, shared->size * sizeof(*array));
96102
}
97103

98104
shared->array = array;
105+
shared->array_raw = raw;
99106
shared->size = new_size;
100107
shared->freelist = NULL;
101108
initialize_new_array(shared);
102109

103-
PyMem_RawFree(old);
110+
PyMem_RawFree(old_raw);
104111
return 0;
105112
}
106113

@@ -257,8 +264,9 @@ void
257264
_Py_qsbr_fini(PyInterpreterState *interp)
258265
{
259266
struct _qsbr_shared *shared = &interp->qsbr;
260-
PyMem_RawFree(shared->array);
267+
PyMem_RawFree(shared->array_raw);
261268
shared->array = NULL;
269+
shared->array_raw = NULL;
262270
shared->size = 0;
263271
shared->freelist = NULL;
264272
}

0 commit comments

Comments
 (0)