Skip to content

Commit 52192b5

Browse files
committed
Abstract clock-sweep buffer replacement algorithm
Re-author the clock-sweep algorithm such that it maintains its own state and has a well-defined API.
1 parent dddcd59 commit 52192b5

3 files changed

Lines changed: 57 additions & 52 deletions

File tree

src/backend/storage/buffer/README

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,13 @@ small limit value) whenever the buffer is pinned. (This requires only the
166166
buffer header spinlock, which would have to be taken anyway to increment the
167167
buffer reference count, so it's nearly free.)
168168

169-
The "clock hand" is a buffer index, nextVictimBuffer, that moves circularly
170-
through all the available buffers. nextVictimBuffer and completePasses are
171-
atomic values.
169+
The "clock hand" is a buffer index that moves circularly through all the
170+
available buffers.
172171

173172
The algorithm for a process that needs to obtain a victim buffer is:
174173

175-
1. Select the buffer pointed to by nextVictimBuffer, and circularly advance
176-
nextVictimBuffer for next time.
174+
1. Select the buffer pointed to by the clock hand, and circularly advance it
175+
for next time.
177176

178177
2. If the selected buffer is pinned or has a nonzero usage count, it cannot be
179178
used. Decrement its usage count (if nonzero), return to step 1 to examine the
@@ -235,13 +234,12 @@ Background Writer's Processing
235234
------------------------------
236235

237236
The background writer is designed to write out pages that are likely to be
238-
recycled soon, thereby offloading the writing work from active backends.
239-
To do this, it scans forward circularly from the current position of
240-
nextVictimBuffer (which it does not change!), looking for buffers that are
241-
dirty and not pinned nor marked with a positive usage count. It pins,
242-
writes, and releases any such buffer.
237+
recycled soon, thereby offloading the writing work from active backends. To do
238+
this, it scans forward circularly from the current position of the clock hand (which it
239+
does not change!), looking for buffers that are dirty and not pinned nor marked
240+
with a positive usage count. It pins, writes, and releases any such buffer.
243241

244-
We enforce reading nextVictimBuffer within an atomic action so it needs only to
242+
We enforce reading the clock hand within an atomic action so it needs only to
245243
spinlock each buffer header for long enough to check the dirtybit. Even
246244
without that assumption, the writer only needs to take the lock long enough to
247245
read the variable value, not while scanning the buffers. (This is a very

src/backend/storage/buffer/freelist.c

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,22 @@
2323

2424
#define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var))))
2525

26+
typedef struct ClockSweep
27+
{
28+
pg_atomic_uint64 counter; /* Only incremented by one */
29+
uint32_t size; /* Size of the clock */
30+
} ClockSweep;
2631

2732
/*
2833
* The shared freelist control information.
2934
*/
30-
typedef struct {
35+
typedef struct
36+
{
3137
/*
32-
* The clock-sweep hand is atomically updated by 1 at every tick. Use the
33-
* macro CLOCK_HAND_POSITION() to find the next victim's index in the
34-
* BufferDescriptor array. To calculate the number of times the clock-sweep
35-
* hand has made a complete pass through all available buffers in the pool
36-
* divide the counter by NBuffers.
38+
* The next buffer available for use is determined by the clock-sweep
39+
* algorithm.
3740
*/
38-
pg_atomic_uint64 nextVictimBuffer;
41+
ClockSweep clock;
3942

4043
/*
4144
* Statistics. These counters should be wide enough that they can't
@@ -86,32 +89,40 @@ static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
8689
static void AddBufferToRing(BufferAccessStrategy strategy,
8790
BufferDesc *buf);
8891

89-
#define CLOCK_HAND_POSITION(counter) \
90-
((counter) & 0xFFFFFFFF) % NBuffers
92+
static void
93+
ClockSweepInit(ClockSweep *sweep, uint32 size)
94+
{
95+
pg_atomic_init_u64(&sweep->counter, 0);
96+
sweep->size = size;
97+
}
9198

92-
/*
93-
* ClockSweepTick - Helper routine for StrategyGetBuffer()
94-
*
95-
* Move the clock hand one buffer ahead of its current position and return the
96-
* id of the buffer now under the hand.
97-
*/
99+
/* Extract the number of complete cycles from the clock hand */
98100
static inline uint32
99-
ClockSweepTick(void)
101+
ClockSweepCycles(ClockSweep *sweep)
100102
{
101-
uint64 hand = UINT64_MAX;
102-
uint32 victim;
103+
uint64 current = pg_atomic_read_u64(&sweep->counter);
103104

104-
/*
105-
* Atomically move hand ahead one buffer - if there's several processes
106-
* doing this, this can lead to buffers being returned slightly out of
107-
* apparent order.
108-
*/
109-
hand = pg_atomic_fetch_add_u64(&StrategyControl->nextVictimBuffer, 1);
105+
return current / sweep->size;
106+
}
107+
108+
/* Return the current position of the clock's hand modulo size */
109+
static inline uint32
110+
ClockSweepPosition(ClockSweep *sweep)
111+
{
112+
uint64 counter = pg_atomic_read_u64(&sweep->counter);
113+
114+
return ((counter) & 0xFFFFFFFF) % sweep->size;
115+
}
110116

111-
victim = CLOCK_HAND_POSITION(hand);
112-
Assert(victim < NBuffers);
117+
/*
118+
* Advance the clock hand by one and return the position it held before the advance.
119+
*/
120+
static inline uint32
121+
ClockSweepTick(ClockSweep *sweep)
122+
{
123+
uint64 counter = pg_atomic_fetch_add_u64(&sweep->counter, 1);
113124

114-
return victim;
125+
return ((counter) & 0xFFFFFFFF) % sweep->size;
115126
}
116127

117128
/*
@@ -181,11 +192,11 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
181192
*/
182193
pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
183194

184-
/* Use the "clock-sweep" algorithm to find a free buffer */
195+
/* Use the clock-sweep algorithm to find a free buffer */
185196
trycounter = NBuffers;
186197
for (;;)
187198
{
188-
buf = GetBufferDescriptor(ClockSweepTick());
199+
buf = GetBufferDescriptor(ClockSweepTick(&StrategyControl->clock));
189200

190201
/*
191202
* If the buffer is pinned or has a nonzero usage_count, we cannot use
@@ -236,19 +247,14 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
236247
* buffer allocs if non-NULL pointers are passed. The alloc count is reset
237248
* after being read.
238249
*/
239-
uint32 StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc) {
240-
uint64 counter = UINT64_MAX; uint32 result;
241-
242-
counter = pg_atomic_read_u64(&StrategyControl->nextVictimBuffer);
243-
result = CLOCK_HAND_POSITION(counter);
250+
uint32
251+
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
252+
{
253+
uint32 result = ClockSweepPosition(&StrategyControl->clock);
244254

245255
if (complete_passes)
246256
{
247-
/*
248-
* The number of complete passes is the counter divided by NBuffers
249-
* because the clock hand is a 64-bit counter that only increases.
250-
*/
251-
*complete_passes = (uint32) (counter / NBuffers);
257+
*complete_passes = ClockSweepCycles(&StrategyControl->clock);
252258
}
253259

254260
if (num_buf_alloc)
@@ -335,8 +341,8 @@ StrategyInitialize(bool init)
335341
*/
336342
Assert(init);
337343

338-
/* Initialize combined clock-sweep pointer/complete passes counter */
339-
pg_atomic_init_u64(&StrategyControl->nextVictimBuffer, 0);
344+
/* Initialize the clock-sweep algorithm */
345+
ClockSweepInit(&StrategyControl->clock, NBuffers);
340346

341347
/* Clear statistics */
342348
pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ ClientCertName
426426
ClientConnectionInfo
427427
ClientData
428428
ClientSocket
429+
ClockSweep
429430
ClonePtrType
430431
ClosePortalStmt
431432
ClosePtrType

0 commit comments

Comments
 (0)