Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "7.5.8"
version = "7.5.9"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
25 changes: 19 additions & 6 deletions src/lib/blkalloc/append_blk_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,30 @@ bool AppendBlkAllocator::is_blk_alloced(const BlkId& in_bid, bool) const {
}

void AppendBlkAllocator::reset() {
// Reset in-place: restore all in-memory counters to the initial empty state and mark dirty
// so the next cp_flush persists commit_offset=0 to the existing superblock.
//
// We intentionally do NOT destroy the superblock or deregister the handler here, unlike
// BitmapBlkAllocator::reset(). AppendBlkAllocator is managed by the upper layer (HomeObject
// GC), which reuses the same chunk across multiple GC cycles. Keeping the same allocator
// instance alive avoids the race described in https://github.com/eBay/HomeObject/issues/401:
//
// T1: cp_flush holds a shared_ptr to the old allocator
// T2: reset() destroys m_sb [old behavior]
// T3: gc_repl_reqs calls free() on old allocator -> m_is_dirty=true
// T4: cp_flush writes to the destroyed m_sb -> crash
//
// With in-place reset the superblock is always valid, so T4 is safe regardless of T3.
std::lock_guard lg(m_sb_mtx);
m_is_dirty.store(false);
m_sb.destroy();
meta_service().deregister_handler(get_name());
m_last_append_offset.store(0);
m_freeable_nblks.store(0);
m_commit_offset.store(0);
m_is_dirty.store(true);
}

bool AppendBlkAllocator::is_blk_alloced_on_disk(BlkId const& bid, bool use_lock) const {
std::lock_guard lg(m_sb_mtx);
if (!m_sb.is_empty()) { return bid.blk_num() < m_sb->commit_offset; }
// if allocator is reset, the sb will be destroyed, and all the blks will be treated as free;
return false;
return bid.blk_num() < m_sb->commit_offset;
}

std::string AppendBlkAllocator::get_name() const { return "AppendBlkAlloc_chunk_" + std::to_string(m_chunk_id); }
Expand Down
12 changes: 10 additions & 2 deletions src/lib/device/virtual_dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ void VirtualDev::add_chunk(cshared< Chunk >& chunk, bool is_fresh_chunk) {
}

void VirtualDev::reset_chunk_blk_allocator(Chunk* chunk) {
if (m_allocator_type == blk_allocator_type_t::append) {
// AppendBlkAllocator resets in-place via its reset() method and no new allocator instance is created.
// Creating a new instance would require deregistering the old handler and registering a
// new one, which introduces a window where a concurrent cp_flush holding the old
// allocator's shared_ptr could write to an already-destroyed superblock.
// See https://github.com/eBay/HomeObject/issues/401.

LOGINFO("AppendBlkAllocator resets in-place, skip creating new instance for chunk {}", chunk->chunk_id());
return;
}
auto ba = create_blk_allocator(m_allocator_type, block_size(), chunk->physical_dev()->optimal_page_size(),
chunk->physical_dev()->align_size(), chunk->size(), m_auto_recovery,
chunk->chunk_id(), true /* is_init */, m_use_slab_in_blk_allocator);
Expand Down Expand Up @@ -743,8 +753,6 @@ void VirtualDev::cp_flush(VDevCPContext* v_cp_ctx) {
// pass down cp so that underlying components can get their customized CP context if needed;
m_chunk_selector->foreach_chunks([this, cp](cshared< Chunk >& chunk) {
HS_LOG(TRACE, device, "Flushing chunk: {}, vdev: {}", chunk->chunk_id(), m_vdev_info.name);
// Hold shared_ptr to prevent allocator from being reset during cp_flush.
// This fixes race with GC's purge_reserved_chunk() which calls reset_block_allocator().
auto alloc_ptr = chunk->blk_allocator_shared();

#ifdef _PRERELEASE
Expand Down
60 changes: 53 additions & 7 deletions src/tests/test_data_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,8 +1123,8 @@ TEST_F(BlkDataServiceAppendTest, TestResetCompletedBeforeCpFlush) {
LOGINFO("Scenario 1: PASSED - Order: get_ptr -> reset_block_allocator -> cp_flush");
}

// Scenario 2: Order: cp_flush holds shared_ptr -> old allocator reset -> cp_flush on old -> create new allocator
// Test that cp_flush executes after old allocator reset, then new allocator is created
// Scenario 2: Order: cp_flush holds shared_ptr -> allocator reset (in-place) -> cp_flush completes on same allocator
// Test that cp_flush completes safely after in-place reset of the same allocator
TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) {
vdev_info vinfo;
auto data_vdev = inst().open_vdev(vinfo, true);
Expand Down Expand Up @@ -1185,12 +1185,12 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) {
LOGINFO("Scenario 2: Old allocator reset() completed");
old_allocator_reset.store(true);

// Wait for cp_flush to complete on old allocator before creating new one
// Wait for cp_flush to complete before reset thread continues
for (int i = 0; i < 200 && !cp_flush_completed.load(); ++i) {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
RELEASE_ASSERT(cp_flush_completed.load(), "cp_flush should complete before creating new allocator");
LOGINFO("Scenario 2: cp_flush completed, proceeding to create new allocator");
RELEASE_ASSERT(cp_flush_completed.load(), "cp_flush should complete before reset thread continues");
LOGINFO("Scenario 2: cp_flush completed, reset thread proceeding");
}));

std::thread flush_thread([&]() {
Expand All @@ -1205,7 +1205,7 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) {
std::thread reset_thread([&]() {
chunk->reset_block_allocator();
reset_completed.store(true);
LOGINFO("Scenario 2: reset_block_allocator completed (new allocator created)");
LOGINFO("Scenario 2: reset_block_allocator completed (in-place reset)");
});

flush_thread.join();
Expand All @@ -1219,7 +1219,7 @@ TEST_F(BlkDataServiceAppendTest, TestResetDuringCpFlushHoldsSharedPtr) {
ASSERT_TRUE(cp_flush_completed.load());
ASSERT_TRUE(reset_completed.load());
ASSERT_TRUE(old_allocator_reset.load());
LOGINFO("Scenario 2: PASSED - Order: get_ptr -> old_allocator_reset -> cp_flush -> create new allocator");
LOGINFO("Scenario 2: PASSED - Order: get_ptr -> allocator_reset (in-place) -> cp_flush on same allocator");
}

// Scenario 3: Order: cp_flush acquires mutex first -> reset_block_allocator blocks on mutex
Expand Down Expand Up @@ -1299,6 +1299,52 @@ TEST_F(BlkDataServiceAppendTest, TestCpFlushBlocksResetWithMutex) {
LOGINFO("Scenario 3: PASSED - mutex protected m_sb from concurrent reset_block_allocator during cp_flush");
}

// Scenario 4: Order: cp_flush holds shared_ptr -> reset() completes -> free() re-sets dirty on old allocator
// -> cp_flush runs on old allocator
// Test the race from https://github.com/eBay/HomeObject/issues/401:
// with the old AppendBlkAllocator::reset() (destroy + recreate):
// - reset() destroys m_sb of the old allocator
// - free() sets m_is_dirty=true on the old allocator (whose m_sb is already destroyed)
// - cp_flush writes to the destroyed m_sb -> crash / undefined behavior
TEST_F(BlkDataServiceAppendTest, TestFreeAfterResetBeforeCpFlushSafe) {
vdev_info vinfo;
auto data_vdev = inst().open_vdev(vinfo, true);
auto chunks = data_vdev->get_chunks();

auto it = std::find_if(chunks.begin(), chunks.end(), [](const auto& pair) {
return pair.second && pair.second->blk_allocator() &&
pair.second->blk_allocator()->get_name().find("append_chunk_") == 0;
});
ASSERT_NE(it, chunks.end()) << "No AppendBlkAllocator chunks found";
auto chunk = it->second;
auto chunk_id = chunk->chunk_id();

LOGINFO("Scenario 4: Testing chunk_id={}", chunk_id);

// Simulate cp_flush holding a reference to the old allocator before reset begins.
LOGINFO("Scenario 4: Simulate cp_flush acquiring shared_ptr to old allocator before reset");
auto old_alloc = chunk->blk_allocator_shared();

// Simulate GC's purge_reserved_chunk calling reset_block_allocator().
LOGINFO("Scenario 4: Simulate GC calling reset_block_allocator on the chunk");
chunk->reset_block_allocator();
auto alloc_after_reset = chunk->blk_allocator_shared();
ASSERT_FALSE(old_alloc.owner_before(alloc_after_reset) || alloc_after_reset.owner_before(old_alloc));

// Simulate gc_repl_reqs calling free() on the old allocator reference after reset
LOGINFO("Scenario 4: Simulate gc_repl_reqs calling free() on old allocator reference after reset");
BlkId fake_bid{0, 1, chunk_id};
old_alloc->free(fake_bid);

// Simulate cp_flush running on the old allocator reference.
// With fix: safe - m_sb is still valid, writes commit_offset=0 correctly.
// Without fix: UB - m_sb was destroyed in reset(), writing to it crashes.
LOGINFO("Scenario 4: Simulate cp_flush running on old allocator reference after reset and free");
old_alloc->cp_flush(nullptr);

LOGINFO("Scenario 4: PASSED - free() + cp_flush on old allocator after reset is safe");
}

#endif // _PRERELEASE

// Stream related test
Expand Down
Loading