From f922a27fec0e6bd447bccfa32f00e3df59b88835 Mon Sep 17 00:00:00 2001
From: Jie Yao <jyao3@ebaychina.com>
Date: Mon, 9 Mar 2026 14:56:29 +0800
Subject: [PATCH 1/4] [scrubber] phase1: add scrub manager

Add comprehensive scrub infrastructure to detect data corruption and
inconsistencies across replicas in HomeObject. This is phase 1 of the
scrubber implementation.

- Implements deep and shallow scrubbing for PG metadata, shards, and blobs
- Supports periodic and manual scrub triggering modes
- Uses priority queue (MPMCPriorityQueue) for scrub task scheduling
- Persists scrub metadata using superblocks to track last scrub times
- Coordinates scrub operations across all replicas in a PG

1. **Deep Scrub**: Full data integrity verification
   - PG metadata validation
   - Shard existence and consistency checks
   - Blob hash verification (reads data and computes checksums)
   - Detects corrupted, missing, and inconsistent data across replicas

2. **Shallow Scrub**: Lightweight metadata-only verification
   - Shard existence checks
   - Blob index validation (no data reads)
   - Faster execution for routine checks

- FlatBuffer-based serialization for scrub requests and responses
- Leader sends scrub requests to all replicas
- Followers return scrub maps with their local state
- Retry logic with configurable timeouts for reliability

- **ShallowScrubReport**: Tracks missing shards and blobs per peer
- **DeepScrubReport**: Extends shallow report with:
  - Corrupted blobs/shards with error details
  - Inconsistent blobs (different hashes across replicas)
  - Corrupted PG metadata

- Scrubs data in configurable ranges to avoid timeouts
- Shard range: 2M shards per request
- Blob range: Based on HDD IOPS for deep scrub, 2M for shallow
- Early cancellation support for graceful shutdown

1. **DeepScrubTest**: Verifies detection of:
   - Missing blobs on followers
   - Missing shards on followers
   - Corrupted blob data (IO errors)
   - Inconsistent blob hashes across replicas

2. **MPMCPriorityQueue Tests**: Thread-safe (mutex-based) queue validation
   - Concurrent push/pop operations
   - Priority ordering verification
   - Thread safety under contention
---
 CHANGELOG.md                                  |   13 -
 conanfile.py                                  |    2 +-
 src/include/homeobject/common.hpp             |    2 +-
 src/lib/homeobject_impl.hpp                   |    2 +
 src/lib/homestore_backend/CMakeLists.txt      |   27 +-
 .../homestore_backend/MPMCPriorityQueue.hpp   |  188 ++
 src/lib/homestore_backend/gc_manager.cpp      |   16 +-
 src/lib/homestore_backend/gc_manager.hpp      |   12 +-
 src/lib/homestore_backend/hs_homeobject.cpp   |   30 +-
 src/lib/homestore_backend/hs_homeobject.hpp   |   43 +-
 .../hs_homeobject_fbs/blob_scrub_req.fbs      |   13 +
 .../hs_homeobject_fbs/deep_blob_scrub_map.fbs |   31 +
 .../deep_shard_scrub_map.fbs                  |   16 +
 .../hs_backend_config.fbs                     |    4 +
 .../hs_homeobject_fbs/meta_scrub_req.fbs      |   13 +
 .../hs_homeobject_fbs/pg_meta_scrub_map.fbs   |   11 +
 .../hs_homeobject_fbs/pg_meta_scrub_req.fbs   |   10 +
 .../resync_blob_data.fbs                      |    0
 .../resync_pg_data.fbs                        |    0
 .../resync_shard_data.fbs                     |    0
 .../hs_homeobject_fbs/scrub_common.fbs        |   23 +
 .../shallow_blob_scrub_map.fbs                |   13 +
 .../shallow_shard_scrub_map.fbs               |   13 +
 .../hs_homeobject_fbs/shard_scrub_req.fbs     |   13 +
 src/lib/homestore_backend/hs_pg_manager.cpp   |  283 ++-
 .../homestore_backend/hs_shard_manager.cpp    |   26 +-
 .../replication_state_machine.cpp             |   37 +-
 .../replication_state_machine.hpp             |    8 +
 src/lib/homestore_backend/scrub_manager.cpp   | 1878 +++++++++++++++++
 src/lib/homestore_backend/scrub_manager.hpp   |  471 +++++
 .../homestore_backend/tests/CMakeLists.txt    |    9 +
 .../tests/hs_scrubber_tests.cpp               |  569 +++++
 .../tests/test_mpmc_priority_queue.cpp        |  417 ++++
 33 files changed, 4111 insertions(+), 82 deletions(-)
 delete mode 100644 CHANGELOG.md
 create mode 100644 src/lib/homestore_backend/MPMCPriorityQueue.hpp
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/blob_scrub_req.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/deep_blob_scrub_map.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/deep_shard_scrub_map.fbs
 rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/hs_backend_config.fbs (93%)
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/meta_scrub_req.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_map.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_req.fbs
 rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/resync_blob_data.fbs (100%)
 rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/resync_pg_data.fbs (100%)
 rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/resync_shard_data.fbs (100%)
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/shallow_blob_scrub_map.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/shallow_shard_scrub_map.fbs
 create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/shard_scrub_req.fbs
 create mode 100644 src/lib/homestore_backend/scrub_manager.cpp
 create mode 100644 src/lib/homestore_backend/scrub_manager.hpp
 create mode 100644 src/lib/homestore_backend/tests/hs_scrubber_tests.cpp
 create mode 100644 src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp

diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index 51f00cd2e..000000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Changelog
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## [Unreleased]
-
-### Added
-
-- Created repository
-
-[Unreleased]: https://github.com/eBay/HomeObject/compare/...HEAD
diff --git a/conanfile.py b/conanfile.py
index 565d84535..73fd2f159 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -10,7 +10,7 @@
 
 class HomeObjectConan(ConanFile):
     name = "homeobject"
-    version = "4.1.5"
+    version = "4.2.0"
 
     homepage = "https://github.com/eBay/HomeObject"
     description = "Blob Store built on HomeStore"
diff --git a/src/include/homeobject/common.hpp b/src/include/homeobject/common.hpp
index 29a0589a9..63eff1305 100644
--- a/src/include/homeobject/common.hpp
+++ b/src/include/homeobject/common.hpp
@@ -14,7 +14,7 @@
 
 SISL_LOGGING_DECL(homeobject);
 
-#define HOMEOBJECT_LOG_MODS homeobject, blobmgr, shardmgr, gcmgr
+#define HOMEOBJECT_LOG_MODS homeobject, blobmgr, shardmgr, gcmgr, scrubmgr
 
 #ifndef Ki
 constexpr uint64_t Ki = 1024ul;
diff --git a/src/lib/homeobject_impl.hpp b/src/lib/homeobject_impl.hpp
index 4eb2af48f..4a0841fdd 100644
--- a/src/lib/homeobject_impl.hpp
+++ b/src/lib/homeobject_impl.hpp
@@ -70,6 +70,8 @@ struct PG {
     std::atomic< bool > is_dirty_{false};
     ShardPtrList shards_;
 
+    blob_id_t get_last_blob_id() const { return durable_entities_.blob_sequence_num; }
+
     void durable_entities_update(auto&& cb, bool dirty = true) {
         cb(durable_entities_);
         if (dirty) { is_dirty_.store(true, std::memory_order_relaxed); }
diff --git a/src/lib/homestore_backend/CMakeLists.txt b/src/lib/homestore_backend/CMakeLists.txt
index 441492a6b..f71daae72 100644
--- a/src/lib/homestore_backend/CMakeLists.txt
+++ b/src/lib/homestore_backend/CMakeLists.txt
@@ -30,6 +30,8 @@ target_sources("${PROJECT_NAME}_homestore" PRIVATE
     hs_cp_callbacks.cpp
     hs_http_manager.cpp
     gc_manager.cpp
+    scrub_manager.cpp
+    MPMCPriorityQueue.hpp
     $<TARGET_OBJECTS:${PROJECT_NAME}_core>
 )
 target_link_libraries("${PROJECT_NAME}_homestore" PUBLIC
@@ -42,10 +44,19 @@ settings_gen_cpp(
     ${FLATBUFFERS_FLATC_EXECUTABLE}
     ${CMAKE_CURRENT_BINARY_DIR}/generated/
     "${PROJECT_NAME}_homestore"
-    hs_backend_config.fbs
-    resync_pg_data.fbs
-    resync_shard_data.fbs
-    resync_blob_data.fbs
+    hs_homeobject_fbs/hs_backend_config.fbs
+    hs_homeobject_fbs/resync_pg_data.fbs
+    hs_homeobject_fbs/resync_shard_data.fbs
+    hs_homeobject_fbs/resync_blob_data.fbs
+    hs_homeobject_fbs/deep_blob_scrub_map.fbs
+    hs_homeobject_fbs/shallow_blob_scrub_map.fbs
+    hs_homeobject_fbs/blob_scrub_req.fbs
+    hs_homeobject_fbs/shard_scrub_req.fbs
+    hs_homeobject_fbs/deep_shard_scrub_map.fbs
+    hs_homeobject_fbs/shallow_shard_scrub_map.fbs
+    hs_homeobject_fbs/pg_meta_scrub_req.fbs
+    hs_homeobject_fbs/pg_meta_scrub_map.fbs
+    hs_homeobject_fbs/scrub_common.fbs
   )
 
 # Unit test objects
@@ -155,3 +166,11 @@ add_test(NAME HomestoreTestGC COMMAND homestore_test_gc -csv error --executor im
         --override_config hs_backend_config.gc_garbage_rate_threshold=0 
         --override_config hs_backend_config.gc_scan_interval_sec=5)
 
+add_executable(homestore_test_scrubber)
+target_sources(homestore_test_scrubber PRIVATE $<TARGET_OBJECTS:homestore_tests_scrubber>)
+target_link_libraries(homestore_test_scrubber PUBLIC homeobject_homestore ${COMMON_TEST_DEPS})
+add_test(NAME HomestoreTestScrubber COMMAND homestore_test_scrubber -csv error --executor immediate --config_path ./
+        --override_config hs_backend_config.enable_scrubber=true
+        --override_config nuraft_mesg_config.mesg_factory_config.data_request_deadline_secs:10)
+
+
diff --git a/src/lib/homestore_backend/MPMCPriorityQueue.hpp b/src/lib/homestore_backend/MPMCPriorityQueue.hpp
new file mode 100644
index 000000000..585e2925e
--- /dev/null
+++ b/src/lib/homestore_backend/MPMCPriorityQueue.hpp
@@ -0,0 +1,188 @@
+#pragma once
+
+#include <condition_variable>
+#include <concepts>
+#include <cstddef>
+#include <functional>
+#include <mutex>
+#include <optional>
+#include <queue>
+#include <utility>
+#include <vector>
+
+namespace homeobject {
+
+/**
+ * @brief Multi-Producer Multi-Consumer Priority Queue (C++20)
+ *
+ * Mutex-and-condition-variable based (not lock-free) priority queue that supports:
+ * - Concurrent push operations from multiple producers
+ * - Concurrent pop operations from multiple consumers
+ * - Blocking pop when queue is empty
+ * - Graceful shutdown via close() method
+ *
+ * @tparam T Element type (must satisfy std::regular)
+ * @tparam Compare Comparison function (default: std::less, so the largest element is popped first)
+ */
+template < typename T, typename Compare = std::less< T > >
+    requires std::regular< T > && std::predicate< Compare, T, T >
+class MPMCPriorityQueue {
+public:
+    using value_type = T;
+    using size_type = std::size_t;
+    using comparator_type = Compare;
+
+    /**
+     * @brief Status codes returned by pop operations
+     */
+    enum class Status : uint8_t {
+        Ok,    ///< Successfully popped an element
+        Closed ///< Queue is closed and empty; no element was returned
+    };
+
+    /**
+     * @brief Result of a pop operation
+     */
+    struct PopResult {
+        Status status;
+        std::optional< T > value; ///< Has value only if status == Ok
+
+        // Convenience methods
+        [[nodiscard]] constexpr bool is_ok() const noexcept { return status == Status::Ok; }
+        [[nodiscard]] constexpr bool is_closed() const noexcept { return status == Status::Closed; }
+    };
+
+    /**
+     * @brief Construct an empty priority queue
+     */
+    constexpr MPMCPriorityQueue() noexcept(std::is_nothrow_default_constructible_v< Compare >) = default;
+
+    /**
+     * @brief Destructor - automatically closes the queue
+     */
+    ~MPMCPriorityQueue() { close(); }
+
+    // Non-copyable and non-movable: the mutex and condition variable members are neither
+    MPMCPriorityQueue(const MPMCPriorityQueue&) = delete;
+    MPMCPriorityQueue& operator=(const MPMCPriorityQueue&) = delete;
+    MPMCPriorityQueue(MPMCPriorityQueue&&) = delete;
+    MPMCPriorityQueue& operator=(MPMCPriorityQueue&&) = delete;
+
+    /**
+     * @brief Thread-safe push operation (copy)
+     *
+     * @param value Element to insert
+     * @note No-op if queue is closed
+     */
+    void push(const T& value) {
+        {
+            std::scoped_lock lock(mutex_);
+            if (closed_) [[unlikely]] {
+                return; // Silently ignore pushes to closed queue
+            }
+            pq_.push(value);
+        }
+        cv_.notify_one(); // Notify after releasing the lock; wakes one waiting consumer
+    }
+
+    /**
+     * @brief Thread-safe push operation (move)
+     *
+     * @param value Element to insert (will be moved)
+     * @note No-op if queue is closed
+     */
+    void push(T&& value) {
+        {
+            std::scoped_lock lock(mutex_);
+            if (closed_) [[unlikely]] { return; }
+            pq_.push(std::move(value));
+        }
+        cv_.notify_one();
+    }
+
+    /**
+     * @brief Thread-safe pop operation
+     *
+     * Blocks while the queue is empty and not closed.
+     * Never blocks once closed: remaining elements are still returned, then Status::Closed.
+     *
+     * @return PopResult with the top-priority element (Status::Ok) or no value (Status::Closed)
+     * @note Thread-safe for multiple concurrent consumers
+     */
+    [[nodiscard]] PopResult pop() {
+        std::unique_lock lock(mutex_);
+
+        // Wait until queue has elements or is closed
+        cv_.wait(lock, [this] { return closed_ || !pq_.empty(); });
+
+        // top() is const-only, hence the const_cast to move; the moved-from slot is popped right after
+        if (!pq_.empty()) {
+            T top = std::move(const_cast< T& >(pq_.top()));
+            pq_.pop();
+            return PopResult{.status = Status::Ok, .value = std::move(top)};
+        }
+
+        // Queue is empty and closed
+        return PopResult{.status = Status::Closed, .value = std::nullopt};
+    }
+
+    /**
+     * @brief Close the queue
+     *
+     * After calling close():
+     * - All blocked pop() calls will wake up
+     * - Existing elements can still be popped
+     * - New push() calls will be ignored
+     * - pop() returns Status::Closed when queue becomes empty
+     *
+     * @note Thread-safe and idempotent
+     */
+    void close() noexcept {
+        {
+            std::scoped_lock lock(mutex_);
+            closed_ = true;
+        }
+        cv_.notify_all(); // Wake all waiting consumers
+    }
+
+    /**
+     * @brief Get current number of elements
+     *
+     * @return Number of elements in the queue
+     * @note Thread-safe, but only a snapshot: the value may be stale once the lock is released
+     */
+    [[nodiscard]] size_type size() const {
+        std::scoped_lock lock(mutex_);
+        return pq_.size();
+    }
+
+    /**
+     * @brief Check if queue is empty
+     *
+     * @return true if queue has no elements
+     * @note Thread-safe, but only a snapshot: the value may be stale once the lock is released
+     */
+    [[nodiscard]] bool empty() const {
+        std::scoped_lock lock(mutex_);
+        return pq_.empty();
+    }
+
+    /**
+     * @brief Check if queue is closed
+     *
+     * @return true if close() has been called
+     * @note Thread-safe
+     */
+    [[nodiscard]] bool is_closed() const {
+        std::scoped_lock lock(mutex_);
+        return closed_;
+    }
+
+private:
+    mutable std::mutex mutex_;
+    std::condition_variable cv_;
+    bool closed_{false};
+    std::priority_queue< T, std::vector< T >, Compare > pq_;
+};
+
+} // namespace homeobject
diff --git a/src/lib/homestore_backend/gc_manager.cpp b/src/lib/homestore_backend/gc_manager.cpp
index 8076d92f3..83fcaf1df 100644
--- a/src/lib/homestore_backend/gc_manager.cpp
+++ b/src/lib/homestore_backend/gc_manager.cpp
@@ -25,14 +25,14 @@ SISL_LOGGING_DECL(gcmgr)
 GCManager::GCManager(HSHomeObject* homeobject) :
         m_chunk_selector{homeobject->chunk_selector()}, m_hs_home_object{homeobject} {
     homestore::meta_service().register_handler(
-        _gc_actor_meta_name,
+        gc_actor_meta_name,
         [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
             on_gc_actor_meta_blk_found(std::move(buf), voidptr_cast(mblk));
         },
         nullptr, true);
 
     homestore::meta_service().register_handler(
-        _gc_reserved_chunk_meta_name,
+        gc_reserved_chunk_meta_name,
         [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
             on_reserved_chunk_meta_blk_found(std::move(buf), voidptr_cast(mblk));
         },
@@ -44,7 +44,7 @@ GCManager::GCManager(HSHomeObject* homeobject) :
         true);
 
     homestore::meta_service().register_handler(
-        _gc_task_meta_name,
+        gc_task_meta_name,
         [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
             on_gc_task_meta_blk_found(std::move(buf), voidptr_cast(mblk));
         },
@@ -64,7 +64,7 @@ void GCManager::on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta
     // here, we are under the protection of the lock of metaservice. however, we will also try to update pg and shard
     // metablk and then destroy the gc_task_sb, which will also try to acquire the lock of metaservice, as a result, a
     // dead lock will happen. so here we will handle all the gc tasks after read all the metablks
-    m_recovered_gc_tasks.emplace_back(_gc_task_meta_name);
+    m_recovered_gc_tasks.emplace_back(gc_task_meta_name);
     m_recovered_gc_tasks.back().load(buf, meta_cookie);
 }
 
@@ -89,7 +89,7 @@ void GCManager::handle_all_recovered_gc_tasks() {
 }
 
 void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
-    m_gc_actor_sbs.emplace_back(_gc_actor_meta_name);
+    m_gc_actor_sbs.emplace_back(gc_actor_meta_name);
     auto& gc_actor_sb = m_gc_actor_sbs.back();
     gc_actor_sb.load(buf, meta_cookie);
     auto pdev_id = gc_actor_sb->pdev_id;
@@ -100,7 +100,7 @@ void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* met
 }
 
 void GCManager::on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
-    homestore::superblk< gc_reserved_chunk_superblk > reserved_chunk_sb(_gc_reserved_chunk_meta_name);
+    homestore::superblk< gc_reserved_chunk_superblk > reserved_chunk_sb(gc_reserved_chunk_meta_name);
     auto chunk_id = reserved_chunk_sb.load(buf, meta_cookie)->chunk_id;
     auto EXVchunk = m_chunk_selector->get_extend_vchunk(chunk_id);
     if (EXVchunk == nullptr) {
@@ -976,7 +976,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data(
 
                             if (err) {
                                 // we will come here if:
-                                //  1 any blob copy fails, then err is operation_canceled
+                                //  1 any blob copy fails, then err is operation_cancelled
                                 //  2 write footer fails， then err is the error code of write footer
                                 GCLOGE(task_id, pg_id, shard_id,
                                        "Failed to copy some blos or failed to write shard footer for move_to_chunk={}, "
@@ -1271,7 +1271,7 @@ void GCManager::pdev_gc_actor::process_gc_task(chunk_id_t move_from_chunk, uint8
 
     // after data copy, we persist the gc task meta blk. now, we can make sure all the valid blobs are successfully
     // copyed and new blob indexes have be written to gc index table before gc task superblk is persisted.
-    homestore::superblk< GCManager::gc_task_superblk > gc_task_sb{GCManager::_gc_task_meta_name};
+    homestore::superblk< GCManager::gc_task_superblk > gc_task_sb{GCManager::gc_task_meta_name};
     gc_task_sb.create(sizeof(GCManager::gc_task_superblk));
     gc_task_sb->move_from_chunk = move_from_chunk;
     gc_task_sb->move_to_chunk = move_to_chunk;
diff --git a/src/lib/homestore_backend/gc_manager.hpp b/src/lib/homestore_backend/gc_manager.hpp
index 7fd2a46be..6a0415023 100644
--- a/src/lib/homestore_backend/gc_manager.hpp
+++ b/src/lib/homestore_backend/gc_manager.hpp
@@ -46,9 +46,9 @@ class GCManager {
     GCManager& operator=(GCManager&&) = delete;
 
 public:
-    inline static auto const _gc_actor_meta_name = std::string("GCActor");
-    inline static auto const _gc_task_meta_name = std::string("GCTask");
-    inline static auto const _gc_reserved_chunk_meta_name = std::string("GCReservedChunk");
+    inline static auto const gc_actor_meta_name = std::string("GCActor");
+    inline static auto const gc_task_meta_name = std::string("GCTask");
+    inline static auto const gc_reserved_chunk_meta_name = std::string("GCReservedChunk");
     inline static atomic_uint64_t _gc_task_id{1}; // 0 is used for crash recovery
 
 #pragma pack(1)
@@ -61,7 +61,7 @@ class GCManager {
         uint64_t failed_egc_task_count{0ull};
         uint64_t total_reclaimed_blk_count_by_gc{0ull};
         uint64_t total_reclaimed_blk_count_by_egc{0ull};
-        static std::string name() { return _gc_actor_meta_name; }
+        static std::string name() { return gc_actor_meta_name; }
     };
 
     struct gc_task_superblk {
@@ -70,12 +70,12 @@ class GCManager {
         chunk_id_t vchunk_id;
         pg_id_t pg_id;
         uint8_t priority;
-        static std::string name() { return _gc_task_meta_name; }
+        static std::string name() { return gc_task_meta_name; }
     };
 
     struct gc_reserved_chunk_superblk {
         chunk_id_t chunk_id;
-        static std::string name() { return _gc_reserved_chunk_meta_name; }
+        static std::string name() { return gc_reserved_chunk_meta_name; }
     };
 #pragma pack()
 
diff --git a/src/lib/homestore_backend/hs_homeobject.cpp b/src/lib/homestore_backend/hs_homeobject.cpp
index ef84a4c27..b030815cd 100644
--- a/src/lib/homestore_backend/hs_homeobject.cpp
+++ b/src/lib/homestore_backend/hs_homeobject.cpp
@@ -259,6 +259,14 @@ void HSHomeObject::init_homestore() {
     } else {
         LOGI("GC is disabled");
     }
+
+    // start scrubber
+    if (HS_BACKEND_DYNAMIC_CONFIG(enable_scrubber)) {
+        LOGI("Starting scrub manager");
+        scrub_mgr_->start();
+    } else {
+        LOGI("scrub manager is disabled");
+    }
 }
 
 void HSHomeObject::on_replica_restart() {
@@ -309,7 +317,6 @@ void HSHomeObject::on_replica_restart() {
 
         // gc_manager will be created only once here. we need make sure gc manager is created after all the pg meta blk
         // are replayed since we build pdev chunk heap in the constructor of gc manager , which depends on the pg meta.
-
         // gc metablk handlers are registered in the constructor of gc manager
         gc_mgr_ = std::make_shared< GCManager >(this);
 
@@ -326,7 +333,7 @@ void HSHomeObject::on_replica_restart() {
                 gc_index_table_map.emplace(boost::uuids::to_string(uuid), gc_index_table);
 
                 // 2 create gc actor superblk for each pdev, which contains the pdev_id and index table uuid.
-                homestore::superblk< GCManager::gc_actor_superblk > gc_actor_sb{GCManager::_gc_actor_meta_name};
+                homestore::superblk< GCManager::gc_actor_superblk > gc_actor_sb{GCManager::gc_actor_meta_name};
                 gc_actor_sb.create(sizeof(GCManager::gc_actor_superblk));
                 gc_actor_sb->pdev_id = pdev_id;
                 gc_actor_sb->index_table_uuid = uuid;
@@ -340,7 +347,7 @@ void HSHomeObject::on_replica_restart() {
                 for (size_t i = 0; i < reserved_chunk_num_per_pdev; ++i) {
                     auto chunk = chunks[i];
                     homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb{
-                        GCManager::_gc_reserved_chunk_meta_name};
+                        GCManager::gc_reserved_chunk_meta_name};
                     reserved_chunk_sb.create(sizeof(GCManager::gc_reserved_chunk_superblk));
                     reserved_chunk_sb->chunk_id = chunk;
                     reserved_chunk_sb.write();
@@ -356,9 +363,9 @@ void HSHomeObject::on_replica_restart() {
 
         // when initializing, there is not gc task. we need to recover reserved chunks here, so that the reserved chunks
         // will not be put into pdev heap when built
-        homestore::meta_service().read_sub_sb(GCManager::_gc_actor_meta_name);
-        homestore::meta_service().read_sub_sb(GCManager::_gc_reserved_chunk_meta_name);
-        homestore::meta_service().read_sub_sb(GCManager::_gc_task_meta_name);
+        homestore::meta_service().read_sub_sb(GCManager::gc_actor_meta_name);
+        homestore::meta_service().read_sub_sb(GCManager::gc_reserved_chunk_meta_name);
+        homestore::meta_service().read_sub_sb(GCManager::gc_task_meta_name);
 
         // At this point, log replay has not started yet. We must process all recovered GC tasks before replay begins.
         // After log replay completes, ReplicationStateMachine::on_log_replay_done() calls select_specific_chunk() for
@@ -377,6 +384,9 @@ void HSHomeObject::on_replica_restart() {
 
         gc_mgr_->handle_all_recovered_gc_tasks();
     });
+
+    // initialize scrub manager
+    scrub_mgr_ = std::make_shared< ScrubManager >(this);
 }
 
 #if 0
@@ -446,16 +456,20 @@ void HSHomeObject::shutdown() {
         LOGI("waiting for {} pending requests to complete", pending_reqs);
         std::this_thread::sleep_for(std::chrono::milliseconds(1000));
     };
-    LOGI("start stopping GC");
+    LOGI("stopping GC");
     // we need stop gc before shutting down homestore(where metaservice is shutdown), because gc mgr needs metaservice
     // to persist gc task metablk if there is any ongoing gc task. after stopping gc manager, there is no gc task
     // anymore, and thus now new gc task will be written to metaservice during homestore shutdown.
-    gc_mgr_->stop();
+    if (gc_mgr_) gc_mgr_->stop();
+
+    LOGI("stopping scrubbing");
+    if (scrub_mgr_) scrub_mgr_->stop();
 
     LOGI("start shutting down HomeStore");
     homestore::HomeStore::instance()->shutdown();
     homestore::HomeStore::reset_instance();
     gc_mgr_.reset();
+    scrub_mgr_.reset();
     iomanager.stop();
     LOGI("complete shutting down HomeStore");
 }
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index d4a1d25f4..d5a11c78d 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -14,10 +14,27 @@
 #include "homeobject/common.hpp"
 #include "index_kv.hpp"
 #include "gc_manager.hpp"
+#include "scrub_manager.hpp"
 #include "hs_backend_config.hpp"
 #include "generated/resync_pg_data_generated.h"
 #include "generated/resync_shard_data_generated.h"
 #include "generated/resync_blob_data_generated.h"
+#include "generated/blob_scrub_req_generated.h"
+#include "generated/deep_blob_scrub_map_generated.h"
+#include "generated/shallow_blob_scrub_map_generated.h"
+#include "generated/shard_scrub_req_generated.h"
+#include "generated/shallow_shard_scrub_map_generated.h"
+#include "generated/deep_shard_scrub_map_generated.h"
+#include "generated/pg_meta_scrub_req_generated.h"
+#include "generated/pg_meta_scrub_map_generated.h"
+#include "generated/scrub_common_generated.h"
+
+#define SCRUB_RESULT_STRING(type_)                                                                                     \
+    ((type_) == ScrubResult::NONE            ? "NONE"                                                                  \
+         : (type_) == ScrubResult::IO_ERROR  ? "IO_ERROR"                                                              \
+         : (type_) == ScrubResult::MISMATCH  ? "MISMATCH"                                                              \
+         : (type_) == ScrubResult::NOT_FOUND ? "NOT_FOUND"                                                             \
+                                             : "UNKNOWN")
 
 namespace homestore {
 struct meta_blk;
@@ -364,6 +381,7 @@ class HSHomeObject : public HomeObjectImpl {
         shared< homestore::ReplDev > repl_dev_;
         std::shared_ptr< BlobIndexTable > index_table_;
         PGMetrics metrics_;
+        HSHomeObject& home_obj_;
         mutable pg_state pg_state_{0};
 
         // Snapshot receiver progress info, used as a checkpoint for recovery
@@ -372,8 +390,8 @@ class HSHomeObject : public HomeObjectImpl {
         mutable homestore::superblk< snapshot_rcvr_shard_list_superblk > snp_rcvr_shard_list_sb_;
 
         HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table,
-              std::shared_ptr< const std::vector< homestore::chunk_num_t > > pg_chunk_ids);
-        HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev);
+              std::shared_ptr< const std::vector< homestore::chunk_num_t > > pg_chunk_ids, HSHomeObject& home_obj);
+        HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev, HSHomeObject& home_obj);
         ~HS_PG() override = default;
 
         static PGInfo pg_info_from_sb(homestore::superblk< pg_info_superblk > const& sb);
@@ -416,6 +434,19 @@ class HSHomeObject : public HomeObjectImpl {
          * Update membership in pg's superblock.
          */
         void update_membership(const MemberSet& members);
+
+        /*
+         * RPC handlers for scrub:
+         * 1. on_scrub_req_received: receive the scrub req from leader
+         * 2. on_scrub_map_received: receive the scrub map from followers
+         */
+        void on_scrub_req_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data);
+        void on_scrub_map_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data);
+
+        /**
+         * Register data RPC handlers for this PG
+         */
+        void register_data_rpc_handlers();
     };
 
     struct HS_Shard : public Shard {
@@ -537,6 +568,11 @@ class HSHomeObject : public HomeObjectImpl {
     inline const static homestore::MultiBlkId tombstone_pbas{0, 0, 0};
     inline const static std::string delete_marker_blob_data{"HOMEOBJECT_BLOB_DELETE_MARKER"};
 
+    // ask followers to scrub
+    inline const static std::string PUSH_SCRUB_REQ{"scrub_req"};
+    // return scrub map to leader
+    inline const static std::string PUSH_SCRUB_MAP{"push_scrub_map"};
+
     class PGBlobIterator {
     public:
         struct blob_read_result {
@@ -732,6 +768,7 @@ class HSHomeObject : public HomeObjectImpl {
     mutable std::shared_mutex snp_sbs_lock_;
     shared< HeapChunkSelector > chunk_selector_;
     shared< GCManager > gc_mgr_;
+    shared< ScrubManager > scrub_mgr_;
     unique< HttpManager > http_mgr_;
 
     static constexpr size_t max_zpad_bufs = _data_block_size / io_align;
@@ -767,6 +804,7 @@ class HSHomeObject : public HomeObjectImpl {
     void local_create_shard(ShardInfo shard_info, homestore::chunk_num_t v_chunk_id, homestore::chunk_num_t p_chunk_id,
                             homestore::blk_count_t blk_count, trace_id_t tid = 0);
     void add_new_shard_to_map(std::unique_ptr< HS_Shard > shard);
+    void delete_shard_from_map(shard_id_t shard_id);
     void update_shard_in_map(const ShardInfo& shard_info);
 
     // recover part
@@ -986,6 +1024,7 @@ class HSHomeObject : public HomeObjectImpl {
 
     cshared< HeapChunkSelector > chunk_selector() const { return chunk_selector_; }
     cshared< GCManager > gc_manager() const { return gc_mgr_; }
+    cshared< ScrubManager > scrub_manager() const { return scrub_mgr_; }
 
     /**
      * @brief Reconciles the leaders for all PGs or a specific PG identified by pg_id.
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/blob_scrub_req.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/blob_scrub_req.fbs
new file mode 100644
index 000000000..d276c3587
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/blob_scrub_req.fbs
@@ -0,0 +1,13 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+
+table BlobScrubReq {
+  scrub_info: ScrubInfo;
+  start: uint64;
+  end: uint64;
+  isdeepscrub: bool;
+}
+
+// BlobScrubReq is used for requesting blob range scrub
+root_type BlobScrubReq;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/deep_blob_scrub_map.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/deep_blob_scrub_map.fbs
new file mode 100644
index 000000000..aaffe25a8
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/deep_blob_scrub_map.fbs
@@ -0,0 +1,31 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// Wrapper table so that a raw hash can be a member of the ScrubValue union below.
+table HashValue {
+  hash:[ubyte];
+}
+// Wrapper table so that a ScrubResult code can be a member of the ScrubValue union below.
+table ScrubResultValue {
+  result:ScrubResult;
+}
+// Per-blob outcome: presumably the hash when the blob was read, else a ScrubResult code -- confirm with producer.
+union ScrubValue {
+  HashValue,
+  ScrubResultValue
+}
+// One blob (shard_id + blob_id) together with its ScrubValue.
+table DeepBlobScrubResultEntry {
+  blob_key: BlobKey;
+  scrub_result: ScrubValue;
+}
+// Deep blob scrub map: scrub identifiers, the start/end range it covers and one entry per reported blob.
+table DeepBlobScrubMap {
+  scrub_info: ScrubInfo;
+  start: uint64; // NOTE(review): unit and inclusiveness of start/end are not visible here, confirm
+  end: uint64;
+  deep_blob_scrub_results: [DeepBlobScrubResultEntry];
+}
+
+// this is used for sending deep scrub result
+root_type DeepBlobScrubMap;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/deep_shard_scrub_map.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/deep_shard_scrub_map.fbs
new file mode 100644
index 000000000..5d2b42487
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/deep_shard_scrub_map.fbs
@@ -0,0 +1,16 @@
+include "shallow_shard_scrub_map.fbs";
+
+namespace homeobject;
+// One shard id with its ScrubResult; ScrubResult is reached through the include chain down to scrub_common.fbs.
+table DeepShardScrubResultEntry {
+  shard_id: uint64;
+  result: ScrubResult;
+}
+// Deep shard scrub map: the shallow shard map plus ScrubResult entries for the shards listed as problematic.
+table DeepShardScrubMap {
+  shallow_map: ShallowShardScrubMap; // carries scrub_info, start/end and the shard id list
+  problematic_shards: [DeepShardScrubResultEntry];
+}
+
+// this is used for sending deep shard scrub map
+root_type DeepShardScrubMap;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_backend_config.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/hs_backend_config.fbs
similarity index 93%
rename from src/lib/homestore_backend/hs_backend_config.fbs
rename to src/lib/homestore_backend/hs_homeobject_fbs/hs_backend_config.fbs
index bd6991db9..983d19208 100644
--- a/src/lib/homestore_backend/hs_backend_config.fbs
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/hs_backend_config.fbs
@@ -23,6 +23,10 @@ table HSBackendSettings {
     //TODO: make this hotswap after gc is well tested
     enable_gc: bool = true;
 
+    //Enable scrubber
+    //TODO: make this hotswap after scrubber is well tested
+    enable_scrubber: bool = false;
+
     //Total reserved chunk num (dedicated for gc/egc) per pdev 
     reserved_chunk_num_per_pdev: uint8 = 6;
 
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/meta_scrub_req.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/meta_scrub_req.fbs
new file mode 100644
index 000000000..5892b3e7e
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/meta_scrub_req.fbs
@@ -0,0 +1,13 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// NOTE(review): root_type is commented out below, so this table looks unused for now -- confirm it is still needed.
+table MetaScrubReq {
+  scrub_type: ScrubType; // NOTE(review): ScrubType is not declared in scrub_common.fbs -- confirm where it comes from
+  issuer_uuid : [ubyte];
+  scrub_lsn: int64;
+  scrub_task_id: int64;
+}
+
+// MetaScrubReq is used for requesting metadata scrub (PG/shard metadata)
+// root_type MetaScrubReq;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_map.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_map.fbs
new file mode 100644
index 000000000..d372e0fde
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_map.fbs
@@ -0,0 +1,11 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// PG metadata scrub map: scrub identifiers plus a single ScrubResult for the PG metadata.
+table PGMetaScrubMap {
+  scrub_info: ScrubInfo;
+  pg_meta_scrub_result: ScrubResult; // one of NONE / IO_ERROR / MISMATCH / NOT_FOUND (scrub_common.fbs)
+}
+
+// this is used for sending pg meta scrub result
+root_type PGMetaScrubMap;
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_req.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_req.fbs
new file mode 100644
index 000000000..db0cc0400
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/pg_meta_scrub_req.fbs
@@ -0,0 +1,10 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// PG metadata scrub request; HS_PG::on_scrub_req_received verifies it for SCRUB_TYPE::PG_META.
+table PgMetaScrubReq {
+  scrub_info: ScrubInfo; // the only payload: common scrub identifiers from scrub_common.fbs
+}
+
+// PgMetaScrubReq is used for requesting pg meta scrub map
+root_type PgMetaScrubReq;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/resync_blob_data.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/resync_blob_data.fbs
similarity index 100%
rename from src/lib/homestore_backend/resync_blob_data.fbs
rename to src/lib/homestore_backend/hs_homeobject_fbs/resync_blob_data.fbs
diff --git a/src/lib/homestore_backend/resync_pg_data.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/resync_pg_data.fbs
similarity index 100%
rename from src/lib/homestore_backend/resync_pg_data.fbs
rename to src/lib/homestore_backend/hs_homeobject_fbs/resync_pg_data.fbs
diff --git a/src/lib/homestore_backend/resync_shard_data.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/resync_shard_data.fbs
similarity index 100%
rename from src/lib/homestore_backend/resync_shard_data.fbs
rename to src/lib/homestore_backend/hs_homeobject_fbs/resync_shard_data.fbs
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs
new file mode 100644
index 000000000..24800ada5
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs
@@ -0,0 +1,23 @@
+native_include "sisl/utility/non_null_ptr.hpp";
+
+namespace homeobject;
+// Result code attached to scrubbed items (blobs, shards, PG metadata) in the scrub map schemas.
+enum ScrubResult : uint8 {
+  NONE = 0,      // presumably "nothing wrong found" -- confirm against the producer
+  IO_ERROR = 1,
+  MISMATCH = 2,
+  NOT_FOUND = 3
+}
+// Identifies a blob by the shard it lives in and its blob id.
+table BlobKey {
+  shard_id: uint64;
+  blob_id: uint64;
+}
+// Identifiers embedded as `scrub_info` in the scrub request and scrub map tables.
+table ScrubInfo {
+  pg_id: uint16;
+  task_id: uint64;
+  req_id: uint64;
+  scrub_lsn: int64;
+  issuer_uuid : [ubyte]; // presumably the raw uuid bytes of the node that issued the scrub -- confirm
+}
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/shallow_blob_scrub_map.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/shallow_blob_scrub_map.fbs
new file mode 100644
index 000000000..c3bb19b40
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/shallow_blob_scrub_map.fbs
@@ -0,0 +1,13 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// Shallow blob scrub map: scrub identifiers, the start/end range it covers and a list of blob keys.
+table ShallowBlobScrubMap {
+  scrub_info: ScrubInfo;
+  start: uint64;   // NOTE(review): unit and inclusiveness of start/end are not visible here, confirm
+  end: uint64;
+  blobs: [BlobKey]; // presumably the blobs the sender found in the range -- confirm against the producer
+}
+
+// this is used for sending shallow blob scrub map
+root_type ShallowBlobScrubMap;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/shallow_shard_scrub_map.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/shallow_shard_scrub_map.fbs
new file mode 100644
index 000000000..f591f7565
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/shallow_shard_scrub_map.fbs
@@ -0,0 +1,13 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// Shallow shard scrub map: scrub identifiers, the start/end range it covers and a list of shard ids.
+table ShallowShardScrubMap {
+  scrub_info: ScrubInfo;
+  start: uint64;    // NOTE(review): unit and inclusiveness of start/end are not visible here, confirm
+  end: uint64;
+  shards: [uint64]; // presumably the shard ids the sender found in the range -- confirm against the producer
+}
+
+// this is used for sending shallow shard scrub map
+root_type ShallowShardScrubMap;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/shard_scrub_req.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/shard_scrub_req.fbs
new file mode 100644
index 000000000..981807eb0
--- /dev/null
+++ b/src/lib/homestore_backend/hs_homeobject_fbs/shard_scrub_req.fbs
@@ -0,0 +1,13 @@
+include "scrub_common.fbs";
+
+namespace homeobject;
+// Request to scrub one shard range; HS_PG::on_scrub_req_received verifies it for DEEP_SHARD and SHALLOW_SHARD.
+table ShardScrubReq {
+  scrub_info: ScrubInfo; // common scrub identifiers, declared in scrub_common.fbs
+  start: uint64;         // range start -- NOTE(review): unit and inclusiveness are not visible here, confirm
+  end: uint64;           // range end -- same caveat as start
+  isdeepscrub: bool;     // presumably true for deep scrub, false for shallow -- confirm against the sender
+}
+
+// ShardScrubReq is used for requesting shard range scrub
+root_type ShardScrubReq;
\ No newline at end of file
diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp
index e12cbf672..cb6ea952b 100644
--- a/src/lib/homestore_backend/hs_pg_manager.cpp
+++ b/src/lib/homestore_backend/hs_pg_manager.cpp
@@ -223,7 +223,7 @@ folly::Expected< HSHomeObject::HS_PG*, PGError > HSHomeObject::local_create_pg(s
     auto uuid_str = boost::uuids::to_string(index_table->uuid());
 
     repl_dev->set_custom_rdev_name(fmt::format("rdev{}", pg_info.id));
-    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table, chunk_ids);
+    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table, chunk_ids, *this);
     auto ret = hs_pg.get();
     {
         scoped_lock lck(index_lock_);
@@ -236,6 +236,9 @@ folly::Expected< HSHomeObject::HS_PG*, PGError > HSHomeObject::local_create_pg(s
         // Add to index service, so that it gets cleaned up when index service is shutdown.
         hs()->index_service().add_index_table(index_table);
         add_pg_to_map(std::move(hs_pg));
+
+        // when local_create_pg is called by BR, the pg scrub superblk will not be overwritten if it already exists
+        scrub_mgr_->add_pg(pg_info.id);
     }
     return ret;
 }
@@ -350,7 +353,6 @@ void HSHomeObject::on_pg_start_replace_member(group_id_t group_id, const std::st
             auto hs_pg = static_cast< HSHomeObject::HS_PG* >(pg.get());
             pg->pg_info_.members.emplace(std::move(to_pg_member(member_in)));
             pg->pg_info_.members.emplace(std::move(to_pg_member(member_out)));
-
             uint32_t i{0};
             pg_members* sb_members = hs_pg->pg_sb_->get_pg_members_mutable();
             for (auto const& m : pg->pg_info_.members) {
@@ -815,7 +817,6 @@ void HSHomeObject::destroy_hs_resources(pg_id_t pg_id) { chunk_selector_->reset_
 
 void HSHomeObject::destroy_pg_index_table(pg_id_t pg_id) {
     std::shared_ptr< BlobIndexTable > index_table;
-
     {
         // index_table->destroy() will trigger a cp_flush, which will call homeobject#cp_flush and try to acquire
         // `_pg_lock`, so we need to release the lock here to avoid a dead lock
@@ -935,7 +936,7 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c
     std::vector< chunk_num_t > p_chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks);
     bool set_pg_chunks_res = chunk_selector_->recover_pg_chunks(pg_id, std::move(p_chunk_ids));
     auto uuid_str = boost::uuids::to_string(pg_sb->index_table_uuid);
-    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value()));
+    auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value()), *this);
     if (!set_pg_chunks_res) {
         hs_pg->pg_state_.set_state(PGStateMask::DISK_DOWN);
         hs_pg->repl_dev_->set_stage(homestore::repl_dev_stage_t::UNREADY);
@@ -971,12 +972,13 @@ PGInfo HSHomeObject::HS_PG::pg_info_from_sb(homestore::superblk< pg_info_superbl
 }
 
 HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table,
-                           std::shared_ptr< const std::vector< chunk_num_t > > pg_chunk_ids) :
+                           std::shared_ptr< const std::vector< chunk_num_t > > pg_chunk_ids, HSHomeObject& home_obj) :
         PG{std::move(info)},
         pg_sb_{_pg_meta_name},
         repl_dev_{std::move(rdev)},
         index_table_{std::move(index_table)},
         metrics_{*this},
+        home_obj_{home_obj},
         snp_rcvr_info_sb_{_snp_rcvr_meta_name},
         snp_rcvr_shard_list_sb_{_snp_rcvr_shard_list_meta_name} {
     RELEASE_ASSERT(pg_chunk_ids != nullptr, "PG chunks null, pg={}", pg_info_.id);
@@ -1011,15 +1013,23 @@ HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, share
         pg_sb_chunk_ids[i] = pg_chunk_ids->at(i);
     }
     pg_sb_.write();
+
+    register_data_rpc_handlers();
 }
 
-HSHomeObject::HS_PG::HS_PG(superblk< pg_info_superblk >&& sb, shared< ReplDev > rdev) :
-        PG{pg_info_from_sb(sb)}, pg_sb_{std::move(sb)}, repl_dev_{std::move(rdev)}, metrics_{*this} {
+HSHomeObject::HS_PG::HS_PG(superblk< pg_info_superblk >&& sb, shared< ReplDev > rdev, HSHomeObject& home_obj) :
+        PG{pg_info_from_sb(sb)},
+        pg_sb_{std::move(sb)},
+        repl_dev_{std::move(rdev)},
+        metrics_{*this},
+        home_obj_{home_obj} {
     durable_entities_.blob_sequence_num = pg_sb_->blob_sequence_num;
     durable_entities_.active_blob_count = pg_sb_->active_blob_count;
     durable_entities_.tombstone_blob_count = pg_sb_->tombstone_blob_count;
     durable_entities_.total_occupied_blk_count = pg_sb_->total_occupied_blk_count;
     durable_entities_.total_reclaimed_blk_count = pg_sb_->total_reclaimed_blk_count;
+
+    register_data_rpc_handlers();
 }
 
 uint32_t HSHomeObject::HS_PG::total_shards() const { return shards_.size(); }
@@ -1115,6 +1125,212 @@ void HSHomeObject::HS_PG::update_membership(const MemberSet& members) {
     LOGI("PG membership updated, member_nums={}", pg_sb_->num_dynamic_members);
 }
 
+void HSHomeObject::HS_PG::register_data_rpc_handlers() {
+    // Hooks this PG's scrub callbacks into the data-RPC service of its repl_dev. A false return from
+    // add_data_rpc_service() is only logged as a warning (the log text reads it as "already registered");
+    // it does not abort construction of the HS_PG.
+    const auto& pg_id = pg_info_.id;
+
+    if (repl_dev_->add_data_rpc_service(PUSH_SCRUB_REQ, bind_this(HS_PG::on_scrub_req_received, 1))) {
+        LOGI("Successfully registered PUSH_SCRUB_REQ RPC handler for pg={}", pg_id);
+    } else {
+        LOGW("PUSH_SCRUB_REQ RPC handler already registered for pg={}", pg_id);
+    }
+
+    if (repl_dev_->add_data_rpc_service(PUSH_SCRUB_MAP, bind_this(HS_PG::on_scrub_map_received, 1))) {
+        LOGI("Successfully registered PUSH_SCRUB_MAP RPC handler for pg={}", pg_id);
+    } else {
+        LOGW("PUSH_SCRUB_MAP RPC handler already registered for pg={}", pg_id);
+    }
+}
+
+// Data-RPC handler for PUSH_SCRUB_REQ. The request blob is one SCRUB_TYPE tag followed by the size-prefixed
+// flatbuffer of that request type; a successfully decoded request is passed to ScrubManager::add_scrub_req().
+void HSHomeObject::HS_PG::on_scrub_req_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data) {
+    const auto pg_id = pg_info_.id;
+    LOGD("Received scrub_blob request for pg={}", pg_id);
+    // Sends the rpc response whenever this handler returns, whichever path it takes.
+    struct rpc_cleanup {
+        boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data_;
+        ~rpc_cleanup() {
+            if (rpc_data_) { rpc_data_->send_response(); }
+        }
+    } rpc_cleanup{rpc_data};
+
+    auto const& incoming_buf = rpc_data->request_blob();
+    const auto buf_size = incoming_buf.size();
+    const auto buf_ptr = incoming_buf.cbytes();
+    // Anything shorter than the tag cannot be read as SCRUB_TYPE and would make flatbuf_size wrap around.
+    if (!buf_ptr || buf_size < sizeof(SCRUB_TYPE)) {
+        LOGW("Scrub request received with empty or truncated buffer for pg={}", pg_id);
+        return;
+    }
+    const auto scrub_type = *reinterpret_cast< const SCRUB_TYPE* >(buf_ptr);
+    const auto flatbuf_ptr = buf_ptr + sizeof(SCRUB_TYPE);
+    const auto flatbuf_size = buf_size - sizeof(SCRUB_TYPE);
+    flatbuffers::Verifier verifier(flatbuf_ptr, flatbuf_size);
+
+    std::shared_ptr< ScrubManager::base_scrub_req > scrub_req;
+    bool success_to_load{false};
+    switch (scrub_type) {
+    case SCRUB_TYPE::PG_META: {
+        if (!VerifySizePrefixedPgMetaScrubReqBuffer(verifier)) {
+            LOGW("PG_META scrub req received with invalid flatbuffer for pg={}", pg_id);
+            return;
+        }
+        scrub_req = std::make_shared< ScrubManager::base_scrub_req >();
+        success_to_load = scrub_req->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    case SCRUB_TYPE::DEEP_BLOB:
+    case SCRUB_TYPE::SHALLOW_BLOB: {
+        if (!VerifySizePrefixedBlobScrubReqBuffer(verifier)) {
+            LOGW("SCRUB_BLOB received with invalid flatbuffer for pg={}", pg_id);
+            return;
+        }
+        scrub_req = std::make_shared< ScrubManager::blob_scrub_req >();
+        success_to_load = scrub_req->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    case SCRUB_TYPE::DEEP_SHARD:
+    case SCRUB_TYPE::SHALLOW_SHARD: {
+        if (!VerifySizePrefixedShardScrubReqBuffer(verifier)) {
+            LOGW("SCRUB_SHARD received with invalid flatbuffer for pg={}", pg_id);
+            return;
+        }
+        scrub_req = std::make_shared< ScrubManager::shard_scrub_req >();
+        success_to_load = scrub_req->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    default:
+        RELEASE_ASSERT(false, "Received unknown scrub type {} for pg={}", scrub_type, pg_id);
+    }
+
+    if (!success_to_load) {
+        LOGW("Failed to load scrub_blob request from flatbuffer for pg={}", pg_id);
+        return;
+    }
+    if (scrub_type != scrub_req->get_scrub_type()) {
+        LOGW("Scrub type in the request {} does not match with the scrub type in the buffer {}, pg={}", scrub_type,
+             scrub_req->get_scrub_type(), pg_id);
+        return;
+    }
+
+    auto scrub_mgr = home_obj_.scrub_manager();
+    if (!scrub_mgr) {
+        LOGW("ScrubManager is not initialized in HS_PG::on_scrub_req_received for pg={}", pg_id);
+        return;
+    }
+    scrub_mgr->add_scrub_req(scrub_req);
+}
+
+// Data-RPC handler for PUSH_SCRUB_MAP. The request blob is one SCRUB_TYPE tag followed by the size-prefixed
+// flatbuffer of that scrub map type; a successfully decoded map is passed to ScrubManager::add_scrub_map().
+void HSHomeObject::HS_PG::on_scrub_map_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data) {
+    const auto pg_id = pg_info_.id;
+    // Sends the rpc response whenever this handler returns, whichever path it takes.
+    struct rpc_cleanup {
+        boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data_;
+        ~rpc_cleanup() {
+            if (rpc_data_) { rpc_data_->send_response(); }
+        }
+    } rpc_cleanup{rpc_data};
+
+    auto const& incoming_buf = rpc_data->request_blob();
+    const auto buf_size = incoming_buf.size();
+    const auto buf_ptr = incoming_buf.cbytes();
+    // Anything shorter than the tag cannot be read as SCRUB_TYPE and would make flatbuf_size wrap around.
+    if (!buf_ptr || buf_size < sizeof(SCRUB_TYPE)) {
+        LOGW("Scrub map received with empty or truncated buffer for pg={}, buffer_size={}", pg_id, buf_size);
+        return;
+    }
+    const auto scrub_type = *reinterpret_cast< const SCRUB_TYPE* >(buf_ptr);
+    const auto flatbuf_ptr = buf_ptr + sizeof(SCRUB_TYPE);
+    const auto flatbuf_size = buf_size - sizeof(SCRUB_TYPE);
+    flatbuffers::Verifier verifier(flatbuf_ptr, flatbuf_size);
+
+    /*
+        auto fnv1a64 = [](const void* data, std::size_t len) -> std::uint64_t {
+            const std::uint8_t* p = static_cast< const std::uint8_t* >(data);
+            std::uint64_t h = 14695981039346656037ull; // offset basis
+            for (std::size_t i = 0; i < len; ++i) {
+                h ^= p[i];
+                h *= 1099511628211ull; // FNV prime
+            }
+            return h;
+        };
+    */
+
+    std::shared_ptr< ScrubManager::BaseScrubMap > scrub_map;
+    bool success_to_load{false};
+    switch (scrub_type) {
+    case SCRUB_TYPE::SHALLOW_BLOB: {
+        if (!VerifySizePrefixedShallowBlobScrubMapBuffer(verifier)) {
+            LOGW("SHALLOW_BLOB scrub map received with invalid flatbuffer for pg={}, buffer_size={}", pg_id, buf_size);
+            return;
+        }
+        scrub_map = std::make_shared< ScrubManager::ShallowBlobScrubMap >();
+        success_to_load = scrub_map->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    case SCRUB_TYPE::DEEP_BLOB: {
+        if (!VerifySizePrefixedDeepBlobScrubMapBuffer(verifier)) {
+            LOGW("DEEP_BLOB scrub map received with invalid flatbuffer for pg={}, buffer_size={}", pg_id, buf_size);
+            return;
+        }
+        scrub_map = std::make_shared< ScrubManager::DeepBlobScrubMap >();
+        success_to_load = scrub_map->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    case SCRUB_TYPE::DEEP_SHARD: {
+        if (!VerifySizePrefixedDeepShardScrubMapBuffer(verifier)) {
+            LOGW("DEEP_SHARD scrub map received with invalid flatbuffer for pg={}, buffer_size={}", pg_id, buf_size);
+            return;
+        }
+        scrub_map = std::make_shared< ScrubManager::DeepShardScrubMap >();
+        success_to_load = scrub_map->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    case SCRUB_TYPE::SHALLOW_SHARD: {
+        if (!VerifySizePrefixedShallowShardScrubMapBuffer(verifier)) {
+            LOGW("SHALLOW_SHARD scrub map received with invalid flatbuffer for pg={}, buffer_size={}", pg_id, buf_size);
+            return;
+        }
+        scrub_map = std::make_shared< ScrubManager::ShallowShardScrubMap >();
+        success_to_load = scrub_map->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    case SCRUB_TYPE::PG_META: {
+        if (!VerifySizePrefixedPGMetaScrubMapBuffer(verifier)) {
+            LOGW("PG_META scrub map received with invalid flatbuffer for pg={}, buffer_size={}", pg_id, buf_size);
+            return;
+        }
+        scrub_map = std::make_shared< ScrubManager::PGMetaScrubMap >();
+        success_to_load = scrub_map->load(flatbuf_ptr, flatbuf_size);
+        break;
+    }
+    default:
+        RELEASE_ASSERT(false, "Received unknown scrub map type {} for pg={}", scrub_type, pg_id);
+    }
+
+    if (!success_to_load) {
+        LOGW("Failed to load scrub map from flatbuffer for pg={}, scrub_type:{}", pg_id, scrub_type);
+        return;
+    }
+    if (scrub_type != scrub_map->get_scrub_type()) {
+        LOGW("Scrub type in the request {} does not match with the scrub type in the buffer {}, pg={}", scrub_type,
+             scrub_map->get_scrub_type(), pg_id);
+        return;
+    }
+
+    auto scrub_mgr = home_obj_.scrub_manager();
+    if (!scrub_mgr) {
+        LOGW("ScrubManager is not initialized in HS_PG::on_scrub_map_received for pg={}", pg_id);
+        return;
+    }
+    scrub_mgr->add_scrub_map(pg_id, scrub_map);
+}
+
 // NOTE: caller should hold the _pg_lock
 const HSHomeObject::HS_PG* HSHomeObject::_get_hs_pg_unlocked(pg_id_t pg_id) const {
     auto iter = _pg_map.find(pg_id);
@@ -1324,9 +1540,9 @@ void HSHomeObject::update_pg_meta_after_gc(const pg_id_t pg_id, const homestore:
     auto hs_pg = dynamic_cast< HS_PG* >(iter->second.get());
     auto move_from_v_chunk = chunk_selector()->get_extend_vchunk(move_from_chunk);
 
-    // TODO:: for now, when updating pchunk for a vchunk, we have to update the whole pg super blk. we can optimize this
-    // by persist a single superblk for each vchunk in the pg, so that we only need to update the vchunk superblk
-    // itself.
+    // TODO:: for now, when updating pchunk for a vchunk, we have to update the whole pg super blk. we can optimize
+    // this by persist a single superblk for each vchunk in the pg, so that we only need to update the vchunk
+    // superblk itself.
 
     auto pg_chunks = hs_pg->pg_sb_->get_chunk_ids_mutable();
 
@@ -1338,7 +1554,7 @@ void HSHomeObject::update_pg_meta_after_gc(const pg_id_t pg_id, const homestore:
     if (sisl_unlikely(pg_chunks[v_chunk_id] == move_to_chunk)) {
         // this might happens when crash recovery. the crash happens after pg metablk is updated but before gc task
         // metablk is destroyed.
-        LOGD("gc task_id={}, the pchunk_id for vchunk={} for pg_id={} is already {},  update pg metablk again!",
+        LOGD("gc task_id={}, the pchunk_id for vchunk={} for pg_id={} is already {}, skip updating pg metablk!",
              task_id, v_chunk_id, pg_id, move_to_chunk);
     } else {
         RELEASE_ASSERT(pg_chunks[v_chunk_id] == move_from_chunk,
@@ -1349,35 +1565,36 @@ void HSHomeObject::update_pg_meta_after_gc(const pg_id_t pg_id, const homestore:
         LOGD("gc task_id={}, pchunk for vchunk={} of pg_id={} is updated from {} to {}", task_id, v_chunk_id, pg_id,
              move_from_chunk, move_to_chunk);
 
-        // TODO:hs_pg->shards_.size() will be decreased by 1 in delete_shard if gc finds a empty shard, which will be
-        // implemented later
-        hs_pg->durable_entities_update([this, move_from_v_chunk, &move_to_chunk, &move_from_chunk, &pg_id,
-                                        &task_id](auto& de) {
-            // active_blob_count is updated by put/delete blob, not change it here.
+        // TODO:hs_pg->shards_.size() will be decreased by 1 in delete_shard if gc finds a empty shard, which will
+        // be implemented later
+        hs_pg->durable_entities_update(
+            [this, move_from_v_chunk, &move_to_chunk, &move_from_chunk, &pg_id, &task_id](auto& de) {
+                // active_blob_count is updated by put/delete blob, not change it here.
 
-            // considering the complexity of gc crash recovery for tombstone_blob_count, we get it directly from index
-            // table , which is the most accurate.
+                // considering the complexity of gc crash recovery for tombstone_blob_count, we get it directly from
+                // index table , which is the most accurate.
 
-            // TODO::do we need this as durable entity? remove it and get all the from pg index in real time.
-            de.tombstone_blob_count = get_pg_tombstone_blob_count(pg_id);
+                // TODO::do we need this as durable entity? remove it and get all the from pg index in real time.
+                de.tombstone_blob_count = get_pg_tombstone_blob_count(pg_id);
 
-            auto move_to_v_chunk = chunk_selector()->get_extend_vchunk(move_to_chunk);
+                auto move_to_v_chunk = chunk_selector()->get_extend_vchunk(move_to_chunk);
 
-            auto total_occupied_blk_count_by_move_from_chunk = move_from_v_chunk->get_used_blks();
-            auto total_occupied_blk_count_by_move_to_chunk = move_to_v_chunk->get_used_blks();
+                auto total_occupied_blk_count_by_move_from_chunk = move_from_v_chunk->get_used_blks();
+                auto total_occupied_blk_count_by_move_to_chunk = move_to_v_chunk->get_used_blks();
 
-            // TODO::in recovery case , this might be updated again , fix me later.
-            const auto reclaimed_blk_count =
-                total_occupied_blk_count_by_move_from_chunk - total_occupied_blk_count_by_move_to_chunk;
+                // TODO::in recovery case , this might be updated again , fix me later.
+                const auto reclaimed_blk_count =
+                    total_occupied_blk_count_by_move_from_chunk - total_occupied_blk_count_by_move_to_chunk;
 
-            de.total_occupied_blk_count -= reclaimed_blk_count;
-            de.total_reclaimed_blk_count += reclaimed_blk_count;
+                de.total_occupied_blk_count -= reclaimed_blk_count;
+                de.total_reclaimed_blk_count += reclaimed_blk_count;
 
-            LOGD("gc task_id={}, move_from_chunk={}, total_occupied_blk_count_by_move_from_chunk={}, move_to_chunk={}, "
-                 "total_occupied_blk_count_by_move_to_chunk={}, total_occupied_blk_count={}",
-                 task_id, move_from_chunk, total_occupied_blk_count_by_move_from_chunk, move_to_chunk,
-                 total_occupied_blk_count_by_move_to_chunk, de.total_occupied_blk_count.load());
-        });
+                LOGD("gc task_id={}, move_from_chunk={}, total_occupied_blk_count_by_move_from_chunk={}, "
+                     "move_to_chunk={}, "
+                     "total_occupied_blk_count_by_move_to_chunk={}, total_occupied_blk_count={}",
+                     task_id, move_from_chunk, total_occupied_blk_count_by_move_from_chunk, move_to_chunk,
+                     total_occupied_blk_count_by_move_to_chunk, de.total_occupied_blk_count.load());
+            });
 
         hs_pg->pg_sb_->total_occupied_blk_count =
             hs_pg->durable_entities().total_occupied_blk_count.load(std::memory_order_relaxed);
diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 8c949cb3e..387515b89 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -63,15 +63,15 @@ uint64_t ShardManager::max_shard_size() { return Gi; }
 
 uint64_t ShardManager::max_shard_num_in_pg() { return ((uint64_t)0x01) << shard_width; }
 
-shard_id_t HSHomeObject::generate_new_shard_id(pg_id_t pgid) {
+shard_id_t HSHomeObject::generate_new_shard_id(pg_id_t pg_id) {
     std::scoped_lock lock_guard(_pg_lock);
-    auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pgid));
+    auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pg_id));
     RELEASE_ASSERT(hs_pg, "Missing pg info");
 
     auto new_sequence_num = ++hs_pg->shard_sequence_num_;
     RELEASE_ASSERT(new_sequence_num < ShardManager::max_shard_num_in_pg(),
                    "new shard id must be less than ShardManager::max_shard_num_in_pg()");
-    return make_new_shard_id(pgid, new_sequence_num);
+    return make_new_shard_id(pg_id, new_sequence_num);
 }
 
 uint64_t HSHomeObject::get_sequence_num_from_shard_id(uint64_t shard_id) {
@@ -737,6 +737,26 @@ void HSHomeObject::add_new_shard_to_map(std::unique_ptr< HS_Shard > shard) {
     if (sequence_num > hs_pg->shard_sequence_num_) { hs_pg->shard_sequence_num_ = sequence_num; }
 }
 
+// Drops shard_id from its PG's shard list, from _shard_map and from chunk_to_shards_map_; asserts that it exists.
+void HSHomeObject::delete_shard_from_map(shard_id_t shard_id) {
+    std::scoped_lock lock_guard(_pg_lock, _shard_lock);
+    auto shard_iter = _shard_map.find(shard_id);
+    RELEASE_ASSERT(shard_iter != _shard_map.end(),
+                   "try to delete shardID=0x{:x}, pg={}, shard=0x{:x}, but shard does not exist", shard_id,
+                   (shard_id >> homeobject::shard_width), (shard_id & homeobject::shard_mask));
+    auto hs_shard = d_cast< HS_Shard* >((*shard_iter->second).get());
+    // Read everything needed from the shard now: remove_if() below destroys the list entry that owns it.
+    const auto pg_id = hs_shard->info.placement_group;
+    const auto p_chunk_id = hs_shard->p_chunk_id();
+    auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pg_id));
+    RELEASE_ASSERT(hs_pg, "Missing pg info, pg={}", pg_id);
+    auto& shards = hs_pg->shards_;
+    shards.remove_if([shard_id](auto& shard_it) { return (shard_it->info).id == shard_id; });
+    _shard_map.erase(shard_id);
+    chunk_to_shards_map_[p_chunk_id].erase(shard_id);
+    // TODO:: delete shard meta blk
+}
+
 void HSHomeObject::update_shard_in_map(const ShardInfo& shard_info) {
     std::scoped_lock lock_guard(_shard_lock);
     auto shard_iter = _shard_map.find(shard_info.id);
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index 6ea8a1c07..5d022213b 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -287,9 +287,13 @@ void ReplicationStateMachine::on_destroy(const homestore::group_id_t& group_id)
         LOGW("do not have pg mapped by group_id={}", boost::uuids::to_string(group_id));
         return;
     }
-    home_object_->pg_destroy(PG_ID.value());
-    LOGI("replica destroyed, cleared pg={} resources with group_id={}", PG_ID.value(),
-         boost::uuids::to_string(group_id));
+
+    const auto pg_id = PG_ID.value();
+    home_object_->pg_destroy(pg_id);
+    LOGI("replica destroyed, cleared pg={} resources with group_id={}", pg_id, boost::uuids::to_string(group_id));
+    // if a crash happens after the pg is destroyed above but before scrub_mgr#remove_pg is called, a stale
+    // pg_scrub_superblk will be left behind. we will handle this in metablk replay.
+    home_object_->scrub_manager()->remove_pg(pg_id);
 }
 
 void ReplicationStateMachine::on_remove_member(const homestore::replica_id_t& member, trace_id_t tid) {
@@ -1052,4 +1056,31 @@ void ReplicationStateMachine::on_log_replay_done(const homestore::group_id_t& gr
     home_object_->refresh_pg_statistics(pg_id);
 }
 
+void ReplicationStateMachine::on_become_leader(const homestore::group_id_t& group_id) {
+    const auto maybe_pg_id = home_object_->get_pg_id_with_group_id(group_id);
+    if (maybe_pg_id.has_value()) {
+        const auto pg_id = maybe_pg_id.value();
+        RELEASE_ASSERT(home_object_->pg_exists(pg_id), "pg={} should exist, but not! fatal error!", pg_id);
+        // TODO:: only a sanity check for now; add whatever actions need to be taken.
+    } else {
+        LOGE("become leader but can not find any pg for group={}!", group_id);
+    }
+}
+
+void ReplicationStateMachine::on_become_follower(const homestore::group_id_t& group_id) {
+    auto pg_id_opt = home_object_->get_pg_id_with_group_id(group_id);
+    if (!pg_id_opt.has_value()) {
+        LOGE("become follower but can not find any pg for group={}!", group_id);
+        return;
+    }
+    const auto pg_id = pg_id_opt.value();
+    RELEASE_ASSERT(home_object_->pg_exists(pg_id), "pg={} should exist, but not! fatal error!", pg_id);
+
+    LOGI("become follower of group {}, cancel scrub task for pg={}", group_id, pg_id);
+    // TODO:: add whatever other actions need to be taken.
+    // cancel the scrub task since I am not leader any more. The manager is null-checked the same way the scrub
+    // RPC handlers in HS_PG check it before use.
+    if (auto scrub_mgr = home_object_->scrub_manager(); scrub_mgr) { scrub_mgr->cancel_scrub_task(pg_id); }
+}
+
 } // namespace homeobject
diff --git a/src/lib/homestore_backend/replication_state_machine.hpp b/src/lib/homestore_backend/replication_state_machine.hpp
index 77906c8ae..98849507e 100644
--- a/src/lib/homestore_backend/replication_state_machine.hpp
+++ b/src/lib/homestore_backend/replication_state_machine.hpp
@@ -240,6 +240,14 @@ class ReplicationStateMachine : public homestore::ReplDevListener {
     ///
     void on_log_replay_done(const homestore::group_id_t& group_id) override;
 
+    /// @brief  this is called when this node becomes leader for the group
+    /// @param group_id - the group in which this node has become leader
+    virtual void on_become_leader(const homestore::group_id_t& group_id) override;
+
+    /// @brief  this is called when this node becomes follower for the group
+    /// @param group_id - the group in which this node has become follower
+    virtual void on_become_follower(const homestore::group_id_t& group_id) override;
+
 private:
     HSHomeObject* home_object_{nullptr};
 
diff --git a/src/lib/homestore_backend/scrub_manager.cpp b/src/lib/homestore_backend/scrub_manager.cpp
new file mode 100644
index 000000000..5b9452fa7
--- /dev/null
+++ b/src/lib/homestore_backend/scrub_manager.cpp
@@ -0,0 +1,1878 @@
+#include "hs_homeobject.hpp"
+#include <homestore/btree/btree_req.hpp>
+#include <homestore/btree/btree_kv.hpp>
+#include <sstream>
+#include <algorithm>
+
+namespace homeobject {
+
+SISL_LOGGING_DECL(scrubmgr)
+#define NO_TASK_ID 0 // NOTE(review): presumably the task id that means "no scrub task" - confirm against its users
+#define HDD_IOPS 200 // random-read IOPS assumed for a 7200 RPM HDD
+
+#define SCRUBLOG(level, pg_id, task_id, msg, ...)                                                                      \
+    LOG##level##MOD(scrubmgr, "[pg_id={}, task_id={}] " msg, pg_id, task_id, ##__VA_ARGS__)
+
+#define SCRUBLOGD(pg_id, task_id, msg, ...) SCRUBLOG(DEBUG, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGI(pg_id, task_id, msg, ...) SCRUBLOG(INFO, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGW(pg_id, task_id, msg, ...) SCRUBLOG(WARN, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGE(pg_id, task_id, msg, ...) SCRUBLOG(ERROR, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGC(pg_id, task_id, msg, ...) SCRUBLOG(CRITICAL, pg_id, task_id, msg, ##__VA_ARGS__)
+
+ScrubManager::ScrubManager(HSHomeObject* homeobject) : m_hs_home_object{homeobject} {
+    // Register the meta_service handler that recovers pg scrub superblocks; stale ones come back in the vector below.
+    std::vector< homestore::superblk< pg_scrub_superblk > > stale_pg_scrub_sbs;
+    homestore::meta_service().register_handler(
+        pg_scrub_meta_name,
+        [this, &stale_pg_scrub_sbs](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
+            on_pg_scrub_meta_blk_found(std::move(buf), voidptr_cast(mblk), stale_pg_scrub_sbs);
+        },
+        nullptr, true);
+    homestore::meta_service().read_sub_sb(pg_scrub_meta_name);
+    // NOTE(review): the handler captures the local stale_pg_scrub_sbs by reference and stays registered; confirm it
+    // is never invoked after this constructor returns. Destroy the stale pg scrub superblocks collected so far.
+    for (auto& sb : stale_pg_scrub_sbs)
+        sb.destroy();
+}
+
+ScrubManager::~ScrubManager() { stop(); } // stop() returns early when the scrub timer is not running
+
+void ScrubManager::scan_pg_for_scrub() { // timer callback: submits a scrub task for each eligible pg
+    for (auto const& [pg_id, _] : m_pg_scrub_sb_map) {
+        if (is_eligible_for_deep_scrub(pg_id)) { // deep scrub takes precedence over shallow scrub
+            LOGINFOMOD(scrubmgr, "pg={} is eligible for deep scrub", pg_id);
+            submit_scrub_task(pg_id, true)
+                .via(&folly::InlineExecutor::instance())
+                .thenValue([this, pg_id](std::shared_ptr< ShallowScrubReport > report) {
+                    if (!report) { // submit_scrub_task resolves to a null report when it rejects the task
+                        LOGERRORMOD(scrubmgr, "deep scrub failed for pg={}", pg_id);
+                        return;
+                    }
+                    LOGINFOMOD(scrubmgr, "deep scrub is completed for pg={}", pg_id);
+                    auto deep_report = std::dynamic_pointer_cast< DeepScrubReport >(report);
+                    if (!deep_report) {
+                        LOGERRORMOD(scrubmgr, "report for deep scrub can not be casted to DeepScrubReport for pg={}",
+                                    pg_id);
+                        return;
+                    }
+                    handle_deep_pg_scrub_report(std::move(deep_report));
+                });
+        } else if (is_eligible_for_shallow_scrub(pg_id)) {
+            LOGINFOMOD(scrubmgr, "pg={} is eligible for shallow scrub", pg_id);
+            submit_scrub_task(pg_id, false)
+                .via(&folly::InlineExecutor::instance())
+                .thenValue([this, pg_id](std::shared_ptr< ShallowScrubReport > report) {
+                    if (!report) { // this is the shallow path, so the failure is reported as a shallow scrub failure
+                        LOGERRORMOD(scrubmgr, "shallow scrub failed for pg={}", pg_id);
+                        return;
+                    }
+                    LOGINFOMOD(scrubmgr, "shallow scrub is completed for pg={}", pg_id);
+                    handle_shallow_pg_scrub_report(std::move(report));
+                });
+        } else {
+            LOGINFOMOD(scrubmgr, "pg={} is not eligible for any scrubbing!", pg_id);
+        }
+    }
+}
+
+void ScrubManager::handle_shallow_pg_scrub_report(std::shared_ptr< ShallowScrubReport > report) {
+    if (!report) { // nothing to print for a null report
+        LOGERRORMOD(scrubmgr, "Shallow scrub report is null!");
+        return;
+    }
+
+    report->print();
+    // TODO: add more logic, e.g. log an event for notification.
+}
+
+void ScrubManager::handle_deep_pg_scrub_report(std::shared_ptr< DeepScrubReport > report) {
+    if (!report) { // nothing to print for a null report
+        LOGERRORMOD(scrubmgr, "Deep scrub report is null!");
+        return;
+    }
+
+    report->print();
+    // TODO: add more logic, e.g. log an event for notification.
+}
+
+bool ScrubManager::is_eligible_for_deep_scrub(const pg_id_t& pg_id) {
+    // TODO: add the real eligibility check; until then no pg is ever reported as eligible for deep scrub.
+    return false;
+}
+
+bool ScrubManager::is_eligible_for_shallow_scrub(const pg_id_t& pg_id) {
+    // TODO: add the real eligibility check; until then no pg is ever reported as eligible for shallow scrub.
+    return false;
+}
+
+void ScrubManager::start() { // spawns the scrub workers and arms the periodic scan timer
+    // TODO: make the thread count configurable. The thread count is the maximum number of scrub tasks handled
+    // concurrently; too many concurrent scrub tasks may put too much pressure on the node.
+    const auto most_concurrent_scrub_task_num = 2;
+    m_scrub_executor = std::make_shared< folly::IOThreadPoolExecutor >(most_concurrent_scrub_task_num);
+    for (int i = 0; i < most_concurrent_scrub_task_num; ++i) {
+        m_scrub_executor->add([this]() {
+            while (true) {
+                // pop() blocks here while no scrub task is available.
+                auto pop_result = m_scrub_task_queue.pop();
+                if (pop_result.is_closed()) {
+                    LOGINFOMOD(scrubmgr, "scrub task queue is stopped, no need to handle scrub task anymore!");
+                    break;
+                }
+                RELEASE_ASSERT(pop_result.value.has_value() && pop_result.is_ok(),
+                               "pop from scrub task queue should not fail when it is not closed!");
+                auto task = pop_result.value.value();
+                // each worker handles one pg scrub task at a time, so the number of concurrent scrub tasks is
+                // bounded by the thread count of m_scrub_executor.
+                handle_pg_scrub_task(std::move(task));
+            }
+        });
+    }
+
+    const auto most_concurrent_scrub_req_num = 2;
+    // scrub requests are not prioritized the way scrub tasks are; only their concurrency is limited so that they do
+    // not put too much pressure on this node.
+    m_scrub_req_executor = std::make_shared< folly::IOThreadPoolExecutor >(most_concurrent_scrub_req_num);
+
+    iomanager.run_on_wait(iomgr::reactor_regex::random_worker, [&]() {
+        m_scrub_timer_fiber = iomanager.iofiber_self();
+        // TODO: make the interval configurable, for now set it to 60 seconds
+        m_scrub_timer_hdl = iomanager.schedule_thread_timer(60ull * 1000 * 1000 * 1000, true, nullptr /*cookie*/,
+                                                            [this](void*) { scan_pg_for_scrub(); });
+    });
+    LOGINFOMOD(scrubmgr, "scrub manager started!");
+}
+
+void ScrubManager::stop() {
+    // shut down the timer first; when it is not armed, stop() returns without touching the queue or the executors.
+    if (m_scrub_timer_hdl == iomgr::null_timer_handle) {
+        LOGINFOMOD(scrubmgr, "scrub scheduler timer is not running, no need to stop it");
+        return;
+    }
+    RELEASE_ASSERT(m_scrub_timer_fiber,
+                   "m_scrub_timer_hdl is not null_timer_handle, but m_scrub_timer_fiber is null, fatal error!");
+    LOGINFOMOD(scrubmgr, "stop scrub scheduler timer");
+    iomanager.run_on_wait(m_scrub_timer_fiber, [&]() {
+        iomanager.cancel_timer(m_scrub_timer_hdl, true);
+        m_scrub_timer_hdl = iomgr::null_timer_handle;
+    });
+    m_scrub_timer_fiber = nullptr;
+
+    // close the task queue (the workers started in start() leave their loop once pop() reports it closed) and cancel
+    // all the running scrub tasks. TODO: add a stopped flag to avoid adding new scrub tasks once stopped.
+    m_scrub_task_queue.close();
+    for (auto& [_, pg_scrub_ctx] : m_pg_scrub_ctx_map) {
+        pg_scrub_ctx->cancel();
+    }
+
+    m_scrub_executor->stop();
+    m_scrub_executor.reset();
+    m_scrub_req_executor->stop();
+    m_scrub_req_executor.reset();
+    LOGINFOMOD(scrubmgr, "scrub manager stopped!");
+}
+
+void ScrubManager::add_scrub_req(std::shared_ptr< base_scrub_req > req) {
+    m_scrub_req_executor->add([this, req = std::move(req)]() { handle_scrub_req(req); });
+} // NOTE(review): m_scrub_req_executor is created in start() and reset in stop(); confirm no caller runs outside it
+
+bool ScrubManager::add_scrub_map(const pg_id_t pg_id, std::shared_ptr< BaseScrubMap > bsm) {
+    // A scrub map is only accepted while a scrub context exists for the pg; the context decides the result.
+    auto ctx_it = m_pg_scrub_ctx_map.find(pg_id);
+    if (ctx_it != m_pg_scrub_ctx_map.end()) {
+        return ctx_it->second->add_scrub_map(std::move(bsm));
+    }
+
+    LOGERRORMOD(scrubmgr, "can not find scrub context for pg_id={}, fail to add scrub map!", pg_id);
+    return false;
+}
+
+void ScrubManager::handle_scrub_req(std::shared_ptr< base_scrub_req > req) {
+    if (!req) {
+        LOGERRORMOD(scrubmgr, "scrub req is null, can not handle it!");
+        return;
+    }
+
+    const auto& pg_id = req->pg_id;
+    const auto& task_id = req->task_id;
+    const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
+    if (!hs_pg) {
+        SCRUBLOGD(pg_id, task_id, "can not find hs_pg, fail to handle scrub req!");
+        return;
+    }
+
+    const auto& pg_repl_dev = hs_pg->repl_dev_;
+    if (!pg_repl_dev) {
+        SCRUBLOGD(pg_id, task_id, "repl_dev is null, fail to handle scrub req!");
+        return;
+    }
+    // a node that is currently the leader of the pg does not serve scrub requests; the request is treated as stale.
+    if (pg_repl_dev->is_leader()) {
+        SCRUBLOGD(pg_id, task_id, "leader of pg, no need to handle stale scrub req!");
+        return;
+    }
+
+    std::shared_ptr< BaseScrubMap > scrub_map;
+    auto& remote_peer_id = req->issuer_peer_id;
+
+    // 1. do the local scrub that matches the request type; a null scrub_map means the scrub could not be done.
+    const auto scrub_type = req->get_scrub_type();
+    switch (scrub_type) {
+    case SCRUB_TYPE::PG_META: {
+        SCRUBLOGD(pg_id, task_id, "handling pg meta scrub req, ");
+        scrub_map = scrub_pg_meta(req);
+        break;
+    }
+    case SCRUB_TYPE::DEEP_BLOB:
+    case SCRUB_TYPE::SHALLOW_BLOB: {
+        auto blob_req = std::dynamic_pointer_cast< blob_scrub_req >(req);
+        RELEASE_ASSERT(blob_req, "Failed to cast to blob_scrub_req");
+        SCRUBLOGD(pg_id, task_id, "handling blob scrub req, is_deep_scrub:{}", req->is_deep_scrub());
+        scrub_map = local_scrub_blob(blob_req);
+        break;
+    }
+    case SCRUB_TYPE::DEEP_SHARD:
+    case SCRUB_TYPE::SHALLOW_SHARD: {
+        auto shard_req = std::dynamic_pointer_cast< shard_scrub_req >(req);
+        RELEASE_ASSERT(shard_req, "Failed to cast to shard_scrub_req");
+        SCRUBLOGD(pg_id, task_id, "handling shard scrub req, is_deep_scrub:{}", req->is_deep_scrub());
+        scrub_map = local_scrub_shard(shard_req);
+        break;
+    }
+    default:
+        RELEASE_ASSERT(false, "unknown scrub req type: {}!", scrub_type);
+    }
+
+    if (!scrub_map) {
+        SCRUBLOGD(pg_id, task_id, "fail to handle scrub req, drop it!");
+        return;
+    }
+
+    // 2. send the scrub map back to the issuer of the request: the scrub type first, then the serialized map.
+    auto flatbuffer = scrub_map->build_flat_buffer();
+    sisl::io_blob_list_t blob_list;
+    blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
+    blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
+    // NOTE(review): blob_list only points at the locals scrub_type and flatbuffer - confirm the send below does not
+    // read them after this function returns. No retry is needed here, the leader will handle retries.
+    pg_repl_dev->data_request_unidirectional(remote_peer_id, HSHomeObject::PUSH_SCRUB_MAP, blob_list)
+        .via(&folly::InlineExecutor::instance())
+        .thenValue([pg_id, remote_peer_id, scrub_type, task_id](auto&& response) {
+            if (response.hasError()) {
+                SCRUBLOGD(pg_id, task_id, "failed to send scrub map to peer {}, scrub_type:{}, error={}",
+                          remote_peer_id, scrub_type, response.error());
+                return;
+            }
+
+            SCRUBLOGD(pg_id, task_id, "successfully sent scrub map to peer {}, scrub_type:{}", remote_peer_id,
+                      scrub_type);
+        });
+}
+
+bool ScrubManager::wait_for_scrub_lsn_commit(shared< homestore::ReplDev > repl_dev, int64_t scrub_lsn) {
+    if (!repl_dev) {
+        LOGERRORMOD(scrubmgr, "repl_dev is null, can not wait for scrub lsn commit!");
+        return false;
+    }
+    // Polls the last commit lsn, one second apart, until it reaches scrub_lsn; the calling thread sleeps in between.
+    // TODO:: make this configurable
+    const auto wait_retry_times = 2;
+    for (auto i = 0; i < wait_retry_times; ++i) {
+        auto commit_lsn = repl_dev->get_last_commit_lsn();
+        if (commit_lsn >= scrub_lsn) {
+            LOGINFOMOD(scrubmgr, "commit lsn {} is greater than or equal to scrub lsn {}, wait successfully",
+                       commit_lsn, scrub_lsn);
+            return true;
+        }
+        LOGINFOMOD(scrubmgr,
+                   "commit lsn {} is less than scrub lsn {}, wait for 1 second before retrying, retry times {}/{}",
+                   commit_lsn, scrub_lsn, i + 1, wait_retry_times);
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+    }
+    // the loop ends right after a sleep: check once more so that the last wait is not wasted.
+    return repl_dev->get_last_commit_lsn() >= scrub_lsn;
+}
+
+std::shared_ptr< ScrubManager::PGMetaScrubMap > ScrubManager::scrub_pg_meta(std::shared_ptr< base_scrub_req > req) {
+    // Builds the pg meta scrub map reported by this replica. For now the map only carries the identity of the
+    // request (pg, task, req id, scrub lsn) together with our own uuid.
+    const auto& scrub_req = *req;
+    auto result = std::make_shared< ScrubManager::PGMetaScrubMap >(
+        scrub_req.pg_id, scrub_req.task_id, scrub_req.req_id, scrub_req.scrub_lsn, m_hs_home_object->our_uuid());
+
+    SCRUBLOGD(scrub_req.pg_id, scrub_req.task_id, "req_id={}, do pg meta scrub", scrub_req.req_id);
+
+    // TODO: add support for reading the pg meta blk of a specific pg.
+    // Once available: read the pg metablk, compare it with the in-memory state and return the real pg meta scrub
+    // map produced by that comparison.
+
+    // until then the map built above is returned unchanged.
+    return result;
+}
+
+std::shared_ptr< ScrubManager::BaseScrubMap > ScrubManager::local_scrub_blob(std::shared_ptr< blob_scrub_req > req) {
+    const auto my_uuid = m_hs_home_object->our_uuid();
+    const auto task_id = req->task_id;
+    const auto req_id = req->req_id;
+    const auto scrub_lsn = req->scrub_lsn;
+    const auto& pg_id = req->pg_id;
+    const auto& start_blob_id = req->start;
+    const auto& end_blob_id = req->end;
+    // Scrubs the local blobs with blob id in [start, end]: shallow only lists them, deep also reads and verifies them.
+    SCRUBLOGD(pg_id, task_id, "req_id={}, scrub_blob: range [{}, {}], scrub_lsn={}", req_id, start_blob_id, end_blob_id,
+              scrub_lsn);
+
+    auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
+    if (!hs_pg) {
+        SCRUBLOGD(pg_id, task_id, "req_id={},can not find hs_pg, fail to do blob scrub!", req_id);
+        return nullptr;
+    }
+
+    if (!wait_for_scrub_lsn_commit(hs_pg->repl_dev_, scrub_lsn)) {
+        SCRUBLOGD(pg_id, task_id,
+                  "req_id={}, commit lsn is not advanced to scrub lsn {} after waiting for a while, fail to do "
+                  "blob scrub",
+                  req_id, scrub_lsn);
+        return nullptr;
+    }
+
+    // get all the scrub candidate blobs: non-tombstone blobs whose blob id is in the requested range. Filtering by
+    // the seal lsn of the owning shard is not implemented yet (see the TODO in the filter below).
+    const auto start = BlobRouteKey{BlobRoute{0, start_blob_id}};
+    const auto end = BlobRouteKey{BlobRoute{std::numeric_limits< uint64_t >::max(), end_blob_id}};
+
+    std::vector< std::pair< BlobRouteKey, BlobRouteValue > > scrub_candidate_blobs;
+    auto& pg_index_table = hs_pg->index_table_;
+    homestore::BtreeQueryRequest< BlobRouteKey > query_req{
+        homestore::BtreeKeyRange< BlobRouteKey >{start, true /* inclusive */, end, true /* inclusive */},
+        homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, std::numeric_limits< uint32_t >::max(),
+        [scrub_lsn, start_blob_id, end_blob_id](homestore::BtreeKey const& key,
+                                                homestore::BtreeValue const& value) -> bool {
+            BlobRouteValue existing_value{value};
+            if (existing_value.pbas() == HSHomeObject::tombstone_pbas) { return false; }
+            const auto blob_route_key = BlobRouteKey{key};
+            if (blob_route_key.key().blob < start_blob_id || blob_route_key.key().blob > end_blob_id) { return false; }
+
+            // TODO:: after we have shard seal_lsn, check whether the shard of the blob is sealed after scrub_lsn. If
+            // yes, filter it out as well.
+
+            return true;
+        }};
+
+    auto const status = pg_index_table->query(query_req, scrub_candidate_blobs);
+    if (status != homestore::btree_status_t::success) {
+        SCRUBLOGD(pg_id, task_id, "req_id={}, Failed to query blobs in index table for status={}", req_id, status);
+        return nullptr;
+    }
+
+    const bool is_deep_scrub = req->is_deep_scrub();
+
+    if (!is_deep_scrub) {
+        auto shallow_srcub_map = std::make_shared< ScrubManager::ShallowBlobScrubMap >(
+            pg_id, task_id, req_id, scrub_lsn, my_uuid, start_blob_id, end_blob_id);
+
+        for (const auto& [k, _] : scrub_candidate_blobs) {
+            shallow_srcub_map->add_blob(k.key());
+        }
+
+        SCRUBLOGD(pg_id, task_id, "req_id={}, shallow blob scrub completed, found {} blobs in range [{},{}]", req_id,
+                  shallow_srcub_map->blobs.size(), start_blob_id, end_blob_id);
+
+        return shallow_srcub_map;
+    }
+
+    // deep scrub: read and check blobs.
+    auto deep_scrub_map = std::make_shared< ScrubManager::DeepBlobScrubMap >(pg_id, task_id, req_id, scrub_lsn, my_uuid,
+                                                                             start_blob_id, end_blob_id);
+    auto& data_service = homestore::data_service();
+    const auto blk_size = data_service.get_blk_size();
+
+    // Sort scrub_candidate_blobs by PBA (physical block address) for sequential disk access
+    std::sort(scrub_candidate_blobs.begin(), scrub_candidate_blobs.end(), [](const auto& a, const auto& b) {
+        // Order by block number. NOTE(review): only blk_num is compared - confirm whether the chunk matters here.
+        const auto pba_a = a.second.pbas().to_single_blkid();
+        const auto pba_b = b.second.pbas().to_single_blkid();
+        return pba_a.blk_num() < pba_b.blk_num();
+    });
+
+    // to not bring too much io pressure, we deep scrub blobs one by one.
+    // TODO: scrub blobs concurrently if necessary.
+    for (const auto& [k, v] : scrub_candidate_blobs) {
+        auto pba = v.pbas();
+        auto total_size = pba.blk_count() * blk_size;
+        sisl::sg_list data_sgs;
+        data_sgs.size = total_size;
+        data_sgs.iovs.emplace_back(
+            iovec{.iov_base = iomanager.iobuf_alloc(blk_size, total_size), .iov_len = total_size});
+
+        data_service.async_read(pba, data_sgs, total_size)
+            .thenValue([this, &k, data_sgs = std::move(data_sgs), deep_scrub_map](auto&& err) {
+                auto blob = data_sgs.iovs[0].iov_base;
+
+                struct buffer_free_guard {
+                    uint8_t* buf;
+                    ~buffer_free_guard() { iomanager.iobuf_free(buf); }
+                } guard{reinterpret_cast< uint8_t* >(blob)};
+
+                if (err) {
+                    LOGERRORMOD(scrubmgr, "Failed to read blob for deep scrub, blob_route={}, error={}", k.key(),
+                                err.message());
+                    deep_scrub_map->add_blob_result(k.key(), ScrubResult::IO_ERROR);
+                    return;
+                }
+
+                const auto& shard_id = k.key().shard;
+                const auto& blob_id = k.key().blob;
+                const auto blob_verify_succeed = m_hs_home_object->verify_blob(blob, shard_id, blob_id, true);
+                if (!blob_verify_succeed) {
+                    LOGERRORMOD(scrubmgr, "Blob verification failed for deep scrub, blob_route={}", k.key());
+                    deep_scrub_map->add_blob_result(k.key(), ScrubResult::MISMATCH);
+                    return;
+                }
+
+                BlobHashArray blob_hash{};
+                HSHomeObject::BlobHeader const* header = r_cast< HSHomeObject::BlobHeader const* >(blob);
+                std::memcpy(blob_hash.data(), header->hash, blob_hash.size());
+                deep_scrub_map->add_blob_result(k.key(), blob_hash);
+            })
+            // we deep scrub blobs sequentially (k is captured by reference, which is only safe because of this get()).
+            .get();
+    }
+
+    SCRUBLOGD(pg_id, task_id, "req_id={}, deep blob scrub completed, found {} blobs in range [{},{}]", req_id,
+              deep_scrub_map->blobs.size(), start_blob_id, end_blob_id);
+
+    return deep_scrub_map;
+}
+
+std::shared_ptr< ScrubManager::ShallowShardScrubMap >
+ScrubManager::local_scrub_shard(std::shared_ptr< shard_scrub_req > req) {
+    const auto my_uuid = m_hs_home_object->our_uuid();
+    const auto task_id = req->task_id;
+    const auto req_id = req->req_id;
+    const auto scrub_lsn = req->scrub_lsn;
+    const auto& pg_id = req->pg_id;
+    const auto start = req->start;
+    const auto end = req->end;
+    // Lists the local shards of the pg whose pure shard id lies in [start, end]; returns nullptr when it can not run.
+    auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
+    if (!hs_pg) {
+        SCRUBLOGD(pg_id, task_id, "can not find hs_pg, fail to do shard scrub!");
+        return nullptr;
+    }
+
+    if (!wait_for_scrub_lsn_commit(hs_pg->repl_dev_, scrub_lsn)) {
+        SCRUBLOGD(pg_id, task_id,
+                  "commit lsn is not advanced to scrub lsn {} after waiting for a while, fail to do local shard scrub ",
+                  scrub_lsn);
+        return nullptr;
+    }
+
+    std::shared_ptr< ScrubManager::ShallowShardScrubMap > shard_srcub_map;
+    const bool is_deep_scrub = req->is_deep_scrub();
+    if (is_deep_scrub) {
+        shard_srcub_map =
+            std::make_shared< ScrubManager::DeepShardScrubMap >(pg_id, task_id, req_id, scrub_lsn, my_uuid);
+    } else {
+        shard_srcub_map =
+            std::make_shared< ScrubManager::ShallowShardScrubMap >(pg_id, task_id, req_id, scrub_lsn, my_uuid);
+    }
+
+    // Iterate through all shards in the PG
+    for (const auto& shard_it : hs_pg->shards_) {
+        auto shard_id = (shard_it->info).id;
+        // remove pg_id, get the pure shard id.
+        auto pure_shard_id = (shard_id << pg_width) >> pg_width;
+        if (pure_shard_id < start || pure_shard_id > end) { continue; }
+
+        // TODO:: filter out those shards whose seal_lsn is after scrub_lsn, as they are not in the candidate shard
+        // list for scrub.
+        shard_srcub_map->add_shard(shard_id);
+
+        // TODO:: optimize the following logic, dynamic cast once.
+        if (is_deep_scrub) {
+            auto deep_shard_scrub_map = std::dynamic_pointer_cast< DeepShardScrubMap >(shard_srcub_map);
+            RELEASE_ASSERT(deep_shard_scrub_map,
+                           "shard_srcub_map should be DeepShardScrubMap when is_deep_scrub is true!");
+
+            // TODO: Read and verify shard metablk
+            // For now, we just mark it as NONE (no error found) since we can not read a specific shard metablk for now.
+            // it needs the support of homestore#metaservice. we should:
+            // 1. Read shard metablk from homestore
+            // 2. Compare with in-memory shard info
+            // 3. If mismatch or not found or error_io, add to problematic_shards with appropriate ScrubResult
+
+            // TODO:: if find any problematic shard meta blk.
+            //  deep_shard_scrub_map->add_problematic_shard(shard_id, ScrubResult::NONE);
+        }
+    }
+
+    SCRUBLOGD(pg_id, task_id, "shard scrub completed, checked {} shards in range [{},{}]",
+              shard_srcub_map->shards.size(), start, end);
+
+    return shard_srcub_map;
+}
+
+folly::SemiFuture< std::shared_ptr< ScrubManager::ShallowScrubReport > >
+ScrubManager::submit_scrub_task(const pg_id_t& pg_id, const bool is_deep, const bool force,
+                                SCRUB_TRIGGER_TYPE trigger_type) { // a null report means: submission rejected
+    LOGINFOMOD(scrubmgr, "submit a scrub task for pg={}, deep_scrub:{}", pg_id, is_deep);
+    auto it = m_pg_scrub_ctx_map.find(pg_id);
+    if (it != m_pg_scrub_ctx_map.end()) {
+        // TODO: two threads may try to submit a scrub task for the same pg at the same time. Add a per-pg lock or an
+        // atomic operation so that only one scrub task can be submitted per pg and other submitters get the
+        // existing task instead. NOTE(review): the context is only inserted by handle_pg_scrub_task, so a task that
+        // is queued but not yet picked up by a worker is not detected by this check.
+        LOGWARNMOD(scrubmgr, "a scrub task is already running for pg={}, no need to submit another one!", pg_id);
+        return folly::makeFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr));
+    }
+
+    const auto ps_scrub_super_blk_it = m_pg_scrub_sb_map.find(pg_id);
+    if (ps_scrub_super_blk_it == m_pg_scrub_sb_map.end()) {
+        LOGERRORMOD(scrubmgr, "can not find scrub superblk for pg={}, fail to submit scrub task!", pg_id);
+        return folly::makeFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr));
+    }
+
+    // Get the PG and check its state
+    const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
+    if (!hs_pg) {
+        LOGERRORMOD(scrubmgr, "can not find hs_pg for pg={}, fail to submit scrub task!", pg_id);
+        return folly::makeFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr));
+    }
+
+    // Unless forced, only a pg in HEALTHY state (state must be 0) may be scrubbed.
+    if (!force) {
+        const auto current_state = hs_pg->pg_state_.get();
+        if (current_state != 0) {
+            LOGWARNMOD(scrubmgr, "pg={} is not in HEALTHY state (current_state={}), cannot submit scrub task!", pg_id,
+                       current_state);
+            return folly::makeFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr));
+        }
+
+        // Set SCRUBBING state; the check above and this set are two separate steps.
+        hs_pg->pg_state_.set_state(PGStateMask::SCRUBBING);
+        LOGINFOMOD(scrubmgr, "set SCRUBBING state for pg={}", pg_id);
+    }
+
+    // TODO: check the stopped flag to avoid submitting a new scrub task when the scrub manager is stopped.
+
+    const auto& pg_scrub_sb = *(ps_scrub_super_blk_it->second);
+    const auto last_scrub_time =
+        is_deep ? pg_scrub_sb->last_deep_scrub_timestamp : pg_scrub_sb->last_shallow_scrub_timestamp;
+    // NOTE(review): the result of push() is not checked - confirm what happens when the queue is already closed.
+    auto [promise, future] = folly::makePromiseContract< std::shared_ptr< ShallowScrubReport > >();
+    ScrubManager::scrub_task task(last_scrub_time, pg_id, is_deep, trigger_type, std::move(promise));
+    m_scrub_task_queue.push(std::move(task));
+    return std::move(future);
+}
+
+void ScrubManager::cancel_scrub_task(const pg_id_t& pg_id) {
+    // Only a pg that currently has a scrub context can be cancelled; otherwise just warn.
+    if (auto ctx_it = m_pg_scrub_ctx_map.find(pg_id); ctx_it != m_pg_scrub_ctx_map.end()) {
+        ctx_it->second->cancel();
+        LOGINFOMOD(scrubmgr, "cancel scrub task for pg={}", pg_id);
+        return;
+    }
+    LOGWARNMOD(scrubmgr, "no running scrub task for pg={}, no need to cancel!", pg_id);
+}
+
+// Sends the scrub request to every remote peer, then waits (with bounded retries) for the scrub maps of all members.
+bool ScrubManager::send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
+                                           const std::unordered_set< peer_id_t >& all_member_peer_ids,
+                                           const peer_id_t& my_uuid, shared< homestore::ReplDev > pg_repl_dev,
+                                           const sisl::io_blob_list_t& req_blob_list,
+                                           std::shared_ptr< PGScrubContext > scrub_ctx, uint32_t max_retries,
+                                           std::chrono::seconds timeout, const std::string& scrub_type_name) {
+    // Lambda to send requests to a list of peers; our own uuid is skipped.
+    auto send_requests_to_remote_peers = [&](const auto& peer_list, bool is_retry) {
+        for (const auto& peer_id : peer_list) {
+            if (peer_id == my_uuid) continue;
+            pg_repl_dev->data_request_unidirectional(peer_id, HSHomeObject::PUSH_SCRUB_REQ, req_blob_list)
+                .via(&folly::InlineExecutor::instance())
+                .thenValue([pg_id, peer_id, task_id, scrub_type_name, is_retry](auto&& response) {
+                    if (response.hasError()) {
+                        SCRUBLOGE(pg_id, task_id, "{} to send {} scrub request to peer {}, error={}",
+                                  is_retry ? "retry failed" : "failed", scrub_type_name, peer_id, response.error());
+                    }
+                });
+        }
+    };
+
+    // Send initial requests to all peers
+    send_requests_to_remote_peers(all_member_peer_ids, false);
+
+    // Wait for all responses and retry if needed; retrying stops as soon as the task is cancelled.
+    if (!scrub_ctx->wait_for_all_req_sms(timeout)) {
+        for (uint32_t retry = 0; retry < max_retries && !scrub_ctx->is_cancelled(); ++retry) {
+            auto peers_to_retry = scrub_ctx->get_peers_to_retry();
+            if (peers_to_retry.empty()) break;
+
+            SCRUBLOGD(pg_id, task_id, "Retrying {} scrub for {} peers", scrub_type_name, peers_to_retry.size());
+            send_requests_to_remote_peers(peers_to_retry, true);
+
+            if (scrub_ctx->wait_for_all_req_sms(timeout)) break;
+        }
+    }
+
+    // Check if cancelled or incomplete: success requires one scrub map per member of the pg.
+    if (scrub_ctx->is_cancelled() || scrub_ctx->peer_sm_map_.size() != scrub_ctx->member_peer_ids_.size()) {
+        SCRUBLOGD(pg_id, task_id, "scrub task is cancelled or incomplete when scrubbing {}!", scrub_type_name);
+        return false;
+    }
+    return true;
+}
+
+void ScrubManager::handle_pg_scrub_task(scrub_task task) {
+    // we handle deep and shallow scrub task in the same fuction to reduce code duplication.
+    // TODO:: separate them if the logic is very different in the future.
+
+    const auto& pg_id = task.pg_id;
+    const auto& task_id = task.task_id;
+    const auto is_deep_scrub = task.is_deep_scrub;
+    SCRUBLOGD(pg_id, task_id,
+              "Starting handling {} scrub task, last_scrub_time={} =====", is_deep_scrub ? "deep" : "shallow",
+              task.last_scrub_time);
+
+    std::shared_ptr< ShallowScrubReport > pg_scrub_report =
+        is_deep_scrub ? std::make_shared< DeepScrubReport >(pg_id) : std::make_shared< ShallowScrubReport >(pg_id);
+
+    struct scrub_task_guard {
+        HSHomeObject* home_obj;
+        folly::ConcurrentHashMap< pg_id_t, std::shared_ptr< PGScrubContext > >& pg_scrub_ctx_map;
+        scrub_task& task;
+        std::shared_ptr< ShallowScrubReport >& scrub_report;
+        const pg_id_t& pg_id;
+
+        ~scrub_task_guard() {
+            pg_scrub_ctx_map.erase(pg_id);
+            task.scrub_report_promise->setValue(scrub_report);
+
+            // Clear SCRUBBING state from pg_state
+            auto hs_pg = home_obj->get_hs_pg(pg_id);
+            if (hs_pg) {
+                hs_pg->pg_state_.clear_state(PGStateMask::SCRUBBING);
+                LOGINFOMOD(scrubmgr, "cleared SCRUBBING state for pg={}", pg_id);
+            }
+        }
+    } guard{m_hs_home_object, m_pg_scrub_ctx_map, task, pg_scrub_report, pg_id};
+
+    const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
+    if (!hs_pg) {
+        SCRUBLOGE(pg_id, task_id, "can not find hs_pg for this pg, fail this scrub task!");
+        return;
+    }
+
+    const auto& members = (hs_pg->pg_info_).members;
+    std::unordered_set< peer_id_t > all_member_peer_ids;
+    for (const auto& member : members) {
+        all_member_peer_ids.insert(member.id);
+    }
+
+    const auto& my_uuid = m_hs_home_object->our_uuid();
+    // TODO: the node is removed from the raft group? handle this case later
+    RELEASE_ASSERT(all_member_peer_ids.find(my_uuid) != all_member_peer_ids.end(),
+                   "my uuid={} is not in the member list of this pg, something is wrong!", my_uuid);
+
+    auto [ctx_it, happened] =
+        m_pg_scrub_ctx_map.try_emplace(pg_id, std::make_shared< PGScrubContext >(task_id, all_member_peer_ids));
+    if (!happened) {
+        SCRUBLOGE(pg_id, task_id, "a scrub task is already running for this pg, fail this {} scrub task!",
+                  is_deep_scrub ? "deep" : "shallow");
+        return;
+    }
+
+    auto& scrub_ctx = ctx_it->second;
+    const auto& pg_repl_dev = hs_pg->repl_dev_;
+    auto scrub_lsn = pg_repl_dev->get_last_commit_lsn();
+
+    // Scrub timeout configuration (based on HDD random read performance， iops)
+    // Worst case scenario: 2GB data in a chunk, 4K (min blob size) random read, 7200 RPM HDD ~200 IOPS
+    // Total read operations: 2GB / 4KB = 524,288 reads
+    // Estimated time: 524,288 / 200 = 5,243 seconds ≈ 44minutes
+    // so scrub the whole chunk at a time is not acceptable. we need to scrub blobs range by range , every range should
+    // have a acceptable timeout.
+
+    // TODO::make the following parameters configurable and find the optimal value based on real world scrub performance
+    // test. for hdd, iops matters more than throughput in scrubbing case.
+    constexpr uint32_t MAX_RETRIES = 5; // Maximum retry attempts
+    constexpr auto SM_REQUEST_TIMEOUT = std::chrono::seconds(10);
+
+    // Step 1: Scrub PG Meta (only for deep scrub)
+    if (is_deep_scrub) {
+        SCRUBLOGD(pg_id, task_id, "Starting PG meta scrub");
+        auto pg_meta_req = std::make_shared< base_scrub_req >(task_id, scrub_ctx->req_id.fetch_add(1), scrub_lsn,
+                                                              my_uuid, pg_id, true);
+        // TODO:: add a lock here to protect add_scrub_map when changing current_req.
+        scrub_ctx->current_req = pg_meta_req;
+        // Send requests to all peers
+        auto flatbuffer = pg_meta_req->build_flat_buffer();
+        sisl::io_blob_list_t req_blob_list;
+        const auto scrub_type = SCRUB_TYPE::PG_META;
+        req_blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
+        req_blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
+
+        // Scrub locally async (runs in parallel with remote requests)
+        m_scrub_req_executor->add([this, pg_meta_req, scrub_ctx, pg_id, task_id]() {
+            auto pg_meta_map = scrub_pg_meta(pg_meta_req);
+            if (!scrub_ctx->add_scrub_map(pg_meta_map)) {
+                SCRUBLOGE(pg_id, task_id, "failed to add local PG meta scrub map to context!");
+            } else {
+                SCRUBLOGD(pg_id, task_id, "Local PG meta scrub added");
+            }
+        });
+
+        // Send requests to all peers and wait for responses
+        if (!send_scrub_req_and_wait(pg_id, task_id, all_member_peer_ids, my_uuid, pg_repl_dev, req_blob_list,
+                                     scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT, "PG meta")) {
+            return;
+        }
+
+        // Merge PG meta scrub results
+        pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+        SCRUBLOGD(pg_id, task_id, "PG meta scrub completed");
+    }
+
+    // Step 2: Scrub Shard Range
+    SCRUBLOGD(pg_id, task_id, "Starting shard range {} scrub", is_deep_scrub ? "deep" : "shallow");
+    {
+        // we can not scrub all shards based on the shard sealed_lsn, especially after we have shard seal_lsn. since
+        // leader might lose some shard, so if we select the shard range seen by leader itself, we might miss those lost
+        // shards which exist on the follower , but not on leader. so for now, we select shard range based on the
+        // current shard_sequence_num_ at leader, which is the last shard_id in of the pg.
+
+        // we assume shard_id will not overflow uint64_t;
+        const auto last_shard_id = hs_pg->shard_sequence_num_;
+
+        // the key point here is that until we commit to scrub_lsn , we should at least see last_shard_id.
+        scrub_lsn = pg_repl_dev->get_last_commit_lsn();
+        SCRUBLOGD(pg_id, task_id, "Shard range: 0 to {}, scrub_lsn={}", last_shard_id, scrub_lsn);
+
+        // TODO:: make it configurable.
+        // a shard_id_t is uint64(8B). if we want the max size of a shard scrub map to be 16MB, then the number of
+        // shard_ids in a shard scrub map should be 16MB/8B=2M(2097152)
+        const auto shard_scrub_range_size = 2097152;
+
+        // Scrub shard range
+        uint64_t shard_start = 0;
+        uint64_t shard_end = shard_scrub_range_size;
+        uint64_t shard_range_count = 0;
+        for (; shard_start <= last_shard_id;
+             shard_start = shard_end + 1, shard_end = std::min(shard_end + shard_scrub_range_size, last_shard_id)) {
+            ++shard_range_count;
+            SCRUBLOGD(pg_id, task_id, "Scrubbing shard range {}: [{}, {}]", shard_range_count, shard_start, shard_end);
+
+            auto shard_req = std::make_shared< shard_scrub_req >(task_id, scrub_ctx->req_id.fetch_add(1), scrub_lsn,
+                                                                 my_uuid, pg_id, shard_start, shard_end, is_deep_scrub);
+            scrub_ctx->reset_for_new_req();
+            scrub_ctx->current_req = shard_req;
+
+            // scrub locally async (runs in parallel with remote requests)
+            m_scrub_req_executor->add([this, shard_req, scrub_ctx, pg_id, task_id, is_deep_scrub]() {
+                auto scrub_map = local_scrub_shard(shard_req);
+                if (!scrub_ctx->add_scrub_map(scrub_map)) {
+                    SCRUBLOGE(pg_id, task_id, "failed to add local {} shard scrub map to context!",
+                              is_deep_scrub ? "deep" : "shallow");
+                } else {
+                    SCRUBLOGD(pg_id, task_id, "local {} shard scrub map added!", is_deep_scrub ? "deep" : "shallow");
+                }
+            });
+
+            // request remote peers to scrub this shard range and wait for responses
+            auto flatbuffer = shard_req->build_flat_buffer();
+            sisl::io_blob_list_t req_blob_list;
+            const auto scrub_type = is_deep_scrub ? SCRUB_TYPE::DEEP_SHARD : SCRUB_TYPE::SHALLOW_SHARD;
+            req_blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
+            req_blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
+
+            if (!send_scrub_req_and_wait(pg_id, task_id, all_member_peer_ids, my_uuid, pg_repl_dev, req_blob_list,
+                                         scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT, "shard")) {
+                SCRUBLOGE(pg_id, task_id, "shard scrub failed or was cancelled");
+                return;
+            }
+
+            SCRUBLOGD(pg_id, task_id, "Merging shard scrub results for range [{}, {}]", shard_start, shard_end);
+            pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+        }
+        SCRUBLOGD(pg_id, task_id, "shard scrub completed, total ranges scrubbed: {}", shard_range_count);
+    }
+
+    // Step 3: Scrub Blob Range
+    SCRUBLOGD(pg_id, task_id, "Starting blob range {} scrub", is_deep_scrub ? "deep" : "shallow");
+    {
+        // we assume blob_id will not overflow uint64_t;
+        const auto last_blob_id = hs_pg->get_last_blob_id();
+
+        // just like shard, the key point here is that until we commit to scrub_lsn , we can see last_blob_id.
+        scrub_lsn = pg_repl_dev->get_last_commit_lsn();
+        SCRUBLOGD(pg_id, task_id, "Blob range: 0 to {}, scrub_lsn={}", last_blob_id, scrub_lsn);
+
+        // For deep scrub: since we have a SM_REQUEST_TIMEOUT as scrub map request timeout. assuming the iops of a hdd
+        // is 200, and we want at most half of the time to be spent on io, so we have this blob range.
+        // For shallow scrub: we will not schedule io to disk, so we set blob scrub range the same as that of shard.
+        const auto blob_scrub_range_size = is_deep_scrub ? (HDD_IOPS * (SM_REQUEST_TIMEOUT.count() / 2)) : 2097152;
+
+        // Scrub blob range
+        uint64_t blob_start = 0;
+        uint64_t blob_end = blob_scrub_range_size;
+        uint64_t blob_range_count = 0;
+        for (; blob_start <= last_blob_id;
+             blob_start = blob_end + 1, blob_end = std::min(blob_end + blob_scrub_range_size, last_blob_id)) {
+            ++blob_range_count;
+            SCRUBLOGD(pg_id, task_id, "Scrubbing blob range {}: [{}, {}]", blob_range_count, blob_start, blob_end);
+
+            auto blob_req = std::make_shared< blob_scrub_req >(task_id, scrub_ctx->req_id.fetch_add(1), scrub_lsn,
+                                                               my_uuid, pg_id, blob_start, blob_end, is_deep_scrub);
+            scrub_ctx->reset_for_new_req();
+            scrub_ctx->current_req = blob_req;
+
+            // locally scrub this blob range async (runs in parallel with remote requests)
+            m_scrub_req_executor->add([this, blob_req, scrub_ctx, pg_id, task_id, is_deep_scrub]() {
+                auto scrub_map = local_scrub_blob(blob_req);
+                if (!scrub_ctx->add_scrub_map(scrub_map)) {
+                    SCRUBLOGE(pg_id, task_id, "failed to add local {} blob scrub map to context!",
+                              is_deep_scrub ? "deep" : "shallow");
+                } else {
+                    SCRUBLOGD(pg_id, task_id, "local {} blob scrub map added!", is_deep_scrub ? "deep" : "shallow");
+                }
+            });
+
+            // request remote peers to scrub this blob range and wait for responses
+            auto flatbuffer = blob_req->build_flat_buffer();
+            sisl::io_blob_list_t req_blob_list;
+            const auto scrub_type = is_deep_scrub ? SCRUB_TYPE::DEEP_BLOB : SCRUB_TYPE::SHALLOW_BLOB;
+            req_blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
+            req_blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
+
+            if (!send_scrub_req_and_wait(pg_id, task_id, all_member_peer_ids, my_uuid, pg_repl_dev, req_blob_list,
+                                         scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT, "blob")) {
+                SCRUBLOGE(pg_id, task_id, "blob scrub failed or was cancelled");
+                return;
+            }
+
+            SCRUBLOGD(pg_id, task_id, "Merging blob scrub results for range [{}, {}]", blob_start, blob_end);
+            pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+        }
+        SCRUBLOGD(pg_id, task_id, "blob scrub completed, total ranges scrubbed: {}", blob_range_count);
+    }
+
+    // only if pg is successfully scrubbed, we persist scrub metablk.
+    save_scrub_superblk(pg_id, is_deep_scrub, true);
+    SCRUBLOGD(pg_id, task_id, "successfully complete {} scrub task!", is_deep_scrub ? "deep" : "shallow");
+}
+
+// Registers a PG with the scrub manager by making sure a scrub superblock exists for it.
+//
+// Nothing is persisted when the home object does not know the PG. An already existing superblock is left untouched
+// (non-forced save), so a replayed create-pg log can not override the scrub timestamps recorded earlier.
+//
+// @param pg_id id of the PG being added.
+void ScrubManager::add_pg(const pg_id_t pg_id) {
+    LOGINFOMOD(scrubmgr, "added new scrub superblock for pg={}", pg_id);
+
+    const bool pg_known = (m_hs_home_object->get_hs_pg(pg_id) != nullptr);
+    if (pg_known) {
+        // is_deep_scrub=false, force_update=false: a superblock is only created when none exists yet.
+        save_scrub_superblk(pg_id, false, false);
+    } else {
+        LOGINFOMOD(scrubmgr, "can not find pg={}!", pg_id);
+    }
+}
+
+// Drops all scrub state kept for a PG.
+//
+// When a scrub superblock is tracked for the PG, its scrub task is cancelled, the superblock is destroyed, and the
+// PG is erased from both the scrub-context map and the superblock map. Otherwise this is a no-op apart from a log.
+//
+// @param pg_id id of the PG being removed.
+void ScrubManager::remove_pg(const pg_id_t pg_id) {
+    auto sb_it = m_pg_scrub_sb_map.find(pg_id);
+    if (sb_it != m_pg_scrub_sb_map.end()) {
+        LOGINFOMOD(scrubmgr, "removed pg={} in scrub manager!", pg_id);
+        // cancel first, then tear down the persisted superblock and the in-memory entries.
+        cancel_scrub_task(pg_id);
+        sb_it->second->destroy();
+        m_pg_scrub_ctx_map.erase(pg_id);
+        m_pg_scrub_sb_map.erase(sb_it);
+        return;
+    }
+
+    LOGINFOMOD(scrubmgr, "no scrub superblock found for pg={}, no need to remove", pg_id);
+}
+
+// Handles one persisted PG scrub superblock found while meta blocks are being loaded.
+// NOTE(review): the previous comment stated this runs in the meta_service thread context and mentioned
+// m_pg_scrub_sb_map_mtx; no lock is taken inside this function, so confirm whether the caller is expected to hold it.
+//
+// @param buf                 serialized superblock content.
+// @param meta_cookie         cookie passed to superblk::load together with buf.
+// @param stale_pg_scrub_sbs  receives superblocks whose PG can no longer be found; they are only collected here.
+void ScrubManager::on_pg_scrub_meta_blk_found(
+    sisl::byte_view const& buf, void* meta_cookie,
+    std::vector< homestore::superblk< pg_scrub_superblk > >& stale_pg_scrub_sbs) {
+    auto scrub_sb = std::make_shared< homestore::superblk< pg_scrub_superblk > >();
+    scrub_sb->load(buf, meta_cookie);
+    const auto pg_id = (*scrub_sb)->pg_id;
+
+    const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
+    if (hs_pg) {
+        // grab the timestamps for logging before the shared_ptr is moved into the map.
+        const auto last_deep_scrub_time = (*scrub_sb)->last_deep_scrub_timestamp;
+        const auto last_shallow_scrub_time = (*scrub_sb)->last_shallow_scrub_timestamp;
+
+        m_pg_scrub_sb_map.emplace(pg_id, std::move(scrub_sb));
+        LOGINFOMOD(scrubmgr, "loaded scrub superblock for pg={}, last_deep_scrub_time={}, last_shallow_scrub_time={}",
+                   pg_id, last_deep_scrub_time, last_shallow_scrub_time);
+        return;
+    }
+
+    // the PG is gone: this superblock is stale, hand it to the caller through stale_pg_scrub_sbs.
+    LOGINFOMOD(scrubmgr, "can not find pg={}, destroy stale scrub superblock", pg_id);
+    stale_pg_scrub_sbs.emplace_back(std::move(*scrub_sb));
+}
+
+// Persists the scrub bookkeeping (last deep / last shallow scrub time) of a PG.
+//
+// - No superblock tracked for the PG yet: a new one is created with BOTH timestamps set to the current time, written,
+//   and registered in m_pg_scrub_sb_map. is_deep_scrub and force_update are not consulted on this path.
+// - Superblock exists and force_update is true: only the timestamp selected by is_deep_scrub is set to the current
+//   time and the superblock is written.
+// - Superblock exists and force_update is false: nothing is changed; a log line is emitted.
+//
+// Timestamps are seconds since the std::chrono::system_clock epoch.
+//
+// @param pg_id          id of the PG whose scrub superblock is saved.
+// @param is_deep_scrub  selects which timestamp to refresh when updating an existing superblock.
+// @param force_update   whether an existing superblock may be overwritten.
+void ScrubManager::save_scrub_superblk(const pg_id_t pg_id, const bool is_deep_scrub, bool force_update) {
+    const auto current_time =
+        std::chrono::duration_cast< std::chrono::seconds >(std::chrono::system_clock::now().time_since_epoch()).count();
+
+    auto it = m_pg_scrub_sb_map.find(pg_id);
+    if (it == m_pg_scrub_sb_map.end()) {
+        // Create new superblock for this PG
+        auto sb = std::make_shared< homestore::superblk< pg_scrub_superblk > >(pg_scrub_meta_name);
+        (*sb).create(sizeof(pg_scrub_superblk));
+        (*sb)->pg_id = pg_id;
+        // initialize both timestamps; previously the deep timestamp was assigned twice and the shallow one was never
+        // set on a freshly created superblock.
+        (*sb)->last_deep_scrub_timestamp = current_time;
+        (*sb)->last_shallow_scrub_timestamp = current_time;
+        (*sb).write();
+        m_pg_scrub_sb_map.emplace(pg_id, std::move(sb));
+        return;
+    }
+
+    if (force_update) {
+        // Update existing superblock
+        if (is_deep_scrub) {
+            (*(it->second))->last_deep_scrub_timestamp = current_time;
+        } else {
+            (*(it->second))->last_shallow_scrub_timestamp = current_time;
+        }
+        (*(it->second)).write();
+    } else {
+        LOGINFOMOD(scrubmgr, "skip updating scrub superblock for pg={} since there is no scrub progress update", pg_id);
+    }
+}
+
+// Looks up the scrub superblock tracked for a PG.
+//
+// @param pg_id id of the PG to look up.
+// @return a copy of the PG's pg_scrub_superblk, or std::nullopt (with a warning logged) when none is tracked.
+std::optional< ScrubManager::pg_scrub_superblk > ScrubManager::get_scrub_superblk(const pg_id_t pg_id) const {
+    const auto sb_it = m_pg_scrub_sb_map.find(pg_id);
+    if (sb_it != m_pg_scrub_sb_map.end()) {
+        auto& scrub_sb = *(sb_it->second);
+        return *scrub_sb;
+    }
+
+    LOGWARNMOD(scrubmgr, "scrub superblk not found for pg {}", pg_id);
+    return std::nullopt;
+}
+
+/* ScrubContext */
+// Records the scrub map delivered by one peer for the request currently in flight and wakes up any waiter.
+//
+// A scrub map is rejected (returns false) when it is null, when its peer is not in member_peer_ids_, when it does
+// not match current_req, or when a scrub map from the same peer has already been recorded.
+//
+// @param bsm scrub map to record.
+// @return true if the scrub map was added, false if it was rejected.
+bool ScrubManager::PGScrubContext::add_scrub_map(std::shared_ptr< ScrubManager::BaseScrubMap > bsm) {
+    if (!bsm) {
+        LOGWARNMOD(scrubmgr, "received null scrub map, ignore it!");
+        return false;
+    }
+
+    const auto& peer_id = bsm->peer_id;
+    const auto pg_id = bsm->pg_id;
+    if (member_peer_ids_.find(peer_id) == member_peer_ids_.end()) {
+        SCRUBLOGD(pg_id, task_id, "received scrub map from peer {} which is not in the pg member list, ignore it!",
+                  peer_id);
+        return false;
+    }
+
+    // snapshot of peer_sm_map_.size() taken while mtx_ is held, so the log below never touches the map unlocked.
+    std::size_t received_sm_count = 0;
+    {
+        std::lock_guard lg(mtx_);
+        if (!bsm->match(current_req)) {
+            SCRUBLOGD(pg_id, task_id, "scrub map does not match up with current req, skip adding");
+            return false;
+        }
+        auto [_, happened] = peer_sm_map_.try_emplace(peer_id, bsm);
+        if (!happened) {
+            SCRUBLOGD(pg_id, task_id, "already received scrub map from peer {}, ignore the duplicated one!", peer_id);
+            return false;
+        }
+        received_sm_count = peer_sm_map_.size();
+        RELEASE_ASSERT(received_sm_count <= member_peer_ids_.size(),
+                       "received scrub map count {} should not exceed member peer count {}, something is wrong!",
+                       received_sm_count, member_peer_ids_.size());
+    }
+
+    // the map was updated under mtx_, so a waiter blocked in wait_for_all_req_sms re-evaluates its predicate after
+    // this wake-up; notifying after the lock is released avoids waking it only to block on the mutex.
+    cv_.notify_all();
+    SCRUBLOGD(pg_id, task_id, "added scrub map from peer {}, current received scrub map count {}/{}", peer_id,
+              received_sm_count, member_peer_ids_.size());
+    return true;
+}
+
+// Collects the member peers that have not delivered a scrub map for the current request yet.
+//
+// @return ids of all peers in member_peer_ids_ that have no entry in peer_sm_map_ (empty when all have answered).
+std::vector< peer_id_t > ScrubManager::PGScrubContext::get_peers_to_retry() const {
+    std::vector< peer_id_t > pending_peers;
+
+    // peer_sm_map_ is inspected under mtx_, the same mutex add_scrub_map holds while inserting.
+    std::lock_guard lg(mtx_);
+    for (const auto& member_id : member_peer_ids_) {
+        const bool already_answered = (peer_sm_map_.find(member_id) != peer_sm_map_.end());
+        if (already_answered) { continue; }
+        pending_peers.push_back(member_id);
+    }
+
+    return pending_peers;
+}
+
+// Blocks until every member peer has delivered its scrub map for the current request, the task is cancelled, or
+// `timeout` elapses.
+//
+// @param timeout maximum time to block.
+// @return true when the caller can proceed (all scrub maps received, or the task was cancelled); false when the wait
+//         timed out with scrub maps still missing, in which case the caller can decide whether to retry.
+bool ScrubManager::PGScrubContext::wait_for_all_req_sms(std::chrono::milliseconds timeout) {
+    if (cancelled) {
+        LOGINFOMOD(scrubmgr, "scrub task is cancelled, no need to wait for req sms!");
+        return true;
+    }
+
+    // "done" means: cancelled, or one scrub map recorded per member peer.
+    const auto can_proceed = [this] { return cancelled || peer_sm_map_.size() == member_peer_ids_.size(); };
+
+    std::unique_lock lock(mtx_);
+    // wait_for checks the predicate before blocking, so an already complete set returns right away; a received
+    // scrub map or a cancellation notifies cv_.
+    cv_.wait_for(lock, timeout, can_proceed);
+
+    // still holding the lock: false here means the timeout expired and some peers are pending.
+    return can_proceed();
+}
+
+// Marks the scrub task as cancelled and wakes up a thread blocked in wait_for_all_req_sms.
+//
+// The flag is flipped while holding mtx_ (the mutex the waiter uses with cv_). Without the lock, a waiter that has
+// just evaluated its predicate but has not blocked yet could miss the notification and sleep for the whole timeout
+// even though the task was cancelled.
+void ScrubManager::PGScrubContext::cancel() {
+    {
+        std::lock_guard lg(mtx_);
+        cancelled.store(true);
+    }
+    // notify after releasing the lock so the woken waiter can re-acquire it immediately.
+    cv_.notify_all();
+}
+
+// Prepares the context for the next request: forgets the request in flight and drops every scrub map collected for
+// it. Both are changed under mtx_.
+void ScrubManager::PGScrubContext::reset_for_new_req() {
+    std::lock_guard guard(mtx_);
+    current_req.reset();
+    peer_sm_map_.clear();
+}
+
+//=========================== Scrub Request Serialization/Deserialization ===========================//
+
+// base_scrub_req implementations
+// Serializes this request as a size-prefixed PgMetaScrubReq flatbuffer.
+//
+// @return detached buffer holding the serialized request.
+flatbuffers::DetachedBuffer ScrubManager::base_scrub_req::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // the issuer peer id is carried as its 16 raw UUID bytes.
+    const std::vector< uint8_t > issuer_uuid(issuer_peer_id.data, issuer_peer_id.data + 16);
+    const auto issuer_uuid_off = builder.CreateVector(issuer_uuid);
+
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, issuer_uuid_off);
+    const auto req_off = CreatePgMetaScrubReq(builder, info_off);
+    FinishSizePrefixedPgMetaScrubReqBuffer(builder, req_off);
+    return builder.Release();
+}
+
+// Populates this request from a size-prefixed PgMetaScrubReq flatbuffer.
+// NOTE(review): buf_size is only checked for being non-zero; the buffer is not verified against it here.
+//
+// @param buf_ptr   start of the serialized request.
+// @param buf_size  size of the buffer in bytes.
+// @return false when the buffer is null/empty or carries no scrub_info, true otherwise. issuer_peer_id is left
+//         unchanged when the issuer uuid is absent or not exactly 16 bytes long.
+bool ScrubManager::base_scrub_req::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    const bool has_buffer = (buf_ptr != nullptr) && (buf_size != 0);
+    if (!has_buffer) {
+        LOGERROR("Invalid buffer for base_scrub_req deserialization");
+        return false;
+    }
+
+    const auto* fb_req = GetSizePrefixedPgMetaScrubReq(buf_ptr);
+    if (fb_req == nullptr) {
+        LOGERROR("Failed to parse base_scrub_req from buffer");
+        return false;
+    }
+
+    const auto* info = fb_req->scrub_info();
+    if (info == nullptr) {
+        LOGERROR("Missing scrub_info in base_scrub_req");
+        return false;
+    }
+
+    // issuer peer id: 16 raw UUID bytes
+    const auto* uuid_bytes = info->issuer_uuid();
+    if (uuid_bytes != nullptr && uuid_bytes->size() == 16) {
+        std::memcpy(issuer_peer_id.data, uuid_bytes->data(), 16);
+    }
+
+    // identifiers shared by every scrub request
+    pg_id = info->pg_id();
+    task_id = info->task_id();
+    req_id = info->req_id();
+    scrub_lsn = info->scrub_lsn();
+
+    return true;
+}
+
+// blob_scrub_req implementations
+// Serializes this blob-range request (scrub info, start, end, deep/shallow flag) as a size-prefixed BlobScrubReq
+// flatbuffer.
+//
+// @return detached buffer holding the serialized request.
+flatbuffers::DetachedBuffer ScrubManager::blob_scrub_req::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // the issuer peer id is carried as its 16 raw UUID bytes.
+    const std::vector< uint8_t > issuer_uuid(issuer_peer_id.data, issuer_peer_id.data + 16);
+    const auto issuer_uuid_off = builder.CreateVector(issuer_uuid);
+
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, issuer_uuid_off);
+    const auto req_off = CreateBlobScrubReq(builder, info_off, start, end, is_deep_scrub_);
+    FinishSizePrefixedBlobScrubReqBuffer(builder, req_off);
+    return builder.Release();
+}
+
+// Populates this blob-range request from a size-prefixed BlobScrubReq flatbuffer.
+// NOTE(review): buf_size is only checked for being non-zero; the buffer is not verified against it here.
+//
+// @param buf_ptr   start of the serialized request.
+// @param buf_size  size of the buffer in bytes.
+// @return false when the buffer is null/empty or carries no scrub_info, true otherwise. issuer_peer_id is left
+//         unchanged when the issuer uuid is absent or not exactly 16 bytes long.
+bool ScrubManager::blob_scrub_req::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    const bool has_buffer = (buf_ptr != nullptr) && (buf_size != 0);
+    if (!has_buffer) {
+        LOGERROR("Invalid buffer for blob_scrub_req deserialization");
+        return false;
+    }
+
+    const auto* fb_req = GetSizePrefixedBlobScrubReq(buf_ptr);
+    if (fb_req == nullptr) {
+        LOGERROR("Failed to parse blob_scrub_req from buffer");
+        return false;
+    }
+
+    const auto* info = fb_req->scrub_info();
+    if (info == nullptr) {
+        LOGERROR("Missing scrub_info in blob_scrub_req");
+        return false;
+    }
+
+    // issuer peer id: 16 raw UUID bytes
+    const auto* uuid_bytes = info->issuer_uuid();
+    if (uuid_bytes != nullptr && uuid_bytes->size() == 16) {
+        std::memcpy(issuer_peer_id.data, uuid_bytes->data(), 16);
+    }
+
+    // identifiers shared by every scrub request
+    pg_id = info->pg_id();
+    task_id = info->task_id();
+    req_id = info->req_id();
+    scrub_lsn = info->scrub_lsn();
+
+    // blob id range and scrub depth requested
+    start = fb_req->start();
+    end = fb_req->end();
+    is_deep_scrub_ = fb_req->isdeepscrub();
+
+    return true;
+}
+
+// shard_scrub_req implementations
+// Serializes this shard-range request (scrub info, start, end, deep/shallow flag) as a size-prefixed ShardScrubReq
+// flatbuffer.
+//
+// @return detached buffer holding the serialized request.
+flatbuffers::DetachedBuffer ScrubManager::shard_scrub_req::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // the issuer peer id is carried as its 16 raw UUID bytes.
+    const std::vector< uint8_t > issuer_uuid(issuer_peer_id.data, issuer_peer_id.data + 16);
+    const auto issuer_uuid_off = builder.CreateVector(issuer_uuid);
+
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, issuer_uuid_off);
+    const auto req_off = CreateShardScrubReq(builder, info_off, start, end, is_deep_scrub_);
+    FinishSizePrefixedShardScrubReqBuffer(builder, req_off);
+    return builder.Release();
+}
+
+// Populates this shard-range request from a size-prefixed ShardScrubReq flatbuffer.
+// NOTE(review): buf_size is only checked for being non-zero; the buffer is not verified against it here.
+//
+// @param buf_ptr   start of the serialized request.
+// @param buf_size  size of the buffer in bytes.
+// @return false when the buffer is null/empty or carries no scrub_info, true otherwise. issuer_peer_id is left
+//         unchanged when the issuer uuid is absent or not exactly 16 bytes long.
+bool ScrubManager::shard_scrub_req::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    const bool has_buffer = (buf_ptr != nullptr) && (buf_size != 0);
+    if (!has_buffer) {
+        LOGERROR("Invalid buffer for shard_scrub_req deserialization");
+        return false;
+    }
+
+    const auto* fb_req = GetSizePrefixedShardScrubReq(buf_ptr);
+    if (fb_req == nullptr) {
+        LOGERROR("Failed to parse shard_scrub_req from buffer");
+        return false;
+    }
+
+    const auto* info = fb_req->scrub_info();
+    if (info == nullptr) {
+        LOGERROR("Missing scrub_info in shard_scrub_req");
+        return false;
+    }
+
+    // issuer peer id: 16 raw UUID bytes
+    const auto* uuid_bytes = info->issuer_uuid();
+    if (uuid_bytes != nullptr && uuid_bytes->size() == 16) {
+        std::memcpy(issuer_peer_id.data, uuid_bytes->data(), 16);
+    }
+
+    // identifiers shared by every scrub request
+    pg_id = info->pg_id();
+    task_id = info->task_id();
+    req_id = info->req_id();
+    scrub_lsn = info->scrub_lsn();
+
+    // shard id range and scrub depth requested
+    start = fb_req->start();
+    end = fb_req->end();
+    is_deep_scrub_ = fb_req->isdeepscrub();
+
+    return true;
+}
+
+//=========================== Scrub Map Serialization/Deserialization ===========================//
+
+// DeepBlobScrubMap implementations
+// Serializes this deep blob scrub map as a size-prefixed DeepBlobScrubMap flatbuffer.
+//
+// Every entry of `blobs` becomes one DeepBlobScrubResultEntry: the blob key (shard id, blob id) plus a union value
+// that is a ScrubResultValue when the entry holds a ScrubResult, and a HashValue (the hash bytes) otherwise.
+//
+// @return detached buffer holding the serialized scrub map.
+flatbuffers::DetachedBuffer ScrubManager::DeepBlobScrubMap::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // scrub info; the peer id of this map goes into the uuid field as raw bytes.
+    const std::vector< uint8_t > uuid_bytes(peer_id.begin(), peer_id.end());
+    const auto uuid_off = builder.CreateVector(uuid_bytes);
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, uuid_off);
+
+    // one result entry per blob
+    std::vector< flatbuffers::Offset< DeepBlobScrubResultEntry > > entry_offs;
+    for (const auto& [route, outcome] : blobs) {
+        const auto key_off = CreateBlobKey(builder, route.shard, route.blob);
+
+        homeobject::ScrubValue value_type;
+        flatbuffers::Offset< void > value_off;
+        if (const auto* result_code = std::get_if< ScrubResult >(&outcome)) {
+            value_type = homeobject::ScrubValue::ScrubResultValue;
+            value_off = CreateScrubResultValue(builder, *result_code).Union();
+        } else {
+            const auto& hash = std::get< BlobHashArray >(outcome);
+            const std::vector< uint8_t > hash_bytes(hash.begin(), hash.end());
+            value_type = homeobject::ScrubValue::HashValue;
+            value_off = CreateHashValueDirect(builder, &hash_bytes).Union();
+        }
+
+        entry_offs.push_back(CreateDeepBlobScrubResultEntry(builder, key_off, value_type, value_off));
+    }
+
+    const auto entries_off = builder.CreateVector(entry_offs);
+    const auto map_off = CreateDeepBlobScrubMap(builder, info_off, start, end, entries_off);
+    FinishSizePrefixedDeepBlobScrubMapBuffer(builder, map_off);
+    return builder.Release();
+}
+
+// Populates this deep blob scrub map from a size-prefixed DeepBlobScrubMap flatbuffer.
+//
+// Existing entries of `blobs` are discarded. Result entries without a blob key or without a union payload are
+// skipped, as are entries whose union type is neither ScrubResultValue nor HashValue.
+// For HashValue entries the hash array is zero-initialized first, so a missing, oversized or short hash yields a
+// deterministic value instead of indeterminate bytes.
+// NOTE(review): buf_size is only checked for being non-zero; the buffer is not verified against it here.
+//
+// @param buf_ptr   start of the serialized scrub map.
+// @param buf_size  size of the buffer in bytes.
+// @return false when the buffer is null/empty or carries no scrub_info, true otherwise. peer_id is left unchanged
+//         when the uuid is absent or not exactly 16 bytes long.
+bool ScrubManager::DeepBlobScrubMap::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    if (!buf_ptr || buf_size == 0) {
+        LOGERROR("Invalid buffer for DeepBlobScrubMap deserialization");
+        return false;
+    }
+
+    auto fb_map = GetSizePrefixedDeepBlobScrubMap(buf_ptr);
+    if (!fb_map) {
+        LOGERROR("Failed to parse DeepBlobScrubMap from buffer");
+        return false;
+    }
+
+    // Load scrub_info
+    auto scrub_info = fb_map->scrub_info();
+    if (!scrub_info) {
+        LOGERROR("Missing scrub_info in DeepBlobScrubMap");
+        return false;
+    }
+
+    pg_id = scrub_info->pg_id();
+    task_id = scrub_info->task_id();
+    req_id = scrub_info->req_id();
+    scrub_lsn = scrub_info->scrub_lsn();
+
+    // Load peer_id from issuer_uuid
+    auto issuer_uuid_bytes = scrub_info->issuer_uuid();
+    if (issuer_uuid_bytes && issuer_uuid_bytes->size() == 16) {
+        std::memcpy(peer_id.data, issuer_uuid_bytes->data(), 16);
+    }
+
+    // Load start and end blob_id
+    start = fb_map->start();
+    end = fb_map->end();
+
+    // Load blob results
+    blobs.clear();
+    auto results = fb_map->deep_blob_scrub_results();
+    if (results) {
+        for (const auto* entry : *results) {
+            if (!entry || !entry->blob_key()) continue;
+
+            // the union payload is optional on the wire; never dereference it unchecked.
+            const void* payload = entry->scrub_result();
+            if (!payload) {
+                LOGERROR("Missing scrub_result payload in DeepBlobScrubMap entry, skip it");
+                continue;
+            }
+
+            BlobRoute blob_route(entry->blob_key()->shard_id(), entry->blob_key()->blob_id());
+
+            auto scrub_value_type = entry->scrub_result_type();
+            if (scrub_value_type == ScrubValue::ScrubResultValue) {
+                auto result_value = static_cast< const ScrubResultValue* >(payload);
+                blobs[blob_route] = result_value->result();
+            } else if (scrub_value_type == ScrubValue::HashValue) {
+                auto hash_value = static_cast< const HashValue* >(payload);
+                // value-initialize so bytes not covered by the received hash are zero, not indeterminate.
+                BlobHashArray hash_array{};
+                if (hash_value->hash() && hash_value->hash()->size() <= blob_max_hash_len) {
+                    std::memcpy(hash_array.data(), hash_value->hash()->data(), hash_value->hash()->size());
+                }
+                blobs[blob_route] = hash_array;
+            }
+        }
+    }
+
+    return true;
+}
+
+// ShallowBlobScrubMap implementations
+// Serializes this shallow blob scrub map (scrub info, start, end and the blob keys in `blobs`) as a size-prefixed
+// ShallowBlobScrubMap flatbuffer.
+//
+// @return detached buffer holding the serialized scrub map.
+flatbuffers::DetachedBuffer ScrubManager::ShallowBlobScrubMap::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // scrub info; the peer id of this map goes into the uuid field as raw bytes.
+    const std::vector< uint8_t > uuid_bytes(peer_id.begin(), peer_id.end());
+    const auto uuid_off = builder.CreateVector(uuid_bytes);
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, uuid_off);
+
+    // one BlobKey (shard id, blob id) per blob route
+    std::vector< flatbuffers::Offset< BlobKey > > key_offs;
+    for (const auto& route : blobs) {
+        const auto key_off = CreateBlobKey(builder, route.shard, route.blob);
+        key_offs.push_back(key_off);
+    }
+
+    const auto keys_off = builder.CreateVector(key_offs);
+    const auto map_off = CreateShallowBlobScrubMap(builder, info_off, start, end, keys_off);
+    FinishSizePrefixedShallowBlobScrubMapBuffer(builder, map_off);
+    return builder.Release();
+}
+
+// Populates this shallow blob scrub map from a size-prefixed ShallowBlobScrubMap flatbuffer.
+//
+// Existing entries of `blobs` are discarded and replaced by the blob keys found in the buffer; null keys are skipped.
+// NOTE(review): buf_size is only checked for being non-zero; the buffer is not verified against it here.
+//
+// @param buf_ptr   start of the serialized scrub map.
+// @param buf_size  size of the buffer in bytes.
+// @return false when the buffer is null/empty or carries no scrub_info, true otherwise. peer_id is left unchanged
+//         when the uuid is absent or not exactly 16 bytes long.
+bool ScrubManager::ShallowBlobScrubMap::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    const bool has_buffer = (buf_ptr != nullptr) && (buf_size != 0);
+    if (!has_buffer) {
+        LOGERROR("Invalid buffer for ShallowBlobScrubMap deserialization");
+        return false;
+    }
+
+    const auto* fb_map = GetSizePrefixedShallowBlobScrubMap(buf_ptr);
+    if (fb_map == nullptr) {
+        LOGERROR("Failed to parse ShallowBlobScrubMap from buffer");
+        return false;
+    }
+
+    const auto* info = fb_map->scrub_info();
+    if (info == nullptr) {
+        LOGERROR("Missing scrub_info in ShallowBlobScrubMap");
+        return false;
+    }
+
+    // peer id: 16 raw UUID bytes
+    const auto* uuid_bytes = info->issuer_uuid();
+    if (uuid_bytes != nullptr && uuid_bytes->size() == 16) {
+        std::memcpy(peer_id.data, uuid_bytes->data(), 16);
+    }
+
+    // identifiers of the request this map answers
+    pg_id = info->pg_id();
+    task_id = info->task_id();
+    req_id = info->req_id();
+    scrub_lsn = info->scrub_lsn();
+
+    // blob id range covered by this map
+    start = fb_map->start();
+    end = fb_map->end();
+
+    // blob routes
+    blobs.clear();
+    const auto* keys = fb_map->blobs();
+    if (keys != nullptr) {
+        for (const auto* key : *keys) {
+            if (key == nullptr) { continue; }
+            blobs.insert(BlobRoute(key->shard_id(), key->blob_id()));
+        }
+    }
+
+    return true;
+}
+
+// ShallowShardScrubMap implementations
+// Serializes this shallow shard scrub map (scrub info plus the shard ids in `shards`) as a size-prefixed
+// ShallowShardScrubMap flatbuffer.
+// NOTE(review): the two fields between scrub info and the shard list are always written as 0, and
+// ShallowShardScrubMap::load does not read them back; confirm whether a start/end range should be carried here.
+//
+// @return detached buffer holding the serialized scrub map.
+flatbuffers::DetachedBuffer ScrubManager::ShallowShardScrubMap::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // scrub info; the peer id of this map goes into the uuid field as raw bytes.
+    const std::vector< uint8_t > uuid_bytes(peer_id.begin(), peer_id.end());
+    const auto uuid_off = builder.CreateVector(uuid_bytes);
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, uuid_off);
+
+    // flatten the shard id container into a contiguous vector for serialization.
+    const std::vector< shard_id_t > shard_ids(shards.begin(), shards.end());
+    const auto shard_ids_off = builder.CreateVector(shard_ids);
+
+    const auto map_off = CreateShallowShardScrubMap(builder, info_off, 0, 0, shard_ids_off);
+    FinishSizePrefixedShallowShardScrubMapBuffer(builder, map_off);
+    return builder.Release();
+}
+
+// Populates this shallow shard scrub map from a size-prefixed ShallowShardScrubMap flatbuffer.
+//
+// Existing entries of `shards` are discarded and replaced by the shard ids found in the buffer.
+// NOTE(review): buf_size is only checked for being non-zero; the buffer is not verified against it here.
+//
+// @param buf_ptr   start of the serialized scrub map.
+// @param buf_size  size of the buffer in bytes.
+// @return false when the buffer is null/empty or carries no scrub_info, true otherwise. peer_id is left unchanged
+//         when the uuid is absent or not exactly 16 bytes long.
+bool ScrubManager::ShallowShardScrubMap::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    const bool has_buffer = (buf_ptr != nullptr) && (buf_size != 0);
+    if (!has_buffer) {
+        LOGERROR("Invalid buffer for ShallowShardScrubMap deserialization");
+        return false;
+    }
+
+    const auto* fb_map = GetSizePrefixedShallowShardScrubMap(buf_ptr);
+    if (fb_map == nullptr) {
+        LOGERROR("Failed to parse ShallowShardScrubMap from buffer");
+        return false;
+    }
+
+    const auto* info = fb_map->scrub_info();
+    if (info == nullptr) {
+        LOGERROR("Missing scrub_info in ShallowShardScrubMap");
+        return false;
+    }
+
+    // peer id: 16 raw UUID bytes
+    const auto* uuid_bytes = info->issuer_uuid();
+    if (uuid_bytes != nullptr && uuid_bytes->size() == 16) {
+        std::memcpy(peer_id.data, uuid_bytes->data(), 16);
+    }
+
+    // identifiers of the request this map answers
+    pg_id = info->pg_id();
+    task_id = info->task_id();
+    req_id = info->req_id();
+    scrub_lsn = info->scrub_lsn();
+
+    // shard ids
+    shards.clear();
+    const auto* shard_ids = fb_map->shards();
+    if (shard_ids != nullptr) {
+        for (const auto shard_id : *shard_ids) {
+            shards.insert(shard_id);
+        }
+    }
+
+    return true;
+}
+
+// DeepShardScrubMap implementations
+// Serialize this deep shard scrub map into a size-prefixed flatbuffer: the shallow part (scrub header and
+// shard ids) nested in a DeepShardScrubMap table, plus one result entry per problematic shard.
+flatbuffers::DetachedBuffer ScrubManager::DeepShardScrubMap::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // Scrub header, carrying the raw bytes of this map's peer_id.
+    const std::vector< uint8_t > uuid_bytes(peer_id.begin(), peer_id.end());
+    const auto uuid_off = builder.CreateVector(uuid_bytes);
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, uuid_off);
+
+    // Shallow part: the shard ids in set order; the two 0 arguments are the slots this map does not track.
+    const std::vector< uint64_t > shard_ids(shards.begin(), shards.end());
+    const auto shards_off = builder.CreateVector(shard_ids);
+    const auto shallow_off = CreateShallowShardScrubMap(builder, info_off, 0, 0, shards_off);
+
+    // Deep part: (shard id, scrub result) for every entry of problematic_shards.
+    std::vector< flatbuffers::Offset< DeepShardScrubResultEntry > > entry_offs;
+    for (auto it = problematic_shards.begin(); it != problematic_shards.end(); ++it) {
+        entry_offs.push_back(CreateDeepShardScrubResultEntry(builder, it->first, it->second));
+    }
+    const auto entries_off = builder.CreateVector(entry_offs);
+
+    const auto deep_off = CreateDeepShardScrubMap(builder, shallow_off, entries_off);
+    FinishSizePrefixedDeepShardScrubMapBuffer(builder, deep_off);
+    return builder.Release();
+}
+
+// Deserialize a size-prefixed DeepShardScrubMap flatbuffer into this object.
+//
+// The buffer is treated as untrusted: the size prefix must fit in `buf_size`, the flatbuffers verifier must
+// accept the prefixed span, and the nested scrub header must carry a 16-byte issuer uuid. No member of this
+// object is modified unless all of those checks pass.
+//
+// @param buf_ptr  start of the size-prefixed flatbuffer
+// @param buf_size number of readable bytes at buf_ptr
+// @return true if the map was loaded; false (after logging the reason) otherwise
+bool ScrubManager::DeepShardScrubMap::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    if (!buf_ptr || buf_size == 0) {
+        LOGERROR("Invalid buffer for DeepShardScrubMap deserialization");
+        return false;
+    }
+
+    // Bound the size prefix by buf_size before anything behind it is dereferenced.
+    constexpr size_t prefix_len = sizeof(flatbuffers::uoffset_t);
+    if (buf_size < prefix_len || flatbuffers::GetPrefixedSize(buf_ptr) > buf_size - prefix_len) {
+        LOGERROR("Failed to parse DeepShardScrubMap from buffer");
+        return false;
+    }
+
+    // Verify exactly the prefixed span, so bytes following the flatbuffer (if any) do not matter.
+    flatbuffers::Verifier verifier(buf_ptr, prefix_len + flatbuffers::GetPrefixedSize(buf_ptr));
+    if (!VerifySizePrefixedDeepShardScrubMapBuffer(verifier)) {
+        LOGERROR("Failed to parse DeepShardScrubMap from buffer");
+        return false;
+    }
+
+    auto fb_map = GetSizePrefixedDeepShardScrubMap(buf_ptr);
+
+    // The base-class data (header + shard ids) lives in the nested shallow map.
+    auto shallow_map = fb_map->shallow_map();
+    if (!shallow_map) {
+        LOGERROR("Missing shallow_map in DeepShardScrubMap");
+        return false;
+    }
+
+    auto scrub_info = shallow_map->scrub_info();
+    if (!scrub_info) {
+        LOGERROR("Missing scrub_info in DeepShardScrubMap");
+        return false;
+    }
+
+    // build_flat_buffer() always writes 16 uuid bytes; anything else would leave peer_id stale.
+    auto issuer_uuid_bytes = scrub_info->issuer_uuid();
+    if (!issuer_uuid_bytes || issuer_uuid_bytes->size() != 16) {
+        LOGERROR("Invalid issuer_uuid in DeepShardScrubMap");
+        return false;
+    }
+
+    // All checks passed: copy the header fields and the peer id.
+    pg_id = scrub_info->pg_id();
+    task_id = scrub_info->task_id();
+    req_id = scrub_info->req_id();
+    scrub_lsn = scrub_info->scrub_lsn();
+    std::memcpy(peer_id.data, issuer_uuid_bytes->data(), 16);
+
+    // Replace (not extend) the shard set with the ids found in the shallow map.
+    shards.clear();
+    auto shard_ids = shallow_map->shards();
+    if (shard_ids) {
+        for (auto shard_id : *shard_ids) {
+            shards.insert(shard_id);
+        }
+    }
+
+    // Replace the problematic-shard table; null entries are skipped.
+    problematic_shards.clear();
+    auto results = fb_map->problematic_shards();
+    if (results) {
+        for (const auto* entry : *results) {
+            if (!entry) continue;
+            problematic_shards[entry->shard_id()] = entry->result();
+        }
+    }
+
+    return true;
+}
+
+// Serialize this PG-meta scrub map (scrub header + the single PG meta scrub result) into a size-prefixed
+// flatbuffer.
+flatbuffers::DetachedBuffer ScrubManager::PGMetaScrubMap::build_flat_buffer() const {
+    flatbuffers::FlatBufferBuilder builder;
+
+    // Scrub header, carrying the raw bytes of this map's peer_id.
+    const std::vector< uint8_t > uuid_bytes(peer_id.begin(), peer_id.end());
+    const auto uuid_off = builder.CreateVector(uuid_bytes);
+    const auto info_off = CreateScrubInfo(builder, pg_id, task_id, req_id, scrub_lsn, uuid_off);
+
+    const auto map_off = CreatePGMetaScrubMap(builder, info_off, pg_meta_scrub_result);
+    FinishSizePrefixedPGMetaScrubMapBuffer(builder, map_off);
+    return builder.Release();
+}
+
+// Deserialize a size-prefixed PGMetaScrubMap flatbuffer into this object.
+//
+// The buffer is treated as untrusted: the size prefix must fit in `buf_size`, the flatbuffers verifier must
+// accept the prefixed span, and the scrub header must carry a 16-byte issuer uuid. No member of this object
+// is modified unless all of those checks pass.
+//
+// @param buf_ptr  start of the size-prefixed flatbuffer
+// @param buf_size number of readable bytes at buf_ptr
+// @return true if the map was loaded; false (after logging the reason) otherwise
+bool ScrubManager::PGMetaScrubMap::load(uint8_t const* buf_ptr, const uint32_t buf_size) {
+    if (!buf_ptr || buf_size == 0) {
+        LOGERROR("Invalid buffer for PGMetaScrubMap deserialization");
+        return false;
+    }
+
+    // Bound the size prefix by buf_size before anything behind it is dereferenced.
+    constexpr size_t prefix_len = sizeof(flatbuffers::uoffset_t);
+    if (buf_size < prefix_len || flatbuffers::GetPrefixedSize(buf_ptr) > buf_size - prefix_len) {
+        LOGERROR("Failed to parse PGMetaScrubMap from buffer");
+        return false;
+    }
+
+    // Verify exactly the prefixed span, so bytes following the flatbuffer (if any) do not matter.
+    flatbuffers::Verifier verifier(buf_ptr, prefix_len + flatbuffers::GetPrefixedSize(buf_ptr));
+    if (!VerifySizePrefixedPGMetaScrubMapBuffer(verifier)) {
+        LOGERROR("Failed to parse PGMetaScrubMap from buffer");
+        return false;
+    }
+
+    auto fb_map = GetSizePrefixedPGMetaScrubMap(buf_ptr);
+    auto scrub_info = fb_map->scrub_info();
+    if (!scrub_info) {
+        LOGERROR("Missing scrub_info in PGMetaScrubMap");
+        return false;
+    }
+
+    // build_flat_buffer() always writes 16 uuid bytes; anything else would leave peer_id stale.
+    auto issuer_uuid_bytes = scrub_info->issuer_uuid();
+    if (!issuer_uuid_bytes || issuer_uuid_bytes->size() != 16) {
+        LOGERROR("Invalid issuer_uuid in PGMetaScrubMap");
+        return false;
+    }
+
+    // All checks passed: copy the header fields, the peer id and the PG meta result.
+    pg_id = scrub_info->pg_id();
+    task_id = scrub_info->task_id();
+    req_id = scrub_info->req_id();
+    scrub_lsn = scrub_info->scrub_lsn();
+    std::memcpy(peer_id.data, issuer_uuid_bytes->data(), 16);
+    pg_meta_scrub_result = fb_map->pg_meta_scrub_result();
+
+    return true;
+}
+
+//=========================== Scrub Report Merge Functions ===========================//
+
+// Log a one-line, human-readable summary of the missing shards and missing blobs recorded for this pg.
+void ScrubManager::ShallowScrubReport::print() const {
+    std::stringstream out;
+    out << "ShallowScrubReport for pg=" << pg_id_ << " | ";
+
+    // Missing shards, one comma-separated list per peer.
+    out << "MissingShards={";
+    for (const auto& [peer, shard_ids] : missing_shard_ids) {
+        out << "peer=" << peer << ":[";
+        const char* sep = "";
+        for (const auto& shard_id : shard_ids) {
+            out << sep << shard_id;
+            sep = ",";
+        }
+        out << "] ";
+    }
+    out << "} | ";
+
+    // Missing blobs, one comma-separated list per peer.
+    out << "MissingBlobs={";
+    for (const auto& [peer, routes] : missing_blobs) {
+        out << "peer=" << peer << ":[";
+        const char* sep = "";
+        for (const auto& route : routes) {
+            out << sep << fmt::format("{}", route);
+            sep = ",";
+        }
+        out << "] ";
+    }
+    out << "}";
+
+    LOGINFOMOD(scrubmgr, "{}", out.str());
+}
+
+// Compare the scrub maps returned by the peers and record, per peer, every shard/blob that some other
+// reporting peer listed but this peer did not.
+//
+// Only peers that actually delivered a scrub map take part in the comparison: a null entry is logged and
+// skipped, so a peer without a map is not reported as missing everything the others have.
+// Results accumulate into missing_blobs / missing_shard_ids through add_missing_blob / add_missing_shard.
+//
+// @param peer_sm_map peer id -> scrub map from that peer (blob or shard maps, shallow or deep)
+void ScrubManager::ShallowScrubReport::merge(
+    const std::map< peer_id_t, std::shared_ptr< BaseScrubMap > >& peer_sm_map) {
+    if (peer_sm_map.empty()) {
+        LOGWARNMOD(scrubmgr, "[pg={}] No scrub maps to merge", pg_id_);
+        return;
+    }
+
+    std::set< peer_id_t > reporting_peers;                         // peers that delivered a (non-null) map
+    std::map< BlobRoute, std::set< peer_id_t > > blob_peers_map;   // blob -> set of peers that have it
+    std::map< shard_id_t, std::set< peer_id_t > > shard_peers_map; // shard -> set of peers that have it
+
+    for (const auto& [peer_id, scrub_map] : peer_sm_map) {
+        if (!scrub_map) {
+            LOGWARNMOD(scrubmgr, "[pg={}] Null scrub map from peer {}", pg_id_, peer_id);
+            continue;
+        }
+        reporting_peers.insert(peer_id);
+
+        if (auto shallow_blob_map = std::dynamic_pointer_cast< ShallowBlobScrubMap >(scrub_map)) {
+            for (const auto& blob_route : shallow_blob_map->blobs) {
+                blob_peers_map[blob_route].insert(peer_id);
+            }
+        } else if (auto deep_blob_map = std::dynamic_pointer_cast< DeepBlobScrubMap >(scrub_map)) {
+            // A deep blob map also lists the blobs the peer has; only the keys matter here.
+            for (const auto& [blob_route, _] : deep_blob_map->blobs) {
+                blob_peers_map[blob_route].insert(peer_id);
+            }
+        } else if (auto shard_map = std::dynamic_pointer_cast< ShallowShardScrubMap >(scrub_map)) {
+            // DeepShardScrubMap derives from ShallowShardScrubMap, so this cast covers both kinds.
+            for (const auto& shard_id : shard_map->shards) {
+                shard_peers_map[shard_id].insert(peer_id);
+            }
+        }
+    }
+
+    // A blob is missing on a reporting peer if other peers listed it but this one did not.
+    for (const auto& [blob_route, peer_set] : blob_peers_map) {
+        if (peer_set.size() == reporting_peers.size()) { continue; }
+        for (const auto& peer_id : reporting_peers) {
+            if (peer_set.find(peer_id) == peer_set.end()) { add_missing_blob(blob_route, peer_id); }
+        }
+    }
+
+    // Same rule for shards.
+    for (const auto& [shard_id, peer_set] : shard_peers_map) {
+        if (peer_set.size() == reporting_peers.size()) { continue; }
+        for (const auto& peer_id : reporting_peers) {
+            if (peer_set.find(peer_id) == peer_set.end()) { add_missing_shard(shard_id, peer_id); }
+        }
+    }
+
+    // Count total missing blobs and shards across all peers
+    size_t total_missing_blobs = 0;
+    for (const auto& [peer_id, blobs] : missing_blobs) {
+        total_missing_blobs += blobs.size();
+    }
+    size_t total_missing_shards = 0;
+    for (const auto& [peer_id, shards] : missing_shard_ids) {
+        total_missing_shards += shards.size();
+    }
+    LOGINFOMOD(scrubmgr,
+               "[pg={}] Shallow scrub merge completed: {} peers with missing blobs (total {} blobs), {} peers with "
+               "missing shards (total {} shards)",
+               pg_id_, missing_blobs.size(), total_missing_blobs, missing_shard_ids.size(), total_missing_shards);
+}
+
+// Log a one-line summary of this deep scrub report: missing shards and blobs (inherited from the shallow
+// report), corrupted blobs and shards with their scrub result, blobs whose hashes differ between peers
+// (first 8 hash bytes in hex), and peers with corrupted PG metadata.
+void ScrubManager::DeepScrubReport::print() const {
+    std::stringstream ss;
+    ss << "DeepScrubReport for pg=" << pg_id_ << " | ";
+
+    // Report missing shards (from ShallowScrubReport)
+    ss << "MissingShards={";
+    for (const auto& [peer_id, shard_set] : missing_shard_ids) {
+        ss << "peer=" << peer_id << ":[";
+        bool first = true;
+        for (const auto& shard_id : shard_set) {
+            if (!first) ss << ",";
+            ss << shard_id;
+            first = false;
+        }
+        ss << "] ";
+    }
+    ss << "} | ";
+
+    // Report missing blobs (from ShallowScrubReport)
+    ss << "MissingBlobs={";
+    for (const auto& [peer_id, blob_set] : missing_blobs) {
+        ss << "peer=" << peer_id << ":[";
+        bool first = true;
+        for (const auto& blob_route : blob_set) {
+            if (!first) ss << ",";
+            ss << fmt::format("{}", blob_route);
+            first = false;
+        }
+        ss << "] ";
+    }
+    ss << "} | ";
+
+    // Report corrupted blobs
+    ss << "CorruptedBlobs={";
+    for (const auto& [peer_id, blob_map] : corrupted_blobs) {
+        ss << "peer=" << peer_id << ":[";
+        bool first = true;
+        for (const auto& [blob_route, scrub_result] : blob_map) {
+            if (!first) ss << ",";
+            ss << fmt::format("{}", blob_route) << "(" << SCRUB_RESULT_STRING(scrub_result) << ")";
+            first = false;
+        }
+        ss << "] ";
+    }
+    ss << "} | ";
+
+    // Report corrupted shards
+    ss << "CorruptedShards={";
+    for (const auto& [peer_id, shard_map] : corrupted_shards) {
+        ss << "peer=" << peer_id << ":[";
+        bool first = true;
+        for (const auto& [shard_id, scrub_result] : shard_map) {
+            if (!first) ss << ",";
+            ss << shard_id << "(" << SCRUB_RESULT_STRING(scrub_result) << ")";
+            first = false;
+        }
+        ss << "] ";
+    }
+    ss << "} | ";
+
+    // Report inconsistent blobs (different hashes across replicas)
+    ss << "InconsistentBlobs={";
+    for (const auto& [blob_route, peer_hash_map] : inconsistent_blobs) {
+        // Open the per-blob list here so it pairs with the "] " written after the peers.
+        ss << fmt::format("{}", blob_route) << ":[";
+        bool first = true;
+        for (const auto& [peer_id, hash] : peer_hash_map) {
+            if (!first) ss << ",";
+            ss << "peer=" << peer_id << "(hash=";
+            // Print first 8 bytes of hash for brevity
+            for (size_t i = 0; i < std::min(size_t(8), hash.size()); ++i) {
+                ss << fmt::format("{:02x}", hash[i]);
+            }
+            ss << ")";
+            first = false;
+        }
+        ss << "] ";
+    }
+    ss << "} | ";
+
+    // Report corrupted PG metadata
+    ss << "CorruptedPGMeta={";
+    bool first = true;
+    for (const auto& [peer_id, scrub_result] : corrupted_pg_metas) {
+        if (!first) ss << ",";
+        ss << "peer=" << peer_id << "(" << SCRUB_RESULT_STRING(scrub_result) << ")";
+        first = false;
+    }
+    ss << "}";
+
+    LOGINFOMOD(scrubmgr, "{}", ss.str());
+}
+
+// Merge the peers' scrub maps into this deep report.
+//
+// Runs the shallow merge first (missing shards/blobs), then adds the deep findings:
+//  - a blob for which a peer returned a ScrubResult instead of a hash is recorded as corrupted on that peer;
+//  - a blob for which every reporting peer returned a hash, but the hashes differ, is recorded as
+//    inconsistent together with every peer's hash;
+//  - problematic shards and PG-meta results other than ScrubResult::NONE are recorded as corrupted.
+//
+// @param peer_sm_map peer id -> scrub map from that peer; null entries are ignored
+void ScrubManager::DeepScrubReport::merge(const std::map< peer_id_t, std::shared_ptr< BaseScrubMap > >& peer_sm_map) {
+    // First do shallow merge to find missing blobs/shards
+    ShallowScrubReport::merge(peer_sm_map);
+
+    if (peer_sm_map.empty()) { return; }
+
+    // Blob results have to be grouped per blob before peers can be compared; shard and PG-meta findings
+    // are per peer and can be recorded while walking the maps.
+    std::map< BlobRoute, std::map< peer_id_t, std::variant< ScrubResult, BlobHashArray > > > blob_results_map;
+
+    for (const auto& [peer_id, scrub_map] : peer_sm_map) {
+        if (!scrub_map) continue;
+
+        if (auto deep_blob_map = std::dynamic_pointer_cast< DeepBlobScrubMap >(scrub_map)) {
+            for (const auto& [blob_route, result_variant] : deep_blob_map->blobs) {
+                blob_results_map[blob_route][peer_id] = result_variant;
+            }
+        } else if (auto deep_shard_map = std::dynamic_pointer_cast< DeepShardScrubMap >(scrub_map)) {
+            for (const auto& [shard_id, scrub_result] : deep_shard_map->problematic_shards) {
+                if (scrub_result != ScrubResult::NONE) { add_corrupted_shard(peer_id, shard_id, scrub_result); }
+            }
+        } else if (auto pg_meta_map = std::dynamic_pointer_cast< PGMetaScrubMap >(scrub_map)) {
+            if (pg_meta_map->pg_meta_scrub_result != ScrubResult::NONE) {
+                add_corrupted_pg_meta(peer_id, pg_meta_map->pg_meta_scrub_result);
+            }
+        }
+    }
+
+    // Analyze blob results
+    for (const auto& [blob_route, peer_results] : blob_results_map) {
+        std::map< peer_id_t, BlobHashArray > hash_map;
+        bool has_error = false;
+
+        for (const auto& [peer_id, result_variant] : peer_results) {
+            if (std::holds_alternative< ScrubResult >(result_variant)) {
+                // This peer reported a scrub result instead of a hash: record it as corrupted.
+                add_corrupted_blob(peer_id, blob_route, std::get< ScrubResult >(result_variant));
+                has_error = true;
+            } else {
+                // This peer has a valid hash
+                hash_map[peer_id] = std::get< BlobHashArray >(result_variant);
+            }
+        }
+
+        // Hashes are only compared when no peer reported an error for this blob.
+        // NOTE(review): with an error on one peer, differing hashes on the others go unreported -- confirm
+        // that this is intended.
+        if (has_error || hash_map.size() < 2) { continue; }
+
+        // Every hash must equal the first one; BlobHashArray compares all blob_max_hash_len bytes.
+        const auto& reference_hash = hash_map.begin()->second;
+        bool hashes_consistent = true;
+        for (const auto& [peer_id, hash] : hash_map) {
+            if (hash != reference_hash) {
+                hashes_consistent = false;
+                break;
+            }
+        }
+
+        // If hashes are inconsistent, record all of them
+        if (!hashes_consistent) {
+            for (const auto& [peer_id, hash] : hash_map) {
+                add_inconsistent_blob(blob_route, peer_id, hash);
+            }
+        }
+    }
+
+    // corrupted_blobs / corrupted_shards are keyed by peer, so count the entries for the summary.
+    size_t total_corrupted_blobs = 0;
+    for (const auto& [peer_id, blob_map] : corrupted_blobs) {
+        total_corrupted_blobs += blob_map.size();
+    }
+    size_t total_corrupted_shards = 0;
+    for (const auto& [peer_id, shard_map] : corrupted_shards) {
+        total_corrupted_shards += shard_map.size();
+    }
+    LOGINFOMOD(scrubmgr,
+               "[pg={}] Deep scrub merge completed: {} peers with corrupted blobs (total {} blobs), {} inconsistent "
+               "blobs, {} peers with corrupted shards (total {} shards), {} peers with corrupted pg meta",
+               pg_id_, corrupted_blobs.size(), total_corrupted_blobs, inconsistent_blobs.size(),
+               corrupted_shards.size(), total_corrupted_shards, corrupted_pg_metas.size());
+}
+
+} // namespace homeobject
\ No newline at end of file
diff --git a/src/lib/homestore_backend/scrub_manager.hpp b/src/lib/homestore_backend/scrub_manager.hpp
new file mode 100644
index 000000000..771ec5263
--- /dev/null
+++ b/src/lib/homestore_backend/scrub_manager.hpp
@@ -0,0 +1,471 @@
+#pragma once
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#include <folly/futures/Future.h>
+#include <folly/concurrency/ConcurrentHashMap.h>
+#include <folly/executors/IOThreadPoolExecutor.h>
+#pragma GCC diagnostic pop
+
+#include <iomgr/iomgr.hpp>
+#include "homeobject/common.hpp"
+#include <homestore/blk.h>
+#include <homestore/superblk_handler.hpp>
+#include "lib/blob_route.hpp"
+#include "MPMCPriorityQueue.hpp"
+#include "generated/scrub_common_generated.h"
+
+namespace homeobject {
+
+class HSHomeObject;
+
+// How a scrub task was triggered. scrub_task::operator< orders tasks by this value first.
+ENUM(SCRUB_TRIGGER_TYPE, uint8_t, PERIODICALLY = 0, MANUALLY);
+// Kind of scrub a request / scrub map belongs to (returned by the get_scrub_type() methods below).
+ENUM(SCRUB_TYPE, uint8_t, PG_META = 0, DEEP_SHARD, SHALLOW_SHARD, DEEP_BLOB, SHALLOW_BLOB);
+
+class ScrubManager {
+public:
+    // Constructed from a pointer to the HSHomeObject instance; constructor and destructor are defined
+    // out of line. NOTE(review): ownership/lifetime of `homeobject` is not visible here -- confirm.
+    ScrubManager(HSHomeObject* homeobject);
+    ~ScrubManager();
+
+    // Disallow copy and move
+    ScrubManager(const ScrubManager&) = delete;
+    ScrubManager(ScrubManager&&) = delete;
+    ScrubManager& operator=(const ScrubManager&) = delete;
+    ScrubManager& operator=(ScrubManager&&) = delete;
+
+public:
+    // Name returned by pg_scrub_superblk::name().
+    inline static auto const pg_scrub_meta_name = std::string("PG_SCRUB");
+    // Number of bytes in a BlobHashArray.
+    static constexpr uint64_t blob_max_hash_len = 32;
+    // Fixed-size byte array holding one blob hash.
+    using BlobHashArray = std::array< uint8_t, blob_max_hash_len >;
+    using chunk_id_t = homestore::chunk_num_t;
+    // Source of scrub task ids: scrub_task's main constructor takes fetch_add(1), so ids start at 1.
+    // TODO: persist this into metablk.
+    inline static atomic_uint64_t scrub_task_id{1};
+
+    // Per-PG scrub superblock: the last deep and last shallow scrub timestamps of one PG.
+    // Declared under #pragma pack(1), i.e. with 1-byte member alignment.
+#pragma pack(1)
+    struct pg_scrub_superblk {
+        uint64_t last_deep_scrub_timestamp;    // time of the last deep scrub (unit not visible here -- TODO confirm)
+        uint64_t last_shallow_scrub_timestamp; // time of the last shallow scrub (same unit, presumably)
+        pg_id_t pg_id;                         // the PG this record describes
+        // Name of this superblock type: pg_scrub_meta_name ("PG_SCRUB").
+        static std::string name() { return pg_scrub_meta_name; }
+    };
+#pragma pack()
+
+    // scrub req
+public:
+    // Base scrub request: identifies the scrub round (task_id/req_id/scrub_lsn), the issuing peer and the
+    // target PG. Used directly it is a PG-meta request (get_scrub_type() returns PG_META); the blob and
+    // shard requests derive from it.
+    class base_scrub_req {
+    public:
+        // Default construction yields a fully initialized (all-zero, shallow) request that load() can fill.
+        base_scrub_req() = default;
+        base_scrub_req(uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t issuer_peer_id, pg_id_t pg_id,
+                       bool is_deep_scrub) :
+                task_id(task_id),
+                req_id(req_id),
+                scrub_lsn(scrub_lsn),
+                issuer_peer_id(issuer_peer_id),
+                pg_id(pg_id),
+                is_deep_scrub_(is_deep_scrub) {}
+
+        bool is_deep_scrub() const { return is_deep_scrub_; }
+
+        virtual ~base_scrub_req() = default;
+        virtual SCRUB_TYPE get_scrub_type() const { return SCRUB_TYPE::PG_META; }
+        // Serialize this request into a flatbuffer (defined out of line).
+        virtual flatbuffers::DetachedBuffer build_flat_buffer() const;
+        // Fill this request from a serialized buffer; returns false on failure (defined out of line).
+        virtual bool load(uint8_t const* buf_ptr, const uint32_t buf_size);
+
+    public:
+        // Default member initializers keep a default-constructed request (e.g. one whose load() failed)
+        // from exposing indeterminate values through is_deep_scrub() / get_scrub_type().
+        uint64_t task_id{0};
+        uint64_t req_id{0};
+        int64_t scrub_lsn{0};
+        peer_id_t issuer_peer_id{};
+        pg_id_t pg_id{0};
+        bool is_deep_scrub_{false};
+    };
+
+    // Scrub request for a range of blob ids; deep or shallow depending on is_deep_scrub().
+    class blob_scrub_req : public base_scrub_req {
+    public:
+        blob_scrub_req() = default;
+        blob_scrub_req(uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t issuer_peer_id, pg_id_t pg_id,
+                       blob_id_t start, blob_id_t end, bool is_deep_scrub) :
+                base_scrub_req(task_id, req_id, scrub_lsn, issuer_peer_id, pg_id, is_deep_scrub),
+                start(start),
+                end(end) {}
+        ~blob_scrub_req() = default;
+
+        SCRUB_TYPE get_scrub_type() const override {
+            return is_deep_scrub() ? SCRUB_TYPE::DEEP_BLOB : SCRUB_TYPE::SHALLOW_BLOB;
+        }
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+
+    public:
+        // Blob id range of the request; zero-initialized so a default-constructed request is well defined.
+        blob_id_t start{0};
+        blob_id_t end{0};
+    };
+
+    // Scrub request for a range of shards; deep or shallow depending on is_deep_scrub().
+    class shard_scrub_req : public base_scrub_req {
+    public:
+        shard_scrub_req() = default;
+        shard_scrub_req(uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t issuer_peer_id, pg_id_t pg_id,
+                        uint64_t start, uint64_t end, bool is_deep_scrub) :
+                base_scrub_req(task_id, req_id, scrub_lsn, issuer_peer_id, pg_id, is_deep_scrub),
+                start(start),
+                end(end) {}
+        ~shard_scrub_req() = default;
+
+        SCRUB_TYPE get_scrub_type() const override {
+            return is_deep_scrub() ? SCRUB_TYPE::DEEP_SHARD : SCRUB_TYPE::SHALLOW_SHARD;
+        }
+
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+
+    public:
+        // Shard range of the request; zero-initialized so a default-constructed request is well defined.
+        uint64_t start{0};
+        uint64_t end{0};
+    };
+
+    // scrub map, the scrub result of a specific scrub.
+public:
+    // Abstract base of all scrub maps: the header shared by every scrub result (pg, task, request, lsn and
+    // the peer the map belongs to) plus the (de)serialization interface.
+    class BaseScrubMap {
+    public:
+        BaseScrubMap() = default;
+        BaseScrubMap(pg_id_t pg_id, uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t peer_id) :
+                pg_id(pg_id), task_id(task_id), req_id(req_id), scrub_lsn(scrub_lsn), peer_id(peer_id) {}
+        virtual ~BaseScrubMap() = default;
+
+    public:
+        // convert the scrub map to io_blob_list for sending through data rpc
+        virtual flatbuffers::DetachedBuffer build_flat_buffer() const = 0;
+        // Fill this map from a serialized buffer; returns false on failure.
+        virtual bool load(uint8_t const* buf_ptr, const uint32_t buf_size) = 0;
+        virtual SCRUB_TYPE get_scrub_type() const = 0;
+
+        // True when this map answers `req`: same pg, task, request id, scrub lsn and scrub type.
+        // A null `req` never matches. Taken by const reference to avoid a reference-count round trip.
+        bool match(const std::shared_ptr< base_scrub_req >& req) const {
+            if (!req) return false;
+
+            // TODO:: add more logic to check. for example, adding a random sha256 for each req in a scrub task.
+            return pg_id == req->pg_id && task_id == req->task_id && req_id == req->req_id &&
+                scrub_lsn == req->scrub_lsn && get_scrub_type() == req->get_scrub_type();
+        }
+
+    public:
+        // Default member initializers: a default-constructed map whose load() failed must not expose
+        // indeterminate header values to match().
+        pg_id_t pg_id{0};
+        uint64_t task_id{0};
+        uint64_t req_id{0};
+        int64_t scrub_lsn{0};
+        peer_id_t peer_id{};
+    };
+
+    // Deep scrub result for a blob id range: for every blob either its hash or the ScrubResult explaining
+    // why no hash is available.
+    class DeepBlobScrubMap : public BaseScrubMap {
+    public:
+        DeepBlobScrubMap() = default;
+        DeepBlobScrubMap(pg_id_t pg_id, uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t peer_id,
+                         blob_id_t start, blob_id_t end) :
+                BaseScrubMap(pg_id, task_id, req_id, scrub_lsn, peer_id), start(start), end(end) {}
+
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+        SCRUB_TYPE get_scrub_type() const override { return SCRUB_TYPE::DEEP_BLOB; }
+
+        // Record (or overwrite) the result for one blob.
+        void add_blob_result(const BlobRoute& blob_route, std::variant< ScrubResult, BlobHashArray > scrub_result) {
+            blobs[blob_route] = scrub_result;
+        }
+
+    public:
+        // Zero-initialized so a default-constructed map has a well-defined (empty) range.
+        blob_id_t start{0}; // inclusive
+        blob_id_t end{0};   // exclusive
+        std::map< BlobRoute, std::variant< ScrubResult, BlobHashArray > > blobs;
+    };
+
+    // Shallow scrub result for a blob id range: just the set of blobs the peer has.
+    class ShallowBlobScrubMap : public BaseScrubMap {
+    public:
+        ShallowBlobScrubMap() = default;
+        ShallowBlobScrubMap(pg_id_t pg_id, uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t peer_id,
+                            blob_id_t start, blob_id_t end) :
+                BaseScrubMap(pg_id, task_id, req_id, scrub_lsn, peer_id), start(start), end(end) {}
+
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+        SCRUB_TYPE get_scrub_type() const override { return SCRUB_TYPE::SHALLOW_BLOB; }
+
+        // Record one blob as present.
+        void add_blob(const BlobRoute& blob_route) { blobs.insert(blob_route); }
+
+    public:
+        // Zero-initialized so a default-constructed map has a well-defined (empty) range.
+        blob_id_t start{0}; // inclusive
+        blob_id_t end{0};   // exclusive
+        std::set< BlobRoute > blobs;
+    };
+
+    // Shallow scrub result for shards: the set of shard ids the peer has.
+    class ShallowShardScrubMap : public BaseScrubMap {
+    public:
+        ShallowShardScrubMap() = default;
+        ShallowShardScrubMap(pg_id_t pg_id, uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t peer_id) :
+                BaseScrubMap(pg_id, task_id, req_id, scrub_lsn, peer_id) {}
+
+        SCRUB_TYPE get_scrub_type() const override { return SCRUB_TYPE::SHALLOW_SHARD; }
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+
+        // Record one shard as present; adding the same id twice has no further effect.
+        void add_shard(const shard_id_t& shard_id) { shards.emplace(shard_id); }
+
+        std::set< shard_id_t > shards;
+    };
+
+    // Deep scrub result for shards: the shallow shard set plus a scrub result for each problematic shard.
+    class DeepShardScrubMap : public ShallowShardScrubMap {
+    public:
+        DeepShardScrubMap() = default;
+        DeepShardScrubMap(pg_id_t pg_id, uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t peer_id) :
+                ShallowShardScrubMap(pg_id, task_id, req_id, scrub_lsn, peer_id) {}
+
+        SCRUB_TYPE get_scrub_type() const override { return SCRUB_TYPE::DEEP_SHARD; }
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+
+        // Record (or overwrite) the scrub result of one problematic shard.
+        void add_problematic_shard(const shard_id_t& shard_id, ScrubResult scrub_result) {
+            problematic_shards.insert_or_assign(shard_id, scrub_result);
+        }
+
+        std::map< shard_id_t, ScrubResult > problematic_shards;
+    };
+
+    // Scrub result for the PG metadata of one peer: a single ScrubResult (NONE by default).
+    class PGMetaScrubMap : public BaseScrubMap {
+    public:
+        PGMetaScrubMap() = default;
+        PGMetaScrubMap(pg_id_t pg_id, uint64_t task_id, uint64_t req_id, int64_t scrub_lsn, peer_id_t peer_id) :
+                BaseScrubMap(pg_id, task_id, req_id, scrub_lsn, peer_id) {}
+
+        SCRUB_TYPE get_scrub_type() const override { return SCRUB_TYPE::PG_META; }
+        flatbuffers::DetachedBuffer build_flat_buffer() const override;
+        bool load(uint8_t const* buf_ptr, const uint32_t buf_size) override;
+
+        ScrubResult pg_meta_scrub_result = ScrubResult::NONE;
+    };
+
+    // scrub report
+public:
+    // Shallow scrub report of one pg: per peer, the shards and blobs found missing on that peer.
+    class ShallowScrubReport {
+    public:
+        ShallowScrubReport(pg_id_t pg_id) : pg_id_(pg_id) {}
+        virtual ~ShallowScrubReport() = default;
+
+        pg_id_t get_pg_id() const { return pg_id_; }
+
+        // Record that `peer_id` lacks the given shard / blob; repeated additions are ignored by the sets.
+        void add_missing_shard(shard_id_t shard_id, peer_id_t peer_id) { missing_shard_ids[peer_id].emplace(shard_id); }
+        void add_missing_blob(BlobRoute blob_route, peer_id_t peer_id) { missing_blobs[peer_id].emplace(blob_route); }
+
+        const auto& get_missing_shard_ids() const { return missing_shard_ids; }
+        const auto& get_missing_blobs() const { return missing_blobs; }
+
+        // Fold the peers' scrub maps into this report (defined out of line).
+        virtual void merge(const std::map< peer_id_t, std::shared_ptr< BaseScrubMap > >& peer_sm_map);
+        // Log the report (defined out of line).
+        virtual void print() const;
+
+        std::map< peer_id_t, std::set< shard_id_t > > missing_shard_ids;
+        std::map< peer_id_t, std::set< BlobRoute > > missing_blobs;
+        pg_id_t pg_id_;
+    };
+
+    // Deep scrub report of one pg: everything the shallow report holds, plus corrupted blobs/shards/PG
+    // metadata per peer and blobs whose hashes differ between peers.
+    class DeepScrubReport : public ShallowScrubReport {
+    public:
+        DeepScrubReport(pg_id_t pg_id) : ShallowScrubReport(pg_id) {}
+        ~DeepScrubReport() = default;
+
+        // Each add_* records one finding; a later call for the same key overwrites the earlier value.
+        void add_corrupted_blob(peer_id_t peer_id, BlobRoute blob_route, ScrubResult scrub_result) {
+            corrupted_blobs[peer_id].insert_or_assign(blob_route, scrub_result);
+        }
+        void add_corrupted_shard(peer_id_t peer_id, shard_id_t shard_id, ScrubResult scrub_result) {
+            corrupted_shards[peer_id].insert_or_assign(shard_id, scrub_result);
+        }
+        void add_inconsistent_blob(BlobRoute blob_route, peer_id_t peer_id, BlobHashArray hash) {
+            inconsistent_blobs[blob_route].insert_or_assign(peer_id, hash);
+        }
+        void add_corrupted_pg_meta(peer_id_t peer_id, ScrubResult scrub_result) {
+            corrupted_pg_metas.insert_or_assign(peer_id, scrub_result);
+        }
+
+        const auto& get_corrupted_blobs() const { return corrupted_blobs; }
+        const auto& get_corrupted_shards() const { return corrupted_shards; }
+        const auto& get_inconsistent_blobs() const { return inconsistent_blobs; }
+        const auto& get_corrupted_pg_metas() const { return corrupted_pg_metas; }
+
+        // Shallow merge followed by the deep comparisons (defined out of line).
+        void merge(const std::map< peer_id_t, std::shared_ptr< BaseScrubMap > >& peer_sm_map) override;
+        void print() const override;
+
+    private:
+        std::map< peer_id_t, std::map< BlobRoute, ScrubResult > > corrupted_blobs;
+        std::map< peer_id_t, std::map< shard_id_t, ScrubResult > > corrupted_shards;
+        std::map< BlobRoute, std::map< peer_id_t, BlobHashArray > > inconsistent_blobs;
+        std::map< peer_id_t, ScrubResult > corrupted_pg_metas;
+    };
+
+    // scrub task that will be put into scrub task queue, and executed by scrub worker
+public:
+    struct scrub_task {
+        // Default constructor (required for std::regular); the resulting task carries no promise.
+        scrub_task() :
+                task_id{0},
+                last_scrub_time{0},
+                pg_id{0},
+                is_deep_scrub{false},
+                triggered{SCRUB_TRIGGER_TYPE::PERIODICALLY} {}
+
+        // Main constructor: draws the next task_id and takes over the caller's promise.
+        scrub_task(uint64_t last_scrub_time, pg_id_t pg_id, bool is_deep_scrub, SCRUB_TRIGGER_TYPE trigger_type,
+                   folly::Promise< std::shared_ptr< ShallowScrubReport > > promise) :
+                task_id{scrub_task_id.fetch_add(1)},
+                last_scrub_time{last_scrub_time},
+                pg_id{pg_id},
+                is_deep_scrub{is_deep_scrub},
+                triggered{trigger_type},
+                scrub_report_promise{new folly::Promise< std::shared_ptr< ShallowScrubReport > >(std::move(promise)),
+                                     &scrub_task::fulfill_and_delete} {}
+
+        scrub_task(const scrub_task& other) = default;
+        scrub_task& operator=(const scrub_task& other) = default;
+        scrub_task(scrub_task&& other) noexcept = default;
+        scrub_task& operator=(scrub_task&& other) noexcept = default;
+
+        // Deleter of the shared promise: copies of a task share one promise, so it is completed with nullptr only
+        // when the last owner releases it, never while another copy of the task may still fulfil it.
+        static void fulfill_and_delete(folly::Promise< std::shared_ptr< ShallowScrubReport > >* promise) {
+            if (!promise->isFulfilled()) { promise->setValue(nullptr); }
+            delete promise;
+        }
+
+        uint64_t task_id;
+        uint64_t last_scrub_time;
+        pg_id_t pg_id;
+        bool is_deep_scrub;
+        SCRUB_TRIGGER_TYPE triggered;
+        std::shared_ptr< folly::Promise< std::shared_ptr< ShallowScrubReport > > > scrub_report_promise;
+
+        // Equality operator (required for std::regular): two tasks are equal when their task_id matches.
+        bool operator==(const scrub_task& other) const noexcept { return task_id == other.task_id; }
+
+        // Priority order: a task compares "less" when it has the lower priority.
+        bool operator<(const scrub_task& other) const noexcept {
+            using U = std::underlying_type_t< SCRUB_TRIGGER_TYPE >;
+            // First compare by trigger type (manually > periodically)
+            if (static_cast< U >(triggered) != static_cast< U >(other.triggered)) {
+                return static_cast< U >(triggered) < static_cast< U >(other.triggered);
+            }
+            // If same trigger type, compare by task_id (earlier tasks have higher priority)
+            return task_id > other.task_id;
+            // TODO:: add more logic to decide the priority between two tasks after we introduce more logic for
+            // automatic schedule, the following are some criteria we can consider:
+            /*
+            2. Time Since Last Scrub
+                - PGs that haven't been scrubbed in the longest time get higher priority
+                - Uses last_scrub_stamp timestamp to track
+                - Prevents starvation of individual PGs
+            3. Deep vs Shallow Scrub Deadline
+                - Deep scrub deadline (deep_scrub_interval, default 7 days)
+                - Shallow scrub deadline (scrub_interval_randomize_ratio, default 24 hours)
+                - PGs approaching their deadline get boosted priority
+            4. Load Balancing
+                - scrub_load_threshold prevents scrubbing during high I/O load
+                - scrub_min_interval and scrub_max_interval control frequency
+                - Time window restrictions (scrub_begin_hour, scrub_end_hour)
+            5. Concurrency Limits
+                - max_scrubs (default 1) limits concurrent scrubs per sm
+                - Prevents multiple PGs from overwhelming single sm
+            */
+        }
+    };
+
+    // PG Scrub Context, every pg being scrubbed has a scrub context to track its progress
+private:
+    class PGScrubContext {
+    public:
+        // member_peer_ids is a sink parameter: taken by value and moved into the context (no second copy).
+        PGScrubContext(uint64_t task_id, std::unordered_set< peer_id_t > member_peer_ids) :
+                task_id(task_id), member_peer_ids_(std::move(member_peer_ids)) {}
+        ~PGScrubContext() = default;
+
+        bool add_scrub_map(std::shared_ptr< BaseScrubMap > bsm);
+        void reset_for_new_req();
+        bool wait_for_all_req_sms(std::chrono::milliseconds timeout);
+        std::vector< peer_id_t > get_peers_to_retry() const;
+        void cancel();
+        bool is_cancelled() const { return cancelled.load(); }
+
+        // data members below are publicly accessible
+        uint64_t task_id{0};
+        std::unordered_set< peer_id_t > member_peer_ids_;
+        std::shared_ptr< base_scrub_req > current_req{nullptr};
+        atomic_uint64_t req_id{0};
+        mutable std::mutex mtx_;
+        std::map< peer_id_t, std::shared_ptr< BaseScrubMap > > peer_sm_map_;
+
+    private:
+        std::atomic_bool cancelled{false};
+        std::condition_variable cv_;
+    };
+
+    /*scrub scheduler*/
+public:
+    void start();
+    void stop();
+
+    folly::SemiFuture< std::shared_ptr< ShallowScrubReport > >
+    submit_scrub_task(const pg_id_t& pg_id, const bool is_deep, const bool force = false,
+                      SCRUB_TRIGGER_TYPE trigger_type = SCRUB_TRIGGER_TYPE::PERIODICALLY);
+
+    // cancel will only cancel a running scrub task. for those submitted but not running tasks in the queue, cancel will
+    // not remove them from the queue.
+    void cancel_scrub_task(const pg_id_t& pg_id);
+
+    bool add_scrub_map(const pg_id_t pg_id, std::shared_ptr< BaseScrubMap > bsm);
+    // new pg is created
+    void add_pg(const pg_id_t pg_id);
+    // an existing pg is permanently removed
+    void remove_pg(const pg_id_t pg_id);
+    std::optional< pg_scrub_superblk > get_scrub_superblk(const pg_id_t pg_id) const;
+    void save_scrub_superblk(const pg_id_t pg_id, const bool is_deep_scrub, bool force_update = true);
+    void add_scrub_req(std::shared_ptr< base_scrub_req > req);
+
+    /*local scrub*/
+public:
+    std::shared_ptr< BaseScrubMap > local_scrub_blob(std::shared_ptr< blob_scrub_req > req);
+    std::shared_ptr< ShallowShardScrubMap > local_scrub_shard(std::shared_ptr< shard_scrub_req > req);
+    std::shared_ptr< PGMetaScrubMap > scrub_pg_meta(std::shared_ptr< base_scrub_req > req);
+
+    // handlers
+private:
+    void scan_pg_for_scrub();
+    void handle_pg_scrub_task(scrub_task task);
+
+    bool send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
+                                 const std::unordered_set< peer_id_t >& all_member_peer_ids, const peer_id_t& my_uuid,
+                                 shared< homestore::ReplDev > pg_repl_dev, const sisl::io_blob_list_t& req_blob_list,
+                                 std::shared_ptr< PGScrubContext > scrub_ctx, uint32_t max_retries,
+                                 std::chrono::seconds timeout, const std::string& scrub_type_name);
+
+    bool is_eligible_for_deep_scrub(const pg_id_t& pg_id);
+    bool is_eligible_for_shallow_scrub(const pg_id_t& pg_id);
+    void on_pg_scrub_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie,
+                                    std::vector< homestore::superblk< pg_scrub_superblk > >& stale_pg_scrub_sbs);
+    void handle_deep_pg_scrub_report(std::shared_ptr< DeepScrubReport > report);
+    void handle_shallow_pg_scrub_report(std::shared_ptr< ShallowScrubReport > report);
+    void handle_scrub_req(std::shared_ptr< base_scrub_req > req);
+    bool wait_for_scrub_lsn_commit(shared< homestore::ReplDev > repl_dev, int64_t scrub_lsn);
+
+private:
+    iomgr::timer_handle_t m_scrub_timer_hdl{iomgr::null_timer_handle};
+    iomgr::io_fiber_t m_scrub_timer_fiber{nullptr};
+    HSHomeObject* m_hs_home_object{nullptr};
+    MPMCPriorityQueue< scrub_task > m_scrub_task_queue;
+    std::shared_ptr< folly::IOThreadPoolExecutor > m_scrub_executor;
+    folly::ConcurrentHashMap< pg_id_t, std::shared_ptr< PGScrubContext > > m_pg_scrub_ctx_map;
+    folly::ConcurrentHashMap< pg_id_t, std::shared_ptr< homestore::superblk< pg_scrub_superblk > > > m_pg_scrub_sb_map;
+
+    std::shared_ptr< folly::IOThreadPoolExecutor > m_scrub_req_executor;
+};
+} // namespace homeobject
+
+// TODO:: consider the following scenarios and decide how we want to handle them in scrub manager
+// 1 baseline resync
+// 2 replace member
+// 3 permanently destroy pg
+// 4 GC
\ No newline at end of file
diff --git a/src/lib/homestore_backend/tests/CMakeLists.txt b/src/lib/homestore_backend/tests/CMakeLists.txt
index a40812ab3..8eceb3d1f 100644
--- a/src/lib/homestore_backend/tests/CMakeLists.txt
+++ b/src/lib/homestore_backend/tests/CMakeLists.txt
@@ -30,3 +30,12 @@ add_test(NAME HeapChunkSelectorTest COMMAND test_heap_chunk_selector)
 add_library(homestore_tests_gc OBJECT)
 target_sources(homestore_tests_gc PRIVATE test_homestore_backend.cpp hs_gc_tests.cpp)
 target_link_libraries(homestore_tests_gc homeobject_homestore ${COMMON_TEST_DEPS})
+
+add_library(homestore_tests_scrubber OBJECT)
+target_sources(homestore_tests_scrubber PRIVATE test_homestore_backend.cpp hs_scrubber_tests.cpp)
+target_link_libraries(homestore_tests_scrubber homeobject_homestore ${COMMON_TEST_DEPS})
+
+add_executable(test_mpmc_priority_queue)
+target_sources(test_mpmc_priority_queue PRIVATE test_mpmc_priority_queue.cpp)
+target_link_libraries(test_mpmc_priority_queue homeobject_homestore ${COMMON_TEST_DEPS})
+add_test(NAME MPMCPriorityQueueTest COMMAND test_mpmc_priority_queue)
diff --git a/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp b/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp
new file mode 100644
index 000000000..ab7636180
--- /dev/null
+++ b/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp
@@ -0,0 +1,569 @@
+#include "homeobj_fixture.hpp"
+#include <homestore/blk.h>
+#include <homestore/btree/btree_req.hpp>
+#include <homestore/btree/btree_kv.hpp>
+#include <random>
+#include "lib/homestore_backend/hs_homeobject.hpp"
+
+using namespace homeobject;
+using BlobHeader = HSHomeObject::BlobHeader;
+
+// Removes the index entry of (shard_id, blob_id) from the given PG index table and asserts that it succeeded.
+static void delete_blob_from_index(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table,
+                                   shard_id_t shard_id, blob_id_t blob_id) {
+    BlobRouteValue removed_value;
+    BlobRouteKey route_key{BlobRoute{shard_id, blob_id}};
+    homestore::BtreeSingleRemoveRequest rm_req{&route_key, &removed_value};
+    const auto rc = pg_index_table->remove(rm_req);
+    ASSERT_TRUE(rc == homestore::btree_status_t::success) << "Failed to remove blob key from index table";
+}
+
+// Helper function to corrupt a blob's data: reads its blocks, bit-flips the leading part and writes them back.
+static void corrupt_blob_data(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table,
+                              shard_id_t shard_id, blob_id_t blob_id) {
+    auto& data_service = homestore::data_service();
+    const auto blk_size = data_service.get_blk_size();
+
+    BlobRouteKey blob_key{BlobRoute{shard_id, blob_id}};
+    BlobRouteValue out_value;
+    homestore::BtreeSingleGetRequest blob_get_req{&blob_key, &out_value};
+
+    auto status = pg_index_table->get(blob_get_req);
+    ASSERT_TRUE(status == homestore::btree_status_t::success) << "Failed to get blob key from index table";
+
+    auto pbas = out_value.pbas();
+    auto total_size = pbas.blk_count() * blk_size;
+    sisl::sg_list data_sgs;
+    data_sgs.size = total_size;
+    data_sgs.iovs.emplace_back(iovec{.iov_base = iomanager.iobuf_alloc(blk_size, total_size), .iov_len = total_size});
+
+    data_service.async_read(pbas, data_sgs, total_size)
+        .thenValue([&](auto&& err) {
+            if (err) {
+                LOGE("Failed to read blob data, blob_id={}, err={}", blob_id, err.message());
+                iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base));
+                throw std::runtime_error(fmt::format("Failed to read blob data: {}", err.message()));
+            }
+
+            auto* data_ptr = reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base);
+            // Invert every bit of the bytes at offsets [0, iov_len / 2] of the buffer that was just read.
+            for (size_t i = 0; i <= data_sgs.iovs[0].iov_len / 2; i++) { data_ptr[i] ^= 0xFF; }
+
+            return data_service.async_write(data_sgs, pbas).thenValue([data_sgs = std::move(data_sgs)](auto&& err) {
+                // Release the buffer first: a failing ASSERT_* returns at once and would otherwise leak it.
+                iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base));
+                ASSERT_FALSE(err) << "Failed to write corrupted blob data";
+            });
+        })
+        .get();
+}
+
+// Helper function to make a blob inconsistent (valid but different hash)
+static void make_blob_inconsistent(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table,
+                                   shard_id_t shard_id, blob_id_t blob_id, HSHomeObject* obj_inst) {
+    auto& data_service = homestore::data_service();
+    const auto blk_size = data_service.get_blk_size();
+
+    BlobRouteKey blob_key{BlobRoute{shard_id, blob_id}};
+    BlobRouteValue out_value;
+    homestore::BtreeSingleGetRequest blob_get_req{&blob_key, &out_value};
+
+    auto status = pg_index_table->get(blob_get_req);
+    ASSERT_TRUE(status == homestore::btree_status_t::success) << "Failed to get blob key from index table";
+
+    auto pbas = out_value.pbas();
+    auto total_size = pbas.blk_count() * blk_size;
+    sisl::sg_list data_sgs;
+    data_sgs.size = total_size;
+    data_sgs.iovs.emplace_back(iovec{.iov_base = iomanager.iobuf_alloc(blk_size, total_size), .iov_len = total_size});
+
+    data_service.async_read(pbas, data_sgs, total_size)
+        .thenValue([&](auto&& err) {
+            if (err) {
+                LOGE("Failed to read blob data, blob_id={}, err={}", blob_id, err.message());
+                iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base));
+                throw std::runtime_error(fmt::format("Failed to read blob data: {}", err.message()));
+            }
+
+            // Modify blob data and recompute valid hash
+            uint8_t* read_buf = r_cast< uint8_t* >(data_sgs.iovs[0].iov_base);
+            auto header = r_cast< BlobHeader* >(read_buf);
+            uint8_t* blob_bytes = read_buf + header->data_offset;
+
+            // The XOR mask is drawn from [1, 255]: a zero mask would leave a byte unchanged, so a very small
+            // payload could by chance stay identical and the blob would not become inconsistent at all.
+            std::mt19937 rng{std::random_device{}()};
+            std::uniform_int_distribution< int > dist(1, 255);
+
+            for (size_t i = 0; i <= header->blob_size / 2; i++) {
+                blob_bytes[i] ^= static_cast< uint8_t >(dist(rng));
+            }
+
+            // Re-stamp the payload hash, then the header hash (crc32 over the header with header_hash zeroed).
+            uint8_t computed_hash[BlobHeader::blob_max_hash_len]{};
+            obj_inst->compute_blob_payload_hash(header->hash_algorithm, blob_bytes, header->blob_size, computed_hash,
+                                                BlobHeader::blob_max_hash_len);
+
+            std::memcpy(header->hash, computed_hash, BlobHeader::blob_max_hash_len);
+            std::memset(header->header_hash, 0, BlobHeader::blob_max_hash_len);
+            uint32_t computed_header_hash = crc32_ieee(0, (uint8_t*)header, sizeof(BlobHeader));
+            std::memcpy(header->header_hash, &computed_header_hash, sizeof(uint32_t));
+
+            if (!obj_inst->verify_blob(data_sgs.iovs[0].iov_base, header->shard_id, header->blob_id)) {
+                LOGE("Blob verification failed after modification, blob_id={}", blob_id);
+                iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base));
+                throw std::runtime_error(fmt::format("Blob verification failed for blob_id={}", blob_id));
+            }
+
+            return data_service.async_write(data_sgs, pbas).thenValue([data_sgs = std::move(data_sgs)](auto&& err) {
+                // Release the buffer first: a failing ASSERT_* returns at once and would otherwise leak it.
+                iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base));
+                ASSERT_FALSE(err) << "Failed to write inconsistent blob data";
+            });
+        })
+        .get();
+}
+
+// Expects `report` to list `expected_blob` among the blobs that are missing on `peer_id`.
+static void verify_missing_blobs(const ScrubManager::DeepScrubReport* report, const peer_id_t& peer_id,
+                                 const BlobRoute& expected_blob) {
+    const auto& per_peer_missing = report->get_missing_blobs();
+    const auto peer_entry = per_peer_missing.find(peer_id);
+    const bool peer_reported = (peer_entry != per_peer_missing.end());
+    EXPECT_TRUE(peer_reported) << "Missing blob should be reported for peer_id=" << peer_id;
+    if (!peer_reported) { return; } // no entry for this peer, nothing more to inspect
+    EXPECT_TRUE(peer_entry->second.count(expected_blob) == 1) << "Expected missing blob should be in the report";
+}
+
+// Expects `report` to list `expected_blob` among the blobs that are corrupted on `peer_id`.
+static void verify_corrupted_blobs(const ScrubManager::DeepScrubReport* report, const peer_id_t& peer_id,
+                                   const BlobRoute& expected_blob) {
+    const auto& per_peer_corrupted = report->get_corrupted_blobs();
+    const auto peer_entry = per_peer_corrupted.find(peer_id);
+    const bool peer_reported = (peer_entry != per_peer_corrupted.end());
+    EXPECT_TRUE(peer_reported) << "Corrupted blob should be reported for peer_id=" << peer_id;
+    if (!peer_reported) { return; } // no entry for this peer, nothing more to inspect
+    EXPECT_TRUE(peer_entry->second.count(expected_blob) == 1) << "Expected corrupted blob should be in the report";
+}
+
+// Expects `report` to list `expected_shard` among the shards that are missing on `peer_id`.
+static void verify_missing_shards(const ScrubManager::DeepScrubReport* report, const peer_id_t& peer_id,
+                                  shard_id_t expected_shard) {
+    const auto& per_peer_missing = report->get_missing_shard_ids();
+    const auto peer_entry = per_peer_missing.find(peer_id);
+    const bool peer_reported = (peer_entry != per_peer_missing.end());
+    EXPECT_TRUE(peer_reported) << "Missing shard should be reported for peer_id=" << peer_id;
+    if (!peer_reported) { return; } // no entry for this peer, nothing more to inspect
+    EXPECT_TRUE(peer_entry->second.count(expected_shard) == 1) << "Expected missing shard should be in the report";
+}
+
+TEST_F(HomeObjectFixture, BasicScrubTest) {
+    const pg_id_t pg_id = 1;
+    create_pg(pg_id);
+    auto scrub_mgr = _obj_inst->scrub_manager();
+
+    // empty pg scrub should report no issues
+    run_on_pg_leader(pg_id, [&]() {
+        // Deep scrub on empty PG should complete without errors
+        auto scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null for empty PG";
+        auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report);
+        ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport";
+
+        // Empty PG should have no issues
+        EXPECT_TRUE(deep_scrub_report->get_missing_blobs().empty()) << "Empty PG should have no missing blobs";
+        EXPECT_TRUE(deep_scrub_report->get_missing_shard_ids().empty()) << "Empty PG should have no missing shards";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_blobs().empty()) << "Empty PG should have no corrupted blobs";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_shards().empty()) << "Empty PG should have no corrupted shards";
+        EXPECT_TRUE(deep_scrub_report->get_inconsistent_blobs().empty())
+            << "Empty PG should have no inconsistent blobs";
+
+        // Shallow scrub on empty PG
+        scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, false /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Shallow scrub report should not be null for empty PG";
+        auto shallow_scrub_report = std::dynamic_pointer_cast< ScrubManager::ShallowScrubReport >(scrub_report);
+        ASSERT_NE(shallow_scrub_report, nullptr) << "Should be ShallowScrubReport";
+
+        EXPECT_TRUE(shallow_scrub_report->get_missing_blobs().empty())
+            << "Empty PG should have no missing blobs in shallow scrub";
+        EXPECT_TRUE(shallow_scrub_report->get_missing_shard_ids().empty())
+            << "Empty PG should have no missing shards in shallow scrub";
+    });
+
+    const uint64_t num_shards = SISL_OPTIONS["num_shards"].as< uint64_t >();
+    const uint64_t num_blobs_per_shard = SISL_OPTIONS["num_blobs"].as< uint64_t >();
+    const uint64_t shard_size = 64 * Mi;
+
+    std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec;
+    std::map< pg_id_t, blob_id_t > pg_blob_id;
+    pg_blob_id[pg_id] = 0;
+
+    std::map< shard_id_t, std::map< blob_id_t, uint64_t > > shard_blob_ids_map;
+
+    // Create multiple shards
+    for (uint64_t i = 0; i < num_shards; i++) {
+        auto shard_info = create_shard(pg_id, shard_size, "shard meta");
+        pg_shard_id_vec[pg_id].push_back(shard_info.id);
+        LOGINFO("Created pg={} shard={} (shard {}/{})", pg_id, shard_info.id, i + 1, num_shards);
+    }
+
+    // pg with empty shard scrub should report no issues
+    run_on_pg_leader(pg_id, [&]() {
+        // Deep scrub on empty PG should complete without errors
+        auto scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null for empty PG";
+        auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report);
+        ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport";
+
+        // Empty PG should have no issues
+        EXPECT_TRUE(deep_scrub_report->get_missing_blobs().empty()) << "Empty PG should have no missing blobs";
+        EXPECT_TRUE(deep_scrub_report->get_missing_shard_ids().empty()) << "Empty PG should have no missing shards";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_blobs().empty()) << "Empty PG should have no corrupted blobs";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_shards().empty()) << "Empty PG should have no corrupted shards";
+        EXPECT_TRUE(deep_scrub_report->get_inconsistent_blobs().empty())
+            << "Empty PG should have no inconsistent blobs";
+
+        // Shallow scrub on empty PG
+        scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, false /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Shallow scrub report should not be null for empty PG";
+        auto shallow_scrub_report = std::dynamic_pointer_cast< ScrubManager::ShallowScrubReport >(scrub_report);
+        ASSERT_NE(shallow_scrub_report, nullptr) << "Should be ShallowScrubReport";
+
+        EXPECT_TRUE(shallow_scrub_report->get_missing_blobs().empty())
+            << "Empty PG should have no missing blobs in shallow scrub";
+        EXPECT_TRUE(shallow_scrub_report->get_missing_shard_ids().empty())
+            << "Empty PG should have no missing shards in shallow scrub";
+    });
+
+    g_helper->sync();
+
+    // Create blobs in all shards
+    shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs_per_shard, pg_blob_id);
+    LOGINFO("Created {} blobs per shard, total {} blobs", num_blobs_per_shard, num_shards * num_blobs_per_shard);
+
+    // Verify blobs were created
+    verify_get_blob(pg_shard_id_vec, num_blobs_per_shard);
+
+    // everything is healthy, deep scrub should report no issues.
+    run_on_pg_leader(pg_id, [&]() {
+        // do deep scrub
+        auto scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null";
+        auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report);
+        ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport";
+
+        EXPECT_TRUE(deep_scrub_report->get_missing_blobs().empty()) << "No blobs should be missing in normal case";
+        EXPECT_TRUE(deep_scrub_report->get_missing_shard_ids().empty()) << "No shards should be missing in normal case";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_blobs().empty()) << "No blobs should be corrupted in normal case";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_shards().empty())
+            << "No shards should be corrupted in normal case";
+        EXPECT_TRUE(deep_scrub_report->get_inconsistent_blobs().empty())
+            << "No blobs should be inconsistent in normal case";
+        EXPECT_TRUE(deep_scrub_report->get_corrupted_pg_metas().empty())
+            << "No PG metas should be corrupted in normal case";
+
+        // do shallow scrub
+        scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, false, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY).get();
+        ASSERT_NE(scrub_report, nullptr) << "Shallow scrub report should not be null";
+        auto shallow_scrub_report = std::dynamic_pointer_cast< ScrubManager::ShallowScrubReport >(scrub_report);
+        ASSERT_NE(shallow_scrub_report, nullptr) << "Should be ShallowScrubReport";
+        EXPECT_TRUE(shallow_scrub_report->get_missing_blobs().empty()) << "No blobs should be missing in normal case";
+        EXPECT_TRUE(shallow_scrub_report->get_missing_shard_ids().empty())
+            << "No shards should be missing in normal case";
+    });
+
+    g_helper->sync();
+    const auto hs_pg = _obj_inst->get_hs_pg(pg_id);
+    ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id;
+
+    const auto missing_shard_id = shard_blob_ids_map.begin()->first;
+    auto it = shard_blob_ids_map[missing_shard_id].begin();
+    const auto missing_blob_id = it->first;
+    const auto corrupted_blob_id = (++it)->first;
+    const auto inconsistent_blob_id = (++it)->first;
+
+    // TODO:: add corrupted shard and corrupted pg meta after we have the implementation for corrupting them.
+
+    // Corrupt data on followers
+    run_on_pg_follower(pg_id, [&]() {
+        auto& pg_index_table = hs_pg->index_table_;
+
+        // 1. Remove missing_shard_id to simulate missing shard
+        _obj_inst->delete_shard_from_map(missing_shard_id);
+
+        // 2. Delete missing_blob_id from pg_index table
+        delete_blob_from_index(pg_index_table, missing_shard_id, missing_blob_id);
+
+        // 3. Make corrupted_blob_id corrupted
+        corrupt_blob_data(pg_index_table, missing_shard_id, corrupted_blob_id);
+
+        // 4. Make inconsistent_blob_id inconsistent (valid but different hash)
+        make_blob_inconsistent(pg_index_table, missing_shard_id, inconsistent_blob_id, _obj_inst.get());
+    });
+
+    g_helper->sync();
+
+    run_on_pg_leader(pg_id, [&]() {
+        // do deep scrub and check the scrub report
+        auto scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null";
+        auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report);
+        ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport";
+        deep_scrub_report->print();
+
+        const auto& members = (hs_pg->pg_info_).members;
+        std::set< peer_id_t > follower_peer_ids;
+        const auto& leader_uuid = _obj_inst->our_uuid();
+        for (const auto& member : members) {
+            if (member.id == leader_uuid) { continue; }
+            follower_peer_ids.insert(member.id);
+        }
+
+        // Verify missing blobs, missing shards, and corrupted blobs for all followers
+        for (const auto& peer_id : follower_peer_ids) {
+            verify_missing_blobs(deep_scrub_report.get(), peer_id, BlobRoute{missing_shard_id, missing_blob_id});
+            verify_missing_shards(deep_scrub_report.get(), peer_id, missing_shard_id);
+            verify_corrupted_blobs(deep_scrub_report.get(), peer_id, BlobRoute{missing_shard_id, corrupted_blob_id});
+        }
+
+        const auto inconsistent_blobs = deep_scrub_report->get_inconsistent_blobs();
+        EXPECT_TRUE(inconsistent_blobs.size() == 1)
+            << "Inconsistent blob should be reported in deep scrub report for one of the followers";
+        const auto it = inconsistent_blobs.find(BlobRoute{missing_shard_id, inconsistent_blob_id});
+        EXPECT_TRUE(it != inconsistent_blobs.end())
+            << "The inconsistent blob should be reported in deep scrub report for blob_id=" << inconsistent_blob_id;
+        auto& inconsistent_blob_peers = it->second;
+
+        // inconsistent_blob_peers should contains all the peers.
+        EXPECT_TRUE(inconsistent_blob_peers.size() == follower_peer_ids.size() + 1)
+            << "Inconsistent blob should be reported in deep scrub report for all followers";
+        for (const auto& peer_id : follower_peer_ids) {
+            EXPECT_TRUE(inconsistent_blob_peers.count(peer_id) == 1)
+                << "The inconsistent blob should be reported in deep scrub report for peer_id=" << peer_id;
+        }
+        EXPECT_TRUE(inconsistent_blob_peers.count(leader_uuid) == 1)
+            << "The inconsistent blob should be reported in deep scrub report for leader peer_id=" << leader_uuid;
+
+        // do shallow scrub; a shallow scrub can only find missing blobs/shards
+        scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, false, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY).get();
+        ASSERT_NE(scrub_report, nullptr) << "Shallow scrub report should not be null";
+        auto shallow_scrub_report = std::dynamic_pointer_cast< ScrubManager::ShallowScrubReport >(scrub_report);
+        ASSERT_NE(shallow_scrub_report, nullptr) << "Should be ShallowScrubReport";
+        shallow_scrub_report->print();
+
+        auto miss_blob_in_shallow_report = shallow_scrub_report->get_missing_blobs();
+        EXPECT_TRUE(miss_blob_in_shallow_report.size() == follower_peer_ids.size())
+            << "Missing blob should be reported in shallow scrub report for all followers";
+        for (const auto& peer_id : follower_peer_ids) {
+            auto it = miss_blob_in_shallow_report.find(peer_id);
+            EXPECT_TRUE(it != miss_blob_in_shallow_report.end())
+                << "Missing blob should be reported in shallow scrub report for peer_id=" << peer_id;
+            EXPECT_TRUE(it->second.size() == 1)
+                << "There should be one missing blob for each peer in shallow scrub report";
+            EXPECT_TRUE(it->second.count(BlobRoute{missing_shard_id, missing_blob_id}) == 1)
+                << "The missing blob should be reported in shallow scrub report for peer_id=" << peer_id;
+        }
+
+        // peers that have the missing shard should be reported in the shallow scrub report.
+        const auto missing_shards_in_shallow_report = shallow_scrub_report->get_missing_shard_ids();
+        EXPECT_TRUE(missing_shards_in_shallow_report.size() == follower_peer_ids.size())
+            << "Missing shard should be reported in shallow scrub report for all followers";
+        for (const auto& peer_id : follower_peer_ids) {
+            auto it = missing_shards_in_shallow_report.find(peer_id);
+            EXPECT_TRUE(it != missing_shards_in_shallow_report.end())
+                << "Missing shard should be reported in shallow scrub report for peer_id=" << peer_id;
+            EXPECT_TRUE(it->second.size() == 1)
+                << "There should be one missing shard for each peer in shallow scrub report";
+            EXPECT_TRUE(it->second.count(missing_shard_id) == 1)
+                << "The missing shard should be reported in shallow scrub report for peer_id=" << peer_id;
+        }
+    });
+
+    g_helper->sync();
+
+    // Test case for leader missing/corrupted
+    LOGINFO("Starting leader missing/corrupted test case");
+
+    // Get new blob ids for leader corruption test
+    auto& leader_shard_blobs = shard_blob_ids_map[missing_shard_id];
+    auto leader_it = leader_shard_blobs.begin();
+    std::advance(leader_it, 3); // Skip the first 3 blobs already used
+    const auto leader_missing_blob_id = leader_it->first;
+    const auto leader_corrupted_blob_id = (++leader_it)->first;
+    const auto leader_inconsistent_blob_id = (++leader_it)->first;
+
+    // Corrupt data on leader
+    run_on_pg_leader(pg_id, [&]() {
+        auto& pg_index_table = hs_pg->index_table_;
+
+        // 1. Delete leader_missing_blob_id from pg_index table on leader
+        delete_blob_from_index(pg_index_table, missing_shard_id, leader_missing_blob_id);
+        LOGINFO("Deleted blob {} from leader index table", leader_missing_blob_id);
+
+        // 2. Make leader_corrupted_blob_id corrupted on leader
+        corrupt_blob_data(pg_index_table, missing_shard_id, leader_corrupted_blob_id);
+        LOGINFO("Corrupted blob {} on leader", leader_corrupted_blob_id);
+
+        // 3. Make leader_inconsistent_blob_id inconsistent on leader
+        make_blob_inconsistent(pg_index_table, missing_shard_id, leader_inconsistent_blob_id, _obj_inst.get());
+        LOGINFO("Made blob {} inconsistent on leader", leader_inconsistent_blob_id);
+    });
+
+    g_helper->sync();
+
+    // Run scrub and verify both leader and follower corruptions are detected
+    run_on_pg_leader(pg_id, [&]() {
+        LOGINFO("Running deep scrub to detect both leader and follower corruptions");
+        auto scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null";
+        auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report);
+        ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport";
+        deep_scrub_report->print();
+
+        const auto& leader_uuid = _obj_inst->our_uuid();
+        const auto& members = (hs_pg->pg_info_).members;
+        std::set< peer_id_t > follower_peer_ids;
+        for (const auto& member : members) {
+            if (member.id != leader_uuid) { follower_peer_ids.insert(member.id); }
+        }
+
+        // ========== Verify Missing Blobs ==========
+        LOGINFO("Verifying missing blobs detection");
+        verify_missing_blobs(deep_scrub_report.get(), leader_uuid, BlobRoute{missing_shard_id, leader_missing_blob_id});
+        for (const auto& peer_id : follower_peer_ids) {
+            verify_missing_blobs(deep_scrub_report.get(), peer_id, BlobRoute{missing_shard_id, missing_blob_id});
+        }
+
+        // ========== Verify Missing Shards ==========
+        LOGINFO("Verifying missing shards detection");
+        for (const auto& peer_id : follower_peer_ids) {
+            verify_missing_shards(deep_scrub_report.get(), peer_id, missing_shard_id);
+        }
+
+        // ========== Verify Corrupted Blobs ==========
+        LOGINFO("Verifying corrupted blobs detection");
+        verify_corrupted_blobs(deep_scrub_report.get(), leader_uuid,
+                               BlobRoute{missing_shard_id, leader_corrupted_blob_id});
+        for (const auto& peer_id : follower_peer_ids) {
+            verify_corrupted_blobs(deep_scrub_report.get(), peer_id, BlobRoute{missing_shard_id, corrupted_blob_id});
+        }
+
+        // ========== Verify Inconsistent Blobs ==========
+        const auto inconsistent_blobs = deep_scrub_report->get_inconsistent_blobs();
+        LOGINFO("Verifying inconsistent blobs detection, inconsistent_blobs.size()={}", inconsistent_blobs.size());
+
+        // Should have 2 inconsistent blobs: one from follower test, one from leader test
+        EXPECT_TRUE(inconsistent_blobs.size() == 2)
+            << "Should have 2 inconsistent blobs (1 from follower, 1 from leader)";
+
+        // Verify leader's inconsistent blob
+        auto leader_inconsistent_it = inconsistent_blobs.find(BlobRoute{missing_shard_id, leader_inconsistent_blob_id});
+        EXPECT_TRUE(leader_inconsistent_it != inconsistent_blobs.end())
+            << "The leader's inconsistent blob should be reported in deep scrub report";
+        if (leader_inconsistent_it != inconsistent_blobs.end()) {
+            auto& inconsistent_blob_peers = leader_inconsistent_it->second;
+            // All peers including leader should be in the inconsistent blob report
+            EXPECT_TRUE(inconsistent_blob_peers.size() == follower_peer_ids.size() + 1)
+                << "Leader's inconsistent blob should be reported for all peers including leader";
+            EXPECT_TRUE(inconsistent_blob_peers.count(leader_uuid) == 1)
+                << "Leader should be in the inconsistent blob peers";
+            for (const auto& peer_id : follower_peer_ids) {
+                EXPECT_TRUE(inconsistent_blob_peers.count(peer_id) == 1)
+                    << "Follower peer_id=" << peer_id << " should be in leader's inconsistent blob peers";
+            }
+        }
+
+        // Verify follower's inconsistent blob (from earlier test)
+        auto follower_inconsistent_it = inconsistent_blobs.find(BlobRoute{missing_shard_id, inconsistent_blob_id});
+        EXPECT_TRUE(follower_inconsistent_it != inconsistent_blobs.end())
+            << "The follower's inconsistent blob should be reported in deep scrub report";
+        if (follower_inconsistent_it != inconsistent_blobs.end()) {
+            auto& inconsistent_blob_peers = follower_inconsistent_it->second;
+            // All peers should be in the inconsistent blob report
+            EXPECT_TRUE(inconsistent_blob_peers.size() == follower_peer_ids.size() + 1)
+                << "Follower's inconsistent blob should be reported for all peers";
+            EXPECT_TRUE(inconsistent_blob_peers.count(leader_uuid) == 1)
+                << "Leader should be in follower's inconsistent blob peers";
+            for (const auto& peer_id : follower_peer_ids) {
+                EXPECT_TRUE(inconsistent_blob_peers.count(peer_id) == 1)
+                    << "Follower peer_id=" << peer_id << " should be in follower's inconsistent blob peers";
+            }
+        }
+
+        LOGINFO("Leader and follower corruption test completed successfully");
+    });
+
+    g_helper->sync();
+}
+
+// Verifies that the scrub superblock tracks deep and shallow scrub times independently: a deep scrub
+// must advance only last_deep_scrub_timestamp, and a shallow scrub only last_shallow_scrub_timestamp.
+TEST_F(HomeObjectFixture, ScrubSuperblockPersistenceTest) {
+    const pg_id_t pg_id = 1;
+    create_pg(pg_id);
+
+    const uint64_t shard_size = 64 * Mi;
+    create_shard(pg_id, shard_size, "shard_meta");
+
+    auto scrub_mgr = _obj_inst->scrub_manager();
+    run_on_pg_leader(pg_id, [&]() {
+        // Runs one manual, non-forced scrub to completion and hands back its report.
+        const auto run_scrub = [&](bool is_deep) {
+            return scrub_mgr->submit_scrub_task(pg_id, is_deep, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY).get();
+        };
+
+        // Get initial scrub superblock (should be newly created)
+        auto initial_sb = scrub_mgr->get_scrub_superblk(pg_id);
+        ASSERT_TRUE(initial_sb.has_value()) << "Should have scrub superblock";
+        auto initial_deep_scrub_time = initial_sb->last_deep_scrub_timestamp;
+        auto initial_shallow_scrub_time = initial_sb->last_shallow_scrub_timestamp;
+
+        // Give some time to ensure timestamps will be different
+        std::this_thread::sleep_for(std::chrono::seconds(2));
+        ASSERT_NE(run_scrub(true /* is_deep */), nullptr) << "Deep scrub should produce a report";
+
+        // Check that deep scrub timestamp updated
+        auto after_deep_sb = scrub_mgr->get_scrub_superblk(pg_id);
+        ASSERT_TRUE(after_deep_sb.has_value());
+        EXPECT_GT(after_deep_sb->last_deep_scrub_timestamp, initial_deep_scrub_time)
+            << "Deep scrub timestamp should be updated";
+        EXPECT_EQ(after_deep_sb->last_shallow_scrub_timestamp, initial_shallow_scrub_time)
+            << "Shallow scrub timestamp should not change after deep scrub";
+
+        std::this_thread::sleep_for(std::chrono::seconds(2));
+        ASSERT_NE(run_scrub(false /* is_deep */), nullptr) << "Shallow scrub should produce a report";
+
+        // Check that shallow scrub timestamp updated
+        auto after_shallow_sb = scrub_mgr->get_scrub_superblk(pg_id);
+        ASSERT_TRUE(after_shallow_sb.has_value());
+        EXPECT_EQ(after_shallow_sb->last_deep_scrub_timestamp, after_deep_sb->last_deep_scrub_timestamp)
+            << "Deep scrub timestamp should not change after shallow scrub";
+        EXPECT_GT(after_shallow_sb->last_shallow_scrub_timestamp, after_deep_sb->last_shallow_scrub_timestamp)
+            << "Shallow scrub timestamp should be updated";
+    });
+
+    g_helper->sync();
+}
\ No newline at end of file
diff --git a/src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp b/src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp
new file mode 100644
index 000000000..8e8b698f3
--- /dev/null
+++ b/src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp
@@ -0,0 +1,417 @@
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "../MPMCPriorityQueue.hpp"
+
+using namespace homeobject;
+using namespace std::chrono_literals;
+
+// ============================================================================
+// Basic Functionality Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, BasicPushPop) {
+    MPMCPriorityQueue< int > queue;
+
+    // Push in non-sorted order so the pop order below is decided by the queue, not by insertion order
+    queue.push(5);
+    queue.push(2);
+    queue.push(8);
+    queue.push(1);
+
+    EXPECT_EQ(queue.size(), 4);
+    EXPECT_FALSE(queue.empty());
+
+    // With the default comparator the largest element is expected first (max-heap behavior)
+    auto r1 = queue.pop();
+    EXPECT_TRUE(r1.is_ok());
+    EXPECT_EQ(r1.value.value(), 8);
+
+    auto r2 = queue.pop();
+    EXPECT_TRUE(r2.is_ok());
+    EXPECT_EQ(r2.value.value(), 5);
+
+    auto r3 = queue.pop();
+    EXPECT_TRUE(r3.is_ok());
+    EXPECT_EQ(r3.value.value(), 2);
+
+    auto r4 = queue.pop();
+    EXPECT_TRUE(r4.is_ok());
+    EXPECT_EQ(r4.value.value(), 1);
+
+    EXPECT_EQ(queue.size(), 0);
+    EXPECT_TRUE(queue.empty());
+}
+
+TEST(MPMCPriorityQueueTest, CustomComparator) {
+    // Supplying std::greater as the comparator is expected to turn the queue into a min-heap
+    MPMCPriorityQueue< int, std::greater< int > > queue;
+
+    queue.push(5);
+    queue.push(2);
+    queue.push(8);
+    queue.push(1);
+
+    // Smallest element must come out first, i.e. strictly ascending order
+    EXPECT_EQ(queue.pop().value.value(), 1);
+    EXPECT_EQ(queue.pop().value.value(), 2);
+    EXPECT_EQ(queue.pop().value.value(), 5);
+    EXPECT_EQ(queue.pop().value.value(), 8);
+}
+
+// Note: MPMCPriorityQueue requires std::regular<T>, which includes copy constructibility.
+// Move-only types are not supported due to the std::regular constraint.
+// This test is commented out as it violates the template requirements.
+//
+// TEST(MPMCPriorityQueueTest, MoveSemantics) {
+//     struct MoveOnly {
+//         int value;
+//
+//         explicit MoveOnly(int v) : value(v) {}
+//         MoveOnly(const MoveOnly&) = delete;
+//         MoveOnly& operator=(const MoveOnly&) = delete;
+//         MoveOnly(MoveOnly&&) = default;
+//         MoveOnly& operator=(MoveOnly&&) = default;
+//
+//         bool operator<(const MoveOnly& other) const { return value < other.value; }
+//     };
+//
+//     MPMCPriorityQueue< MoveOnly > queue;
+//
+//     queue.push(MoveOnly(5));
+//     queue.push(MoveOnly(2));
+//     queue.push(MoveOnly(8));
+//
+//     EXPECT_EQ(queue.pop().value.value().value, 8);
+//     EXPECT_EQ(queue.pop().value.value().value, 5);
+//     EXPECT_EQ(queue.pop().value.value().value, 2);
+// }
+
+// ============================================================================
+// Close Operation Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, Close) {
+    MPMCPriorityQueue< int > queue;
+
+    queue.push(1);
+    queue.push(2);
+    queue.push(3);
+
+    EXPECT_FALSE(queue.is_closed());
+    queue.close();
+    EXPECT_TRUE(queue.is_closed());
+
+    // close() must not discard queued elements: they are still popped, in priority order
+    EXPECT_EQ(queue.pop().value.value(), 3);
+    EXPECT_EQ(queue.pop().value.value(), 2);
+    EXPECT_EQ(queue.pop().value.value(), 1);
+
+    // Once drained, a closed queue must report Closed with no value (rather than block the caller)
+    auto result = queue.pop();
+    EXPECT_TRUE(result.is_closed());
+    EXPECT_FALSE(result.value.has_value());
+}
+
+TEST(MPMCPriorityQueueTest, PushAfterClose) {
+    MPMCPriorityQueue< int > queue;
+
+    queue.push(1);
+    queue.close();
+
+    // Pushes after close() must not be enqueued: only the element pushed before close() may remain
+    queue.push(2);
+    queue.push(3);
+
+    EXPECT_EQ(queue.size(), 1);
+
+    auto r1 = queue.pop();
+    EXPECT_TRUE(r1.is_ok());
+    EXPECT_EQ(r1.value.value(), 1);
+
+    auto r2 = queue.pop();
+    EXPECT_TRUE(r2.is_closed());
+}
+
+TEST(MPMCPriorityQueueTest, CloseIdempotent) {
+    MPMCPriorityQueue< int > queue;
+
+    queue.push(1);
+    queue.close();
+    queue.close(); // Repeated close() calls must be harmless and must not drop queued elements
+    queue.close();
+
+    EXPECT_TRUE(queue.is_closed());
+    EXPECT_EQ(queue.size(), 1);
+}
+
+// ============================================================================
+// Blocking Behavior Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, BlockingPop) {
+    MPMCPriorityQueue< int > queue;
+    std::atomic< bool > pop_started{false};
+    std::atomic< bool > pop_completed{false};
+
+    // Consumer pops from the still-empty queue, so it is expected to block until the push below
+    std::thread consumer([&]() {
+        pop_started = true;
+        auto result = queue.pop();
+        pop_completed = true;
+
+        EXPECT_TRUE(result.is_ok());
+        EXPECT_EQ(result.value.value(), 42);
+    });
+
+    // Spin until the consumer thread is running (it may not have entered pop() yet, hence the sleep below)
+    while (!pop_started) {
+        std::this_thread::yield();
+    }
+
+    std::this_thread::sleep_for(50ms);
+    EXPECT_FALSE(pop_completed);
+
+    // Unblock consumer by pushing the value it is expected to receive
+    queue.push(42);
+
+    consumer.join();
+    EXPECT_TRUE(pop_completed);
+}
+
+TEST(MPMCPriorityQueueTest, CloseUnblocksWaiters) {
+    MPMCPriorityQueue< int > queue;
+    std::atomic< int > closed_count{0};
+
+    // Start multiple consumers that all pop from the empty queue; each counts itself if it sees Closed
+    std::vector< std::thread > consumers;
+    for (int i = 0; i < 5; ++i) {
+        consumers.emplace_back([&]() {
+            auto result = queue.pop();
+            if (result.is_closed()) { closed_count.fetch_add(1, std::memory_order_relaxed); }
+        });
+    }
+
+    std::this_thread::sleep_for(100ms);
+
+    // Close should wake all waiters (the joins below would hang otherwise)
+    queue.close();
+
+    for (auto& t : consumers) {
+        t.join();
+    }
+
+    EXPECT_EQ(closed_count.load(), 5);
+}
+
+// ============================================================================
+// Multi-threaded Producer Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, MultipleProducers) {
+    MPMCPriorityQueue< int > queue;
+    constexpr int num_producers = 4;
+    constexpr int items_per_producer = 250;
+
+    std::vector< std::thread > producers;
+    for (int i = 0; i < num_producers; ++i) {
+        producers.emplace_back([&, i]() {
+            for (int j = 0; j < items_per_producer; ++j) {
+                queue.push(i * items_per_producer + j);
+            }
+        });
+    }
+
+    for (auto& t : producers) {
+        t.join();
+    }
+
+    EXPECT_EQ(queue.size(), num_producers * items_per_producer);
+
+    // Pop everything single-threaded; the sequence must be descending (its reverse is sorted ascending)
+    std::vector< int > popped;
+    for (int i = 0; i < num_producers * items_per_producer; ++i) {
+        auto result = queue.pop();
+        ASSERT_TRUE(result.is_ok());
+        popped.push_back(result.value.value());
+    }
+
+    EXPECT_TRUE(std::is_sorted(popped.rbegin(), popped.rend()));
+}
+
+// ============================================================================
+// Multi-threaded Consumer Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, MultipleConsumers) {
+    MPMCPriorityQueue< int > queue;
+    constexpr int num_items = 1000;
+
+    // Fill queue before any consumer starts
+    for (int i = 0; i < num_items; ++i) {
+        queue.push(i);
+    }
+
+    constexpr int num_consumers = 4;
+    std::vector< std::thread > consumers;
+    std::atomic< int > total_consumed{0};
+
+    for (int i = 0; i < num_consumers; ++i) {
+        consumers.emplace_back([&]() {
+            int count = 0;
+            while (true) {
+                auto result = queue.pop();
+                if (result.is_closed()) { break; }
+                ++count;
+            }
+            total_consumed.fetch_add(count, std::memory_order_relaxed);
+        });
+    }
+
+    // Give consumers time to start
+    std::this_thread::sleep_for(50ms);
+
+    // Close the queue so that each consumer leaves its loop once pop() reports Closed
+    queue.close();
+
+    for (auto& t : consumers) {
+        t.join();
+    }
+
+    EXPECT_EQ(total_consumed.load(), num_items);
+}
+
+// ============================================================================
+// Concurrent Producers and Consumers
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, ConcurrentProducersConsumers) {
+    MPMCPriorityQueue< int > queue;
+    constexpr int num_producers = 3;
+    constexpr int num_consumers = 3;
+    constexpr int items_per_producer = 200;
+
+    std::atomic< int > total_consumed{0};
+    std::vector< std::thread > threads;
+
+    // Start consumers first: they occupy threads[0, num_consumers) and pop until the queue reports Closed
+    for (int i = 0; i < num_consumers; ++i) {
+        threads.emplace_back([&]() {
+            int count = 0;
+            while (true) {
+                auto result = queue.pop();
+                if (result.is_closed()) { break; }
+                ++count;
+            }
+            total_consumed.fetch_add(count, std::memory_order_relaxed);
+        });
+    }
+
+    // Start producers: they occupy threads[num_consumers, num_consumers + num_producers)
+    for (int i = 0; i < num_producers; ++i) {
+        threads.emplace_back([&, i]() {
+            for (int j = 0; j < items_per_producer; ++j) {
+                queue.push(i * items_per_producer + j);
+                std::this_thread::sleep_for(10us); // Simulate work
+            }
+        });
+    }
+
+    // Wait for producers, so that every item has been pushed before the queue is closed
+    for (int i = num_consumers; i < num_consumers + num_producers; ++i) {
+        threads[i].join();
+    }
+
+    // Close and wait for consumers
+    queue.close();
+    for (int i = 0; i < num_consumers; ++i) {
+        threads[i].join();
+    }
+
+    EXPECT_EQ(total_consumed.load(), num_producers * items_per_producer);
+}
+
+// ============================================================================
+// Stress Test
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, StressTest) {
+    MPMCPriorityQueue< int > queue;
+    constexpr int num_threads = 8;
+    constexpr int operations_per_thread = 1000;
+
+    std::atomic< int > push_count{0};
+    std::atomic< int > pop_count{0};
+    std::vector< std::thread > threads;
+
+    // Half producers, half consumers: total pops == total pushes, so every pop() can be satisfied
+    for (int i = 0; i < num_threads / 2; ++i) {
+        threads.emplace_back([&]() {
+            for (int j = 0; j < operations_per_thread; ++j) {
+                queue.push(j);
+                push_count.fetch_add(1, std::memory_order_relaxed);
+            }
+        });
+    }
+
+    for (int i = 0; i < num_threads / 2; ++i) {
+        threads.emplace_back([&]() {
+            for (int j = 0; j < operations_per_thread; ++j) {
+                auto result = queue.pop();
+                if (result.is_ok()) { pop_count.fetch_add(1, std::memory_order_relaxed); }
+            }
+        });
+    }
+
+    for (auto& t : threads) {
+        t.join();
+    }
+
+    EXPECT_EQ(push_count.load(), (num_threads / 2) * operations_per_thread);
+
+    // Drain anything still queued so that the final pop/push comparison covers every element
+    while (!queue.empty()) {
+        auto result = queue.pop();
+        if (result.is_ok()) { pop_count.fetch_add(1, std::memory_order_relaxed); }
+    }
+
+    EXPECT_EQ(pop_count.load(), push_count.load());
+}
+
+// ============================================================================
+// Destructor Test
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, DestructorClosesQueue) {
+    std::atomic< bool > consumer_unblocked{false};
+
+    std::thread consumer([&]() {
+        auto queue = std::make_unique< MPMCPriorityQueue< int > >();
+        queue->push(1);
+
+        std::thread waiter([&, q = queue.get()]() {
+            auto first_result = q->pop(); // Pop the 1
+            (void)first_result;           // Explicitly ignore the result
+            auto result = q->pop();       // Expected to block until the queue is destroyed below
+            if (result.is_closed()) { consumer_unblocked = true; }
+        });
+
+        std::this_thread::sleep_for(100ms);
+        // NOTE(review): waiter may still be inside q->pop() while reset() destroys the queue - confirm this is safe
+        queue.reset();
+
+        waiter.join();
+    });
+
+    consumer.join();
+    EXPECT_TRUE(consumer_unblocked);
+}
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv); // let GoogleTest parse its own command-line flags first
+    return RUN_ALL_TESTS();
+}

From 27b08aa21b11a17d385ec4eebf804074c31d37ff Mon Sep 17 00:00:00 2001
From: Jie Yao <jyao3@ebaychina.com>
Date: Fri, 20 Mar 2026 16:36:13 +0800
Subject: [PATCH 2/4] add http interface for scrub

---
 .../homestore_backend/MPMCPriorityQueue.hpp   |  14 +-
 src/lib/homestore_backend/hs_http_manager.cpp | 375 +++++++++++++++++-
 src/lib/homestore_backend/hs_http_manager.hpp |  54 +++
 src/lib/homestore_backend/scrub_manager.cpp   |  44 +-
 4 files changed, 473 insertions(+), 14 deletions(-)

diff --git a/src/lib/homestore_backend/MPMCPriorityQueue.hpp b/src/lib/homestore_backend/MPMCPriorityQueue.hpp
index 585e2925e..cf5fea548 100644
--- a/src/lib/homestore_backend/MPMCPriorityQueue.hpp
+++ b/src/lib/homestore_backend/MPMCPriorityQueue.hpp
@@ -72,32 +72,34 @@ class MPMCPriorityQueue {
      * @brief Thread-safe push operation (copy)
      *
      * @param value Element to insert
-     * @note No-op if queue is closed
+     * @return true if pushed successfully, false if queue is closed
      */
-    void push(const T& value) {
+    bool push(const T& value) {
         {
             std::scoped_lock lock(mutex_);
             if (closed_) [[unlikely]] {
-                return; // Silently ignore pushes to closed queue
+                return false; // Queue is closed, cannot push
             }
             pq_.push(value);
         }
         cv_.notify_one(); // Wake one waiting consumer
+        return true;
     }
 
     /**
      * @brief Thread-safe push operation (move)
      *
      * @param value Element to insert (will be moved)
-     * @note No-op if queue is closed
+     * @return true if pushed successfully, false if queue is closed
      */
-    void push(T&& value) {
+    bool push(T&& value) {
         {
             std::scoped_lock lock(mutex_);
-            if (closed_) [[unlikely]] { return; }
+            if (closed_) [[unlikely]] { return false; }
             pq_.push(std::move(value));
         }
         cv_.notify_one();
+        return true;
     }
 
     /**
diff --git a/src/lib/homestore_backend/hs_http_manager.cpp b/src/lib/homestore_backend/hs_http_manager.cpp
index 5dd9a865a..6b8e71c9a 100644
--- a/src/lib/homestore_backend/hs_http_manager.cpp
+++ b/src/lib/homestore_backend/hs_http_manager.cpp
@@ -17,12 +17,38 @@
 #include <sisl/version.hpp>
 #include <sisl/settings/settings.hpp>
 #include <boost/uuid/string_generator.hpp>
+#include <boost/uuid/uuid_io.hpp>
+#include <ctime>
+#include <limits>
+#include <string>
 
 #include "hs_http_manager.hpp"
 #include "hs_homeobject.hpp"
 
 namespace homeobject {
 
+namespace {
+// Formats `tp` as an ISO 8601 UTC timestamp ("YYYY-MM-DDTHH:MM:SSZ"); returns "" if conversion fails.
+std::string format_iso8601_time(const std::chrono::system_clock::time_point& tp) {
+    const std::time_t secs = std::chrono::system_clock::to_time_t(tp); // don't shadow the type name time_t
+    std::tm utc{};
+    if (gmtime_r(&secs, &utc) == nullptr) { return {}; } // gmtime_r: re-entrant, unlike std::gmtime
+    char buf[32];
+    const auto len = std::strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &utc);
+    return std::string(buf, len); // strftime returns 0 on failure, leaving buf unspecified
+}
+
+// Sums the sizes of all per-peer containers stored as mapped values in `peer_map`.
+template < typename PeerMap >
+size_t count_peer_map_items(const PeerMap& peer_map) {
+    size_t total = 0;
+    for (const auto& entry : peer_map) {
+        total += entry.second.size();
+    }
+    return total;
+}
+} // anonymous namespace
+
 HttpManager::HttpManager(HSHomeObject& ho) : ho_(ho) {
     using namespace Pistache;
     using namespace Pistache::Rest;
@@ -74,7 +100,13 @@ HttpManager::HttpManager(HSHomeObject& ho) : ho_(ho) {
         {Pistache::Http::Method::Post, "/api/v1/trigger_gc",
          Pistache::Rest::Routes::bind(&HttpManager::trigger_gc, this)},
         {Pistache::Http::Method::Get, "/api/v1/gc_job_status",
-         Pistache::Rest::Routes::bind(&HttpManager::get_gc_job_status, this)}};
+         Pistache::Rest::Routes::bind(&HttpManager::get_gc_job_status, this)},
+        {Pistache::Http::Method::Post, "/api/v1/trigger_pg_scrub",
+         Pistache::Rest::Routes::bind(&HttpManager::trigger_pg_scrub, this)},
+        {Pistache::Http::Method::Get, "/api/v1/scrub_job_status",
+         Pistache::Rest::Routes::bind(&HttpManager::get_scrub_job_status, this)},
+        {Pistache::Http::Method::Post, "/api/v1/cancel_scrub_job",
+         Pistache::Rest::Routes::bind(&HttpManager::cancel_scrub_job, this)}};
 
     auto http_server = ioenvironment.get_http_server();
     if (!http_server) {
@@ -486,6 +518,162 @@ void HttpManager::exit_pg(const Pistache::Rest::Request& request, Pistache::Http
     response.send(Pistache::Http::Code::Ok, "Exit pg request submitted");
 }
 
+// HTTP handler for POST /api/v1/trigger_pg_scrub?pg_id=<id>&deep=<true|false>&force=<true|false>.
+// Validates pg_id, registers a ScrubJobInfo under a fresh job id, replies 202 Accepted right away and
+// then submits the scrub task. The job is marked COMPLETED/FAILED from the future's continuation, so
+// clients poll /api/v1/scrub_job_status?job_id=<job_id> for the outcome. `deep` and `force` are
+// optional and default to false.
+void HttpManager::trigger_pg_scrub(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) {
+    auto scrub_mgr = ho_.scrub_manager();
+    if (!scrub_mgr) {
+        response.send(Pistache::Http::Code::Internal_Server_Error, "Scrub manager not available");
+        return;
+    }
+
+    // Get query parameters
+    const auto pg_id_param = request.query().get("pg_id");
+    const auto is_deep_param = request.query().get("deep");
+    const auto force_param = request.query().get("force");
+
+    // Validate pg_id parameter (required)
+    if (!pg_id_param || pg_id_param.value().empty()) {
+        nlohmann::json error;
+        error["error"] = "Missing required parameter: pg_id";
+        error["usage"] = "POST /api/v1/trigger_pg_scrub?pg_id=<id>&deep=<true|false>&force=<true|false>";
+        response.send(Pistache::Http::Code::Bad_Request, error.dump());
+        return;
+    }
+
+    // Parse pg_id strictly: std::stoul alone accepts trailing garbage ("12abc") and a leading '-'
+    // (whose result wraps around), so require the whole value to consist of decimal digits first.
+    const auto& pg_id_str = pg_id_param.value();
+    std::string parse_error;
+    unsigned long pg_id_val = 0;
+    if (pg_id_str.find_first_not_of("0123456789") != std::string::npos) {
+        parse_error = "Invalid pg_id format: not a number";
+    } else {
+        try {
+            pg_id_val = std::stoul(pg_id_str);
+        } catch (const std::out_of_range&) { parse_error = "pg_id out of range"; }
+    }
+    if (parse_error.empty() && pg_id_val > std::numeric_limits< uint16_t >::max()) {
+        parse_error = "pg_id out of range";
+    }
+    if (!parse_error.empty()) {
+        nlohmann::json error;
+        error["error"] = parse_error;
+        error["pg_id"] = pg_id_str;
+        response.send(Pistache::Http::Code::Bad_Request, error.dump());
+        return;
+    }
+    const auto pg_id = static_cast< uint16_t >(pg_id_val);
+
+    // Optional boolean flags: a missing, empty or unrecognized value means false
+    const auto parse_flag = [](const auto& param) {
+        if (!param) { return false; }
+        const auto& value = param.value();
+        return value == "true" || value == "1" || value == "yes";
+    };
+    const bool is_deep = parse_flag(is_deep_param);
+    const bool force = parse_flag(force_param);
+
+    LOGINFO("Received trigger_pg_scrub request for pg_id={}, deep={}, force={}", pg_id, is_deep, force);
+
+    // Verify PG exists
+    auto hs_pg = ho_.get_hs_pg(pg_id);
+    if (!hs_pg) {
+        nlohmann::json error;
+        error["error"] = "PG not found";
+        error["pg_id"] = pg_id;
+        response.send(Pistache::Http::Code::Not_Found, error.dump());
+        return;
+    }
+
+    // Generate job ID and create job info
+    const auto job_id = generate_job_id();
+    auto job_info = std::make_shared< ScrubJobInfo >(job_id, pg_id, is_deep);
+
+    {
+        std::lock_guard< std::shared_mutex > lock(scrub_job_mutex_);
+        scrub_jobs_map_.set(job_id, job_info);
+    }
+
+    // Prepare immediate response
+    nlohmann::json result;
+    result["job_id"] = job_id;
+    result["pg_id"] = pg_id;
+    result["scrub_type"] = is_deep ? "deep" : "shallow";
+    result["force"] = force;
+    result["message"] = "Scrub task submitted, query status using /api/v1/scrub_job_status?job_id=" + job_id;
+
+    // Return immediately with HTTP 202 Accepted
+    response.send(Pistache::Http::Code::Accepted, result.dump());
+
+    // Submit scrub task (MANUALLY trigger type) - runs asynchronously
+    scrub_mgr->submit_scrub_task(pg_id, is_deep, force, SCRUB_TRIGGER_TYPE::MANUALLY)
+        .via(&folly::InlineExecutor::instance())
+        .thenValue([job_info, is_deep](std::shared_ptr< ScrubManager::ShallowScrubReport > report) {
+            if (!report) {
+                job_info->try_complete(ScrubJobStatus::FAILED, "Scrub task failed or was cancelled");
+                return;
+            }
+
+            // Build report summary
+            nlohmann::json report_summary;
+            report_summary["pg_id"] = report->get_pg_id();
+
+            // Add missing shards info
+            const auto& missing_shards = report->get_missing_shard_ids();
+            if (!missing_shards.empty()) {
+                nlohmann::json missing_shards_json;
+                for (const auto& [peer_id, shard_ids] : missing_shards) {
+                    missing_shards_json[boost::uuids::to_string(peer_id)] = shard_ids;
+                }
+                report_summary["missing_shards"] = missing_shards_json;
+            }
+
+            // Add missing blobs info
+            const auto& missing_blobs = report->get_missing_blobs();
+            if (!missing_blobs.empty()) { report_summary["missing_blobs_count"] = count_peer_map_items(missing_blobs); }
+
+            // If it's a deep scrub report, add additional info
+            if (is_deep) {
+                auto deep_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(report);
+                if (deep_report) {
+                    // Add corrupted blobs count
+                    const auto& corrupted_blobs = deep_report->get_corrupted_blobs();
+                    if (!corrupted_blobs.empty()) {
+                        report_summary["corrupted_blobs_count"] = count_peer_map_items(corrupted_blobs);
+                    }
+
+                    // Add inconsistent blobs count
+                    const auto& inconsistent_blobs = deep_report->get_inconsistent_blobs();
+                    if (!inconsistent_blobs.empty()) {
+                        report_summary["inconsistent_blobs_count"] = inconsistent_blobs.size();
+                    }
+
+                    // Add corrupted shards count
+                    const auto& corrupted_shards = deep_report->get_corrupted_shards();
+                    if (!corrupted_shards.empty()) {
+                        report_summary["corrupted_shards_count"] = count_peer_map_items(corrupted_shards);
+                    }
+
+                    // Add corrupted PG meta info
+                    const auto& corrupted_pg_metas = deep_report->get_corrupted_pg_metas();
+                    if (!corrupted_pg_metas.empty()) {
+                        report_summary["corrupted_pg_metas_count"] = corrupted_pg_metas.size();
+                    }
+                }
+            }
+
+            // Complete the job with success status and report
+            job_info->try_complete(ScrubJobStatus::COMPLETED, "", report_summary);
+        })
+        .thenError([job_info](const folly::exception_wrapper& ew) {
+            job_info->try_complete(ScrubJobStatus::FAILED, ew.what().c_str());
+        });
+}
+
 void HttpManager::trigger_gc(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) {
     auto gc_mgr = ho_.gc_manager();
     if (!gc_mgr) {
@@ -633,7 +821,7 @@ void HttpManager::trigger_gc(const Pistache::Rest::Request& request, Pistache::H
 
 std::string HttpManager::generate_job_id() {
     auto counter = job_counter_.fetch_add(1, std::memory_order_relaxed);
-    return fmt::format("trigger-gc-task-{}", counter);
+    return fmt::format("job-{}", counter);
 }
 
 void HttpManager::get_job_status(const std::string& job_id, nlohmann::json& result) {
@@ -770,6 +958,189 @@ void HttpManager::trigger_gc_for_pg(uint16_t pg_id, const std::string& job_id) {
         .get();
 }
 
+// Handler for scrub job status queries. A non-empty "job_id" query parameter selects one job (404 if it is not
+// tracked); otherwise every job currently held in scrub_jobs_map_ is returned under the "jobs" key.
+void HttpManager::get_scrub_job_status(const Pistache::Rest::Request& request,
+                                       Pistache::Http::ResponseWriter response) {
+    auto job_id_param = request.query().get("job_id");
+    if (job_id_param && !job_id_param.value().empty()) {
+        // Query specific job
+        const auto job_id = job_id_param.value();
+        LOGINFO("Query scrub job {} status", job_id);
+        std::shared_ptr< ScrubJobInfo > job_info;
+        {
+            // get() throws std::out_of_range on a miss and promotes (mutates the LRU list) on a hit; the
+            // non-promoting lookup reports a miss through end() and does not modify the map under the shared lock.
+            std::shared_lock lock(scrub_job_mutex_);
+            const auto job_it = scrub_jobs_map_.findWithoutPromotion(job_id);
+            if (job_it != scrub_jobs_map_.end()) { job_info = job_it->second; }
+        }
+        if (!job_info) {
+            nlohmann::json error;
+            error["error"] = "Job not found";
+            error["job_id"] = job_id;
+            response.send(Pistache::Http::Code::Not_Found, error.dump());
+            return;
+        }
+        nlohmann::json result = build_scrub_job_json(job_info);
+        response.send(Pistache::Http::Code::Ok, result.dump());
+        return;
+    }
+
+    // Query all jobs
+    LOGINFO("Query all scrub job status");
+    nlohmann::json result;
+    result["jobs"] = nlohmann::json::array(); // reply is {"jobs":[]} rather than null when no job is tracked
+    std::vector< std::shared_ptr< ScrubJobInfo > > all_jobs;
+    {
+        // snapshot the job pointers under the map lock; the JSON is built afterwards without holding it
+        std::shared_lock lock(scrub_job_mutex_);
+        for (const auto& [k, v] : scrub_jobs_map_) {
+            all_jobs.push_back(v);
+        }
+    }
+    for (const auto& job_info : all_jobs) {
+        result["jobs"].push_back(build_scrub_job_json(job_info));
+    }
+    response.send(Pistache::Http::Code::Ok, result.dump());
+}
+
+// Serialises one scrub job into JSON. job_id, pg_id, scrub_type, status and start_time are always present;
+// end_time and duration_seconds are added once the job has left RUNNING, error_message when it is non-empty and
+// report when a COMPLETED job carries a non-empty summary.
+nlohmann::json HttpManager::build_scrub_job_json(const std::shared_ptr< ScrubJobInfo >& job_info) {
+    // Lower-case name of a job state as exposed in the reply
+    const auto state_name = [](ScrubJobStatus state) -> std::string {
+        if (state == ScrubJobStatus::RUNNING) { return "running"; }
+        if (state == ScrubJobStatus::COMPLETED) { return "completed"; }
+        if (state == ScrubJobStatus::FAILED) { return "failed"; }
+        if (state == ScrubJobStatus::CANCELLED) { return "cancelled"; }
+        return "unknown";
+    };
+
+    nlohmann::json out;
+
+    // Fields that are read-only after construction; they are read without taking the job mutex
+    out["job_id"] = job_info->job_id;
+    out["pg_id"] = job_info->pg_id;
+    if (job_info->is_deep) {
+        out["scrub_type"] = "deep";
+    } else {
+        out["scrub_type"] = "shallow";
+    }
+
+    // Fields that may still change; they are read under the job mutex
+    {
+        std::lock_guard< std::mutex > lock(job_info->mtx_);
+        const auto state = job_info->status;
+        const bool finished = (state != ScrubJobStatus::RUNNING);
+
+        // Status
+        out["status"] = state_name(state);
+
+        // Timestamps - convert to ISO 8601 format (no newline)
+        out["start_time"] = format_iso8601_time(job_info->start_time);
+        if (finished) {
+            out["end_time"] = format_iso8601_time(job_info->end_time);
+
+            const auto elapsed = job_info->end_time - job_info->start_time;
+            out["duration_seconds"] = std::chrono::duration_cast< std::chrono::seconds >(elapsed).count();
+        }
+
+        // Error message (if any)
+        const auto& err = job_info->error_message;
+        if (!err.empty()) { out["error_message"] = err; }
+
+        // Report summary (if completed)
+        const auto& summary = job_info->report_summary;
+        if (state == ScrubJobStatus::COMPLETED && !summary.empty()) { out["report"] = summary; }
+    }
+
+    return out;
+}
+
+// Handler for cancelling a scrub job by "job_id". Replies 400 when job_id is missing or the job is not running,
+// 404 when the job is not tracked, 500 when no scrub manager is available, and 200 once cancellation was issued.
+void HttpManager::cancel_scrub_job(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) {
+    auto job_id_param = request.query().get("job_id");
+    if (!job_id_param || job_id_param.value().empty()) {
+        nlohmann::json error;
+        error["error"] = "Missing required parameter: job_id";
+        error["usage"] = "POST /api/v1/cancel_scrub_job?job_id=<id>";
+        response.send(Pistache::Http::Code::Bad_Request, error.dump());
+        return;
+    }
+    const auto job_id = job_id_param.value();
+    LOGINFO("Cancel scrub job {}", job_id);
+    std::shared_ptr< ScrubJobInfo > job_info;
+    {
+        // EvictingCacheMap::get() throws std::out_of_range for an unknown key and promotes the entry on a hit;
+        // findWithoutPromotion() signals a miss via end() and leaves the map untouched under the shared lock.
+        std::shared_lock lock(scrub_job_mutex_);
+        const auto job_it = scrub_jobs_map_.findWithoutPromotion(job_id);
+        if (job_it != scrub_jobs_map_.end()) { job_info = job_it->second; }
+    }
+    if (!job_info) {
+        nlohmann::json error;
+        error["error"] = "Job not found";
+        error["job_id"] = job_id;
+        response.send(Pistache::Http::Code::Not_Found, error.dump());
+        return;
+    }
+
+    // Check if job is still running (thread-safe)
+    bool can_cancel = false;
+    std::string current_status_str;
+    {
+        std::lock_guard< std::mutex > lock(job_info->mtx_);
+        can_cancel = (job_info->status == ScrubJobStatus::RUNNING);
+        if (!can_cancel) {
+            // Get status string for error message
+            switch (job_info->status) {
+            case ScrubJobStatus::COMPLETED:
+                current_status_str = "completed";
+                break;
+            case ScrubJobStatus::FAILED:
+                current_status_str = "failed";
+                break;
+            case ScrubJobStatus::CANCELLED:
+                current_status_str = "cancelled";
+                break;
+            default:
+                current_status_str = "unknown";
+            }
+        }
+    }
+    if (!can_cancel) {
+        nlohmann::json result;
+        result["job_id"] = job_id;
+        result["message"] = "Job is not running, cannot cancel";
+        result["current_status"] = current_status_str;
+        response.send(Pistache::Http::Code::Bad_Request, result.dump());
+        return;
+    }
+
+    // Cancel the scrub task
+    auto scrub_mgr = ho_.scrub_manager();
+    if (!scrub_mgr) {
+        nlohmann::json error;
+        error["error"] = "Scrub manager not available";
+        response.send(Pistache::Http::Code::Internal_Server_Error, error.dump());
+        return;
+    }
+
+    // Cancel in scrub manager first (this will stop ongoing work)
+    // NOTE(review): the job may still finish between the RUNNING check above and this call - confirm that is benign.
+    scrub_mgr->cancel_scrub_task(job_info->pg_id);
+    // Update job status (thread-safe)
+    job_info->cancel();
+
+    nlohmann::json result;
+    result["job_id"] = job_id;
+    result["message"] = "Scrub job cancelled successfully";
+    response.send(Pistache::Http::Code::Ok, result.dump());
+}
+
 #ifdef _PRERELEASE
 void HttpManager::crash_system(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) {
     std::string crash_type;
diff --git a/src/lib/homestore_backend/hs_http_manager.hpp b/src/lib/homestore_backend/hs_http_manager.hpp
index 9a6ee0b97..2681aeccb 100644
--- a/src/lib/homestore_backend/hs_http_manager.hpp
+++ b/src/lib/homestore_backend/hs_http_manager.hpp
@@ -50,6 +50,9 @@ class HttpManager {
     void get_gc_job_status(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
     void trigger_gc_for_pg(uint16_t pg_id, const std::string& job_id);
     void get_job_status(const std::string& job_id, nlohmann::json& result);
+    void trigger_pg_scrub(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
+    void get_scrub_job_status(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
+    void cancel_scrub_job(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
 
 #ifdef _PRERELEASE
     void crash_system(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response);
@@ -74,15 +77,66 @@ class HttpManager {
                 job_id(id), status(GCJobStatus::RUNNING), pg_id(pgid), chunk_id(cid) {}
     };
 
+    enum class ScrubJobStatus { RUNNING, COMPLETED, FAILED, CANCELLED }; // a job is created in RUNNING
+
+    // State of one HTTP-triggered scrub job, shared (via std::shared_ptr) by the HTTP handlers and scrub callbacks.
+    struct ScrubJobInfo {
+        std::string job_id;
+        uint16_t pg_id;
+        bool is_deep;
+        // Mutable fields protected by mutex
+        mutable std::mutex mtx_;
+        ScrubJobStatus status;
+        std::chrono::system_clock::time_point start_time;
+        std::chrono::system_clock::time_point end_time;
+        std::string error_message;
+        nlohmann::json report_summary;
+        // Flag to prevent status update after cancellation
+        std::atomic< bool > is_cancelled{false};
+
+        ScrubJobInfo(const std::string& id, uint16_t pgid, bool deep) :
+                job_id(id),
+                pg_id(pgid),
+                is_deep(deep),
+                status(ScrubJobStatus::RUNNING),
+                start_time(std::chrono::system_clock::now()) {}
+
+        // Thread-safe status update - returns false if already cancelled
+        bool try_complete(ScrubJobStatus new_status, const std::string& error_msg = "",
+                          const nlohmann::json& summary = nlohmann::json()) {
+            std::lock_guard< std::mutex > lock(mtx_);
+            if (is_cancelled.load(std::memory_order_acquire)) { return false; } // Already cancelled, reject update
+            status = new_status;
+            end_time = std::chrono::system_clock::now();
+            error_message = error_msg;
+            if (!summary.empty()) { report_summary = summary; }
+            return true;
+        }
+
+        // Thread-safe cancel. Only a job that is still RUNNING becomes CANCELLED; a job that already reached a
+        // terminal state keeps its status, end_time and error_message instead of being overwritten.
+        void cancel() {
+            std::lock_guard< std::mutex > lock(mtx_);
+            if (status != ScrubJobStatus::RUNNING) { return; }
+            is_cancelled.store(true, std::memory_order_release);
+            status = ScrubJobStatus::CANCELLED;
+            end_time = std::chrono::system_clock::now();
+            error_message = "Cancelled by user";
+        }
+    };
+
     std::string generate_job_id();
+    nlohmann::json build_scrub_job_json(const std::shared_ptr< ScrubJobInfo >& job_info);
 
 private:
     HSHomeObject& ho_;
     std::atomic< uint64_t > job_counter_{0};
     std::shared_mutex gc_job_mutex_;
+    std::shared_mutex scrub_job_mutex_;
 
     // we don`t have an external DB to store the job status, so we only keep the status of the lastest 100 jobs for
     // query. or, we can evict the job after it is completed after a timeout period.
     folly::EvictingCacheMap< std::string, std::shared_ptr< GCJobInfo > > gc_jobs_map_{100};
+    folly::EvictingCacheMap< std::string, std::shared_ptr< ScrubJobInfo > > scrub_jobs_map_{100};
 };
 } // namespace homeobject
\ No newline at end of file
diff --git a/src/lib/homestore_backend/scrub_manager.cpp b/src/lib/homestore_backend/scrub_manager.cpp
index 5b9452fa7..4908782a4 100644
--- a/src/lib/homestore_backend/scrub_manager.cpp
+++ b/src/lib/homestore_backend/scrub_manager.cpp
@@ -63,7 +63,7 @@ void ScrubManager::scan_pg_for_scrub() {
                 .via(&folly::InlineExecutor::instance())
                 .thenValue([this, pg_id](std::shared_ptr< ShallowScrubReport > report) {
                     if (!report) {
-                        LOGERRORMOD(scrubmgr, "deep scrub failed for pg={}", pg_id);
+                        LOGERRORMOD(scrubmgr, "shallow scrub failed for pg={}", pg_id);
                         return;
                     }
                     LOGINFOMOD(scrubmgr, "shallow scrub is completed for pg={}", pg_id);
@@ -615,7 +615,12 @@ bool ScrubManager::send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
     }
 
     // Check if cancelled or incomplete
-    if (scrub_ctx->is_cancelled() || scrub_ctx->peer_sm_map_.size() != scrub_ctx->member_peer_ids_.size()) {
+    bool is_incomplete = false;
+    {
+        std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
+        is_incomplete = scrub_ctx->peer_sm_map_.size() != scrub_ctx->member_peer_ids_.size();
+    }
+    if (scrub_ctx->is_cancelled() || is_incomplete) {
         SCRUBLOGD(pg_id, task_id, "scrub task is cancelled or incomplete when scrubbing {}!", scrub_type_name);
         return false;
     }
@@ -728,10 +733,31 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
         }
 
         // Merge PG meta scrub results
-        pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+        {
+            std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
+            pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+        }
         SCRUBLOGD(pg_id, task_id, "PG meta scrub completed");
     }
 
+    // scrubbing probably goes with blob deletion, and thus some of blobs might be not present on some
+    // peers even if we wait for the same scrub_lsn. Theoretically, without a strong consistent snapshot , there is not
+    // a mechanism to distinguish whether a blob/shard is missing due to deletion or due to lost, this is the
+    // predicament we are in now with oure current design:
+
+    // 1 no blob delete lsn，
+    // 2 no shard sealed lsn
+    // 3 no snapshot which can provide a strong consistent view of the
+    // blob/shard existence at the scrub_lsn.
+
+    // we can only rely on the best effort of waiting for all peers to reach the same scrub_lsn, but it is not
+    // guaranteed. As a result, we might have false positive missing blobs due to deletion!!!!
+
+    // TODO: figure out a solution to mitigate the false positive issue, for example, we can add a "blob delete lsn" and
+    // "shard sealed lsn". for all the missblobs, if its deletd lsn is after scrub_lsn, then it is a false positive
+    // missing blob, and we can move it out of missblobs. this can be done by leader when merging all the scrub maps for
+    // a specific scrub req.
+
     // Step 2: Scrub Shard Range
     SCRUBLOGD(pg_id, task_id, "Starting shard range {} scrub", is_deep_scrub ? "deep" : "shallow");
     {
@@ -791,7 +817,10 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
             }
 
             SCRUBLOGD(pg_id, task_id, "Merging shard scrub results for range [{}, {}]", shard_start, shard_end);
-            pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+            {
+                std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
+                pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+            }
         }
         SCRUBLOGD(pg_id, task_id, "shard scrub completed, total ranges scrubbed: {}", shard_range_count);
     }
@@ -850,7 +879,10 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
             }
 
             SCRUBLOGD(pg_id, task_id, "Merging blob scrub results for range [{}, {}]", blob_start, blob_end);
-            pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+            {
+                std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
+                pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
+            }
         }
         SCRUBLOGD(pg_id, task_id, "blob scrub completed, total ranges scrubbed: {}", blob_range_count);
     }
@@ -920,7 +952,7 @@ void ScrubManager::save_scrub_superblk(const pg_id_t pg_id, const bool is_deep_s
         (*sb).create(sizeof(pg_scrub_superblk));
         (*sb)->pg_id = pg_id;
         (*sb)->last_deep_scrub_timestamp = current_time;
-        (*sb)->last_deep_scrub_timestamp = current_time;
+        (*sb)->last_shallow_scrub_timestamp = current_time;
         (*sb).write();
         m_pg_scrub_sb_map.emplace(pg_id, std::move(sb));
         return;

From 3b62d42275aad3eb89a333e74ebb757b56856c00 Mon Sep 17 00:00:00 2001
From: Jie Yao <jyao3@ebaychina.com>
Date: Mon, 23 Mar 2026 07:48:03 +0800
Subject: [PATCH 3/4] filter out deleted blobs when scrubbing

---
 src/lib/homestore_backend/hs_blob_manager.cpp |  27 +++-
 src/lib/homestore_backend/hs_homeobject.hpp   |   2 +
 .../replication_state_machine.cpp             |   3 +
 src/lib/homestore_backend/scrub_manager.cpp   | 126 +++++++++++++++---
 src/lib/homestore_backend/scrub_manager.hpp   |   9 ++
 5 files changed, 143 insertions(+), 24 deletions(-)

diff --git a/src/lib/homestore_backend/hs_blob_manager.cpp b/src/lib/homestore_backend/hs_blob_manager.cpp
index c89c32ec5..b3526bd54 100644
--- a/src/lib/homestore_backend/hs_blob_manager.cpp
+++ b/src/lib/homestore_backend/hs_blob_manager.cpp
@@ -88,7 +88,7 @@ BlobManager::AsyncResult< blob_id_t > HSHomeObject::_put_blob(ShardInfo const& s
         return folly::makeUnexpected(BlobErrorCode::SHUTTING_DOWN);
     }
     incr_pending_request_num();
-        // check user key size
+    // check user key size
     if (blob.user_key.size() > BlobHeader::max_user_key_length) {
         BLOGE(tid, shard.id, 0, "input user key length > max_user_key_length {}", blob.user_key.size(),
               BlobHeader::max_user_key_length);
@@ -167,8 +167,7 @@ BlobManager::AsyncResult< blob_id_t > HSHomeObject::_put_blob(ShardInfo const& s
 
     // Set offset of actual data after the blob header and user key (rounded off)
     req->blob_header()->data_offset = req->blob_header_buf().size();
-    RELEASE_ASSERT(req->blob_header()->data_offset == _data_block_size,
-                       "blob header should equals _data_block_size");
+    RELEASE_ASSERT(req->blob_header()->data_offset == _data_block_size, "blob header should equals _data_block_size");
     // In case blob body is not aligned, create a new aligned buffer and copy the blob body.
     if (((r_cast< uintptr_t >(blob.body.cbytes()) % io_align) != 0) || ((blob_size % io_align) != 0)) {
         // If address or size is not aligned, create a separate aligned buffer and do expensive memcpy.
@@ -367,9 +366,7 @@ BlobManager::AsyncResult< Blob > HSHomeObject::_get_blob_data(const shared< home
             }
 
             auto verify_result = do_verify_blob(read_buf.cbytes(), shard_id, 0 /* no blob_id check */);
-            if (!verify_result.hasValue()) {
-                return folly::makeUnexpected(verify_result.error());
-            }
+            if (!verify_result.hasValue()) { return folly::makeUnexpected(verify_result.error()); }
             std::string user_key = std::move(verify_result.value());
 
             BlobHeader const* header = r_cast< BlobHeader const* >(read_buf.cbytes());
@@ -742,4 +739,22 @@ bool HSHomeObject::verify_blob(const void* blob, const shard_id_t shard_id, cons
     auto result = do_verify_blob(blob, shard_id, blob_id);
     return result.hasValue();
 }
+
+// Pre-commit hook for DEL_BLOB_MSG: reports the blob that is about to be deleted, together with its lsn, to the
+// scrub manager (when one exists). Returns true; a corrupted message header trips RELEASE_ASSERT instead.
+bool HSHomeObject::on_blob_del_pre_commit(int64_t lsn, sisl::blob const& header, sisl::blob const& key,
+                                          cintrusive< homestore::repl_req_ctx >& hs_ctx) {
+    auto msg_header = r_cast< ReplicationMessageHeader* >(const_cast< uint8_t* >(header.cbytes()));
+    if (msg_header->corrupted()) {
+        // since log has been appended, we crash here immediately.
+        RELEASE_ASSERT(false, "corrupted header caught in on_blob_del_pre_commit, lsn={}", lsn);
+        return false;
+    }
+    const auto& pg_id = msg_header->pg_id;
+    const auto& shard_id = msg_header->shard_id;
+    const auto& blob_id = *r_cast< blob_id_t const* >(key.cbytes()); // NOTE(review): assumes key holds a blob_id_t
+    LOGD("Received del_blob pre-commit for pg={}, shard=0x{:x}, blob_id={}, lsn={}", pg_id, shard_id, blob_id, lsn);
+    if (scrub_mgr_) { scrub_mgr_->add_pg_deleted_blob(pg_id, {shard_id, blob_id}, lsn); }
+    return true;
+}
 } // namespace homeobject
diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp
index d5a11c78d..c156bb04f 100644
--- a/src/lib/homestore_backend/hs_homeobject.hpp
+++ b/src/lib/homestore_backend/hs_homeobject.hpp
@@ -942,6 +942,8 @@ class HSHomeObject : public HomeObjectImpl {
 
     bool on_shard_message_pre_commit(int64_t lsn, sisl::blob const& header, sisl::blob const& key,
                                      cintrusive< homestore::repl_req_ctx >& hs_ctx);
+    bool on_blob_del_pre_commit(int64_t lsn, sisl::blob const& header, sisl::blob const& key,
+                                cintrusive< homestore::repl_req_ctx >& hs_ctx);
     void on_shard_message_rollback(int64_t lsn, sisl::blob const& header, sisl::blob const& key,
                                    cintrusive< homestore::repl_req_ctx >& hs_ctx);
 
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index 5d022213b..f3c62fa43 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -97,6 +97,9 @@ bool ReplicationStateMachine::on_pre_commit(int64_t lsn, sisl::blob const& heade
     case ReplicationMessageType::SEAL_SHARD_MSG: {
         return home_object_->on_shard_message_pre_commit(lsn, header, key, ctx);
     }
+    case ReplicationMessageType::DEL_BLOB_MSG: {
+        return home_object_->on_blob_del_pre_commit(lsn, header, key, ctx);
+    }
     default: {
         break;
     }
diff --git a/src/lib/homestore_backend/scrub_manager.cpp b/src/lib/homestore_backend/scrub_manager.cpp
index 4908782a4..2d8f6947a 100644
--- a/src/lib/homestore_backend/scrub_manager.cpp
+++ b/src/lib/homestore_backend/scrub_manager.cpp
@@ -187,6 +187,17 @@ bool ScrubManager::add_scrub_map(const pg_id_t pg_id, std::shared_ptr< BaseScrub
     return pg_scrub_ctx->add_scrub_map(std::move(bsm));
 }
 
+// Records a blob deletion (and its lsn) in the scrub context of pg_id; when the PG has no scrub context the
+// deletion is only logged at debug level and dropped.
+void ScrubManager::add_pg_deleted_blob(const pg_id_t pg_id, const BlobRoute& blob_route, int64_t delete_lsn) {
+    auto ctx_it = m_pg_scrub_ctx_map.find(pg_id);
+    if (ctx_it != m_pg_scrub_ctx_map.end()) {
+        ctx_it->second->add_deleted_blob(blob_route, delete_lsn);
+        return;
+    }
+    LOGDEBUGMOD(scrubmgr, "can not find scrub context for pg_id={}, fail to add deleted blob!", pg_id);
+}
+
 void ScrubManager::handle_scrub_req(std::shared_ptr< base_scrub_req > req) {
     if (!req) {
         LOGERRORMOD(scrubmgr, "scrub req is null, can not handle it!");
@@ -649,6 +660,24 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
         const pg_id_t& pg_id;
 
         ~scrub_task_guard() {
+            const auto scrub_ctx_it = pg_scrub_ctx_map.find(pg_id);
+            if (scrub_ctx_it != pg_scrub_ctx_map.end()) {
+                // filter out those deleted blobs in the scrub report.
+
+                // we capture all the deleted blobs when scrubbing is ongoing, and filter them out from scrub report at
+                // the end of scrub task to make sure we will not report those deleted blobs as problematic blobs in
+                // scrub report.
+                // 1 we only capture the deleted blobs when scrubbing, since we only care about those
+                // blobs deleted during scrubbing.
+                // 2 on_pre_commit is called after log append. so, if leader switch happens during pg scrub, the delete
+                // blob log will be rollbacked , but the scrub task will also be cancelled, and will not give a final
+                // full scrub report for this scrub task.
+                // 3 if leader swith not happens during scrub, thee all the deleted blobs captured in on_pre_commit
+                // should finally be commited.
+                const auto& scrub_ctx = scrub_ctx_it->second;
+                scrub_report->filter_out_deleted_blobs(scrub_ctx->deleted_blobs_when_scrubbing_);
+            }
+
             pg_scrub_ctx_map.erase(pg_id);
             task.scrub_report_promise->setValue(scrub_report);
 
@@ -740,24 +769,6 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
         SCRUBLOGD(pg_id, task_id, "PG meta scrub completed");
     }
 
-    // scrubbing probably goes with blob deletion, and thus some of blobs might be not present on some
-    // peers even if we wait for the same scrub_lsn. Theoretically, without a strong consistent snapshot , there is not
-    // a mechanism to distinguish whether a blob/shard is missing due to deletion or due to lost, this is the
-    // predicament we are in now with oure current design:
-
-    // 1 no blob delete lsn，
-    // 2 no shard sealed lsn
-    // 3 no snapshot which can provide a strong consistent view of the
-    // blob/shard existence at the scrub_lsn.
-
-    // we can only rely on the best effort of waiting for all peers to reach the same scrub_lsn, but it is not
-    // guaranteed. As a result, we might have false positive missing blobs due to deletion!!!!
-
-    // TODO: figure out a solution to mitigate the false positive issue, for example, we can add a "blob delete lsn" and
-    // "shard sealed lsn". for all the missblobs, if its deletd lsn is after scrub_lsn, then it is a false positive
-    // missing blob, and we can move it out of missblobs. this can be done by leader when merging all the scrub maps for
-    // a specific scrub req.
-
     // Step 2: Scrub Shard Range
     SCRUBLOGD(pg_id, task_id, "Starting shard range {} scrub", is_deep_scrub ? "deep" : "shallow");
     {
@@ -1907,4 +1918,83 @@ void ScrubManager::DeepScrubReport::merge(const std::map< peer_id_t, std::shared
                corrupted_pg_metas.size());
 }
 
+// Drops from missing_blobs every blob route present in deleted_blobs_when_scrubbing, then drops peers whose set
+// of missing blobs is empty. Logs the number of removed entries when at least one was removed.
+void ScrubManager::ShallowScrubReport::filter_out_deleted_blobs(
+    const folly::ConcurrentHashMap< BlobRoute, int64_t >& deleted_blobs_when_scrubbing) {
+    // true when the given blob route was recorded as deleted
+    const auto was_deleted = [&deleted_blobs_when_scrubbing](const auto& route) {
+        return deleted_blobs_when_scrubbing.find(route) != deleted_blobs_when_scrubbing.end();
+    };
+
+    size_t removed = 0;
+    for (auto peer_it = missing_blobs.begin(); peer_it != missing_blobs.end();) {
+        auto& routes = peer_it->second;
+        for (auto route_it = routes.begin(); route_it != routes.end();) {
+            if (was_deleted(*route_it)) {
+                route_it = routes.erase(route_it);
+                ++removed;
+            } else {
+                ++route_it;
+            }
+        }
+
+        // a peer without any missing blob left (or with none to begin with) is removed from the report
+        if (routes.empty()) {
+            peer_it = missing_blobs.erase(peer_it);
+        } else {
+            ++peer_it;
+        }
+    }
+
+    if (removed == 0) { return; }
+    LOGINFOMOD(scrubmgr, "[pg={}] Filtered out {} deleted blobs from shallow scrub report", pg_id_, removed);
+}
+
+// Deep-scrub variant: after the base class has filtered missing_blobs, removes every blob recorded in
+// deleted_blobs_when_scrubbing from corrupted_blobs (dropping peers left without entries) and from
+// inconsistent_blobs. The deep-scrub log line counts only the entries removed here.
+void ScrubManager::DeepScrubReport::filter_out_deleted_blobs(
+    const folly::ConcurrentHashMap< BlobRoute, int64_t >& deleted_blobs_when_scrubbing) {
+    // First filter the base class missing_blobs
+    ShallowScrubReport::filter_out_deleted_blobs(deleted_blobs_when_scrubbing);
+
+    // true when the given blob route was recorded as deleted
+    const auto was_deleted = [&deleted_blobs_when_scrubbing](const auto& route) {
+        return deleted_blobs_when_scrubbing.find(route) != deleted_blobs_when_scrubbing.end();
+    };
+    size_t removed = 0;
+
+    // corrupted_blobs: filter each peer's map, then drop the peer if nothing is left for it
+    for (auto peer_it = corrupted_blobs.begin(); peer_it != corrupted_blobs.end();) {
+        auto& per_peer = peer_it->second;
+        for (auto entry_it = per_peer.begin(); entry_it != per_peer.end();) {
+            if (was_deleted(entry_it->first)) {
+                entry_it = per_peer.erase(entry_it);
+                ++removed;
+            } else {
+                ++entry_it;
+            }
+        }
+        if (per_peer.empty()) {
+            peer_it = corrupted_blobs.erase(peer_it);
+        } else {
+            ++peer_it;
+        }
+    }
+
+    // inconsistent_blobs: entries are keyed by blob route
+    for (auto blob_it = inconsistent_blobs.begin(); blob_it != inconsistent_blobs.end();) {
+        if (was_deleted(blob_it->first)) {
+            blob_it = inconsistent_blobs.erase(blob_it);
+            ++removed;
+        } else {
+            ++blob_it;
+        }
+    }
+
+    if (removed == 0) { return; }
+    LOGINFOMOD(scrubmgr, "[pg={}] Filtered out {} deleted blobs from deep scrub report", pg_id_, removed);
+}
+
 } // namespace homeobject
\ No newline at end of file
diff --git a/src/lib/homestore_backend/scrub_manager.hpp b/src/lib/homestore_backend/scrub_manager.hpp
index 771ec5263..f6bf37276 100644
--- a/src/lib/homestore_backend/scrub_manager.hpp
+++ b/src/lib/homestore_backend/scrub_manager.hpp
@@ -260,6 +260,8 @@ class ScrubManager {
         const auto& get_missing_blobs() const { return missing_blobs; }
         virtual void merge(const std::map< peer_id_t, std::shared_ptr< BaseScrubMap > >& peer_sm_map);
         virtual void print() const;
+        virtual void
+        filter_out_deleted_blobs(const folly::ConcurrentHashMap< BlobRoute, int64_t >& deleted_blobs_when_scrubbing);
 
     public:
         std::map< peer_id_t, std::set< shard_id_t > > missing_shard_ids;
@@ -291,6 +293,8 @@ class ScrubManager {
         const auto& get_corrupted_pg_metas() const { return corrupted_pg_metas; }
         void merge(const std::map< peer_id_t, std::shared_ptr< BaseScrubMap > >& peer_sm_map) override;
         void print() const override;
+        virtual void filter_out_deleted_blobs(
+            const folly::ConcurrentHashMap< BlobRoute, int64_t >& deleted_blobs_when_scrubbing) override;
 
     private:
         std::map< peer_id_t, std::map< BlobRoute, ScrubResult > > corrupted_blobs;
@@ -389,6 +393,9 @@ class ScrubManager {
         std::vector< peer_id_t > get_peers_to_retry() const;
         void cancel();
         bool is_cancelled() const { return cancelled.load(); }
+        void add_deleted_blob(const BlobRoute& blob_route, int64_t delete_lsn) {
+            deleted_blobs_when_scrubbing_.try_emplace(blob_route, delete_lsn); // an existing entry is kept
+        }
 
     public:
         uint64_t task_id{0};
@@ -397,6 +404,7 @@ class ScrubManager {
         atomic_uint64_t req_id{0};
         mutable std::mutex mtx_;
         std::map< peer_id_t, std::shared_ptr< BaseScrubMap > > peer_sm_map_;
+        folly::ConcurrentHashMap< BlobRoute, int64_t > deleted_blobs_when_scrubbing_;
 
     private:
         std::atomic_bool cancelled{false};
@@ -424,6 +432,7 @@ class ScrubManager {
     std::optional< pg_scrub_superblk > get_scrub_superblk(const pg_id_t pg_id) const;
     void save_scrub_superblk(const pg_id_t pg_id, const bool is_deep_scrub, bool force_update = true);
     void add_scrub_req(std::shared_ptr< base_scrub_req > req);
+    void add_pg_deleted_blob(const pg_id_t pg_id, const BlobRoute& blob_route, int64_t delete_lsn);
 
     /*local scrub*/
 public:

From 40463ac34020dd3f06cf26c8bb80d2dbfe77e9d6 Mon Sep 17 00:00:00 2001
From: Jie Yao <jyao3@ebaychina.com>
Date: Fri, 3 Apr 2026 14:36:55 +0800
Subject: [PATCH 4/4] fix bug and add UT

---
 .../homestore_backend/hs_shard_manager.cpp    |   2 +-
 src/lib/homestore_backend/scrub_manager.cpp   | 123 +++--
 src/lib/homestore_backend/scrub_manager.hpp   |   6 +-
 .../tests/homeobj_fixture.hpp                 |  23 +-
 .../tests/hs_scrubber_tests.cpp               | 445 +++++++++++++++++-
 5 files changed, 540 insertions(+), 59 deletions(-)

diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp
index 387515b89..a1567849f 100644
--- a/src/lib/homestore_backend/hs_shard_manager.cpp
+++ b/src/lib/homestore_backend/hs_shard_manager.cpp
@@ -745,6 +745,7 @@ void HSHomeObject::delete_shard_from_map(shard_id_t shard_id) {
                    (shard_id >> homeobject::shard_width), (shard_id & homeobject::shard_mask));
     auto hs_shard = d_cast< HS_Shard* >((*shard_iter->second).get());
     const auto pg_id = hs_shard->info.placement_group;
+    auto p_chunk_id = hs_shard->p_chunk_id();
 
     auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pg_id));
     RELEASE_ASSERT(hs_pg, "Missing pg info, pg={}", pg_id);
@@ -752,7 +753,6 @@ void HSHomeObject::delete_shard_from_map(shard_id_t shard_id) {
     shards.remove_if([shard_id](auto& shard_it) { return (shard_it->info).id == shard_id; });
     _shard_map.erase(shard_id);
 
-    auto p_chunk_id = hs_shard->p_chunk_id();
     chunk_to_shards_map_[p_chunk_id].erase(shard_id);
     // TODO:: delete shard meta blk
 }
diff --git a/src/lib/homestore_backend/scrub_manager.cpp b/src/lib/homestore_backend/scrub_manager.cpp
index 2d8f6947a..92fb77033 100644
--- a/src/lib/homestore_backend/scrub_manager.cpp
+++ b/src/lib/homestore_backend/scrub_manager.cpp
@@ -260,15 +260,18 @@ void ScrubManager::handle_scrub_req(std::shared_ptr< base_scrub_req > req) {
     }
 
     // 2 send scrub map back to leader
-    auto flatbuffer = scrub_map->build_flat_buffer();
+    auto flatbuffer = std::make_shared< flatbuffers::DetachedBuffer >(scrub_map->build_flat_buffer());
+    auto scrub_type_buffer = std::make_shared< SCRUB_TYPE >(scrub_type);
+
     sisl::io_blob_list_t blob_list;
-    blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
-    blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
+    blob_list.emplace_back(reinterpret_cast< const uint8_t* >(scrub_type_buffer.get()), sizeof(scrub_type), false);
+    blob_list.emplace_back(flatbuffer->data(), flatbuffer->size(), false);
 
     // no need to retry, leader will handle retries
     pg_repl_dev->data_request_unidirectional(remote_peer_id, HSHomeObject::PUSH_SCRUB_MAP, blob_list)
         .via(&folly::InlineExecutor::instance())
-        .thenValue([pg_id, remote_peer_id, scrub_type, task_id](auto&& response) {
+        .thenValue([pg_id, remote_peer_id, task_id, flatbuffer, scrub_type_buffer](auto&& response) {
+            const auto scrub_type = *scrub_type_buffer;
             if (response.hasError()) {
                 SCRUBLOGD(pg_id, task_id, "failed to send scrub map to peer {}, scrub_type:{}, error={}",
                           remote_peer_id, scrub_type, response.error());
@@ -528,12 +531,12 @@ folly::SemiFuture< std::shared_ptr< ScrubManager::ShallowScrubReport > >
 ScrubManager::submit_scrub_task(const pg_id_t& pg_id, const bool is_deep, const bool force,
                                 SCRUB_TRIGGER_TYPE trigger_type) {
     LOGINFOMOD(scrubmgr, "submit a scrub task for pg={}, deep_scrub:{}", pg_id, is_deep);
+
+    // Check if a scrub task is already running for this PG
+    // Note: There's still a small race window between this check and task execution in handle_pg_scrub_task,
+    // but the try_emplace there provides the final guard. This check prevents unnecessary task queueing.
     auto it = m_pg_scrub_ctx_map.find(pg_id);
     if (it != m_pg_scrub_ctx_map.end()) {
-        // TODO:: there is case that two thread try to submit scrub task for the same pg at the same time, we can
-        // optimize it by adding a lock for each pg or using atomic operation to make sure only one scrub task can be
-        // submitted for each pg, and other threads can get the existing scrub task if they want to submit another scrub
-        // task for the same pg.
         LOGWARNMOD(scrubmgr, "a scrub task is already running for pg={}, no need to submit another one!", pg_id);
         return folly::makeFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr));
     }
@@ -588,22 +591,27 @@ void ScrubManager::cancel_scrub_task(const pg_id_t& pg_id) {
 }
 
 // Helper function to send scrub requests to all peers and handle retries
-bool ScrubManager::send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
+bool ScrubManager::send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id, shared< homestore::ReplDev > pg_repl_dev,
                                            const std::unordered_set< peer_id_t >& all_member_peer_ids,
-                                           const peer_id_t& my_uuid, shared< homestore::ReplDev > pg_repl_dev,
-                                           const sisl::io_blob_list_t& req_blob_list,
-                                           std::shared_ptr< PGScrubContext > scrub_ctx, uint32_t max_retries,
-                                           std::chrono::seconds timeout, const std::string& scrub_type_name) {
+                                           const peer_id_t& my_uuid,
+                                           std::shared_ptr< flatbuffers::DetachedBuffer > flat_buffer,
+                                           SCRUB_TYPE scrub_type, std::shared_ptr< PGScrubContext > scrub_ctx,
+                                           uint32_t max_retries, std::chrono::seconds timeout) {
+    auto scrub_type_buffer = std::make_shared< SCRUB_TYPE >(scrub_type);
+    sisl::io_blob_list_t blob_list;
+    blob_list.emplace_back(reinterpret_cast< const uint8_t* >(scrub_type_buffer.get()), sizeof(scrub_type), false);
+    blob_list.emplace_back(flat_buffer->data(), flat_buffer->size(), false);
+
     // Lambda to send requests to a list of peers
-    auto send_requests_to_remote_peers = [&](const auto& peer_list, bool is_retry) {
+    auto send_requests_to_remote_peers = [&, flat_buffer, scrub_type_buffer](const auto& peer_list, bool is_retry) {
         for (const auto& peer_id : peer_list) {
             if (peer_id == my_uuid) continue;
-            pg_repl_dev->data_request_unidirectional(peer_id, HSHomeObject::PUSH_SCRUB_REQ, req_blob_list)
+            pg_repl_dev->data_request_unidirectional(peer_id, HSHomeObject::PUSH_SCRUB_REQ, blob_list)
                 .via(&folly::InlineExecutor::instance())
-                .thenValue([pg_id, peer_id, task_id, scrub_type_name, is_retry](auto&& response) {
+                .thenValue([pg_id, peer_id, task_id, flat_buffer, scrub_type_buffer, is_retry](auto&& response) {
                     if (response.hasError()) {
                         SCRUBLOGE(pg_id, task_id, "{} to send {} scrub request to peer {}",
-                                  is_retry ? "retry failed" : "failed", scrub_type_name, peer_id);
+                                  is_retry ? "retry failed" : "failed", *scrub_type_buffer, peer_id);
                     }
                 });
         }
@@ -618,7 +626,7 @@ bool ScrubManager::send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
             auto peers_to_retry = scrub_ctx->get_peers_to_retry();
             if (peers_to_retry.empty()) break;
 
-            SCRUBLOGD(pg_id, task_id, "Retrying {} scrub for {} peers", scrub_type_name, peers_to_retry.size());
+            SCRUBLOGD(pg_id, task_id, "Retrying {} scrub for {} peers", scrub_type, peers_to_retry.size());
             send_requests_to_remote_peers(peers_to_retry, true);
 
             if (scrub_ctx->wait_for_all_req_sms(timeout)) break;
@@ -632,7 +640,7 @@ bool ScrubManager::send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
         is_incomplete = scrub_ctx->peer_sm_map_.size() != scrub_ctx->member_peer_ids_.size();
     }
     if (scrub_ctx->is_cancelled() || is_incomplete) {
-        SCRUBLOGD(pg_id, task_id, "scrub task is cancelled or incomplete when scrubbing {}!", scrub_type_name);
+        SCRUBLOGD(pg_id, task_id, "scrub task is cancelled or incomplete when scrubbing {}!", scrub_type);
         return false;
     }
     return true;
@@ -707,6 +715,8 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
     RELEASE_ASSERT(all_member_peer_ids.find(my_uuid) != all_member_peer_ids.end(),
                    "my uuid={} is not in the member list of this pg, something is wrong!", my_uuid);
 
+    // Use try_emplace for atomic check-and-insert to avoid race condition
+    // This is safe because try_emplace is atomic in folly::ConcurrentHashMap
     auto [ctx_it, happened] =
         m_pg_scrub_ctx_map.try_emplace(pg_id, std::make_shared< PGScrubContext >(task_id, all_member_peer_ids));
     if (!happened) {
@@ -736,14 +746,11 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
         SCRUBLOGD(pg_id, task_id, "Starting PG meta scrub");
         auto pg_meta_req = std::make_shared< base_scrub_req >(task_id, scrub_ctx->req_id.fetch_add(1), scrub_lsn,
                                                               my_uuid, pg_id, true);
-        // TODO:: add a lock here to protect add_scrub_map when changing current_req.
-        scrub_ctx->current_req = pg_meta_req;
-        // Send requests to all peers
-        auto flatbuffer = pg_meta_req->build_flat_buffer();
-        sisl::io_blob_list_t req_blob_list;
-        const auto scrub_type = SCRUB_TYPE::PG_META;
-        req_blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
-        req_blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
+        // Protect current_req update with lock to avoid race with add_scrub_map
+        {
+            std::lock_guard lock(scrub_ctx->mtx_);
+            scrub_ctx->current_req = pg_meta_req;
+        }
 
         // Scrub locally async (runs in parallel with remote requests)
         m_scrub_req_executor->add([this, pg_meta_req, scrub_ctx, pg_id, task_id]() {
@@ -756,8 +763,10 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
         });
 
         // Send requests to all peers and wait for responses
-        if (!send_scrub_req_and_wait(pg_id, task_id, all_member_peer_ids, my_uuid, pg_repl_dev, req_blob_list,
-                                     scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT, "PG meta")) {
+        auto flatbuffer = std::make_shared< flatbuffers::DetachedBuffer >(pg_meta_req->build_flat_buffer());
+        const auto scrub_type = SCRUB_TYPE::PG_META;
+        if (!send_scrub_req_and_wait(pg_id, task_id, pg_repl_dev, all_member_peer_ids, my_uuid, flatbuffer, scrub_type,
+                                     scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT)) {
             return;
         }
 
@@ -791,17 +800,21 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
 
         // Scrub shard range
         uint64_t shard_start = 0;
-        uint64_t shard_end = shard_scrub_range_size;
         uint64_t shard_range_count = 0;
-        for (; shard_start <= last_shard_id;
-             shard_start = shard_end + 1, shard_end = std::min(shard_end + shard_scrub_range_size, last_shard_id)) {
+        while (shard_start <= last_shard_id) {
+            // Calculate end of current range (inclusive), ensuring it doesn't exceed last_shard_id
+            uint64_t shard_end = std::min(shard_start + shard_scrub_range_size - 1, last_shard_id);
             ++shard_range_count;
             SCRUBLOGD(pg_id, task_id, "Scrubbing shard range {}: [{}, {}]", shard_range_count, shard_start, shard_end);
 
             auto shard_req = std::make_shared< shard_scrub_req >(task_id, scrub_ctx->req_id.fetch_add(1), scrub_lsn,
                                                                  my_uuid, pg_id, shard_start, shard_end, is_deep_scrub);
             scrub_ctx->reset_for_new_req();
-            scrub_ctx->current_req = shard_req;
+            // Protect current_req update with lock to avoid race with add_scrub_map
+            {
+                std::lock_guard lock(scrub_ctx->mtx_);
+                scrub_ctx->current_req = shard_req;
+            }
 
             // scrub locally async (runs in parallel with remote requests)
             m_scrub_req_executor->add([this, shard_req, scrub_ctx, pg_id, task_id, is_deep_scrub]() {
@@ -815,23 +828,23 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
             });
 
             // request remote peers to scrub this shard range and wait for responses
-            auto flatbuffer = shard_req->build_flat_buffer();
-            sisl::io_blob_list_t req_blob_list;
+            auto flatbuffer = std::make_shared< flatbuffers::DetachedBuffer >(shard_req->build_flat_buffer());
             const auto scrub_type = is_deep_scrub ? SCRUB_TYPE::DEEP_SHARD : SCRUB_TYPE::SHALLOW_SHARD;
-            req_blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
-            req_blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
 
-            if (!send_scrub_req_and_wait(pg_id, task_id, all_member_peer_ids, my_uuid, pg_repl_dev, req_blob_list,
-                                         scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT, "shard")) {
+            if (!send_scrub_req_and_wait(pg_id, task_id, pg_repl_dev, all_member_peer_ids, my_uuid, flatbuffer,
+                                         scrub_type, scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT)) {
                 SCRUBLOGE(pg_id, task_id, "shard scrub failed or was cancelled");
                 return;
             }
 
             SCRUBLOGD(pg_id, task_id, "Merging shard scrub results for range [{}, {}]", shard_start, shard_end);
             {
-                std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
+                std::lock_guard lock(scrub_ctx->mtx_);
                 pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
             }
+
+            // Move to next range
+            shard_start = shard_end + 1;
         }
         SCRUBLOGD(pg_id, task_id, "shard scrub completed, total ranges scrubbed: {}", shard_range_count);
     }
@@ -853,17 +866,21 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
 
         // Scrub blob range
         uint64_t blob_start = 0;
-        uint64_t blob_end = blob_scrub_range_size;
         uint64_t blob_range_count = 0;
-        for (; blob_start <= last_blob_id;
-             blob_start = blob_end + 1, blob_end = std::min(blob_end + blob_scrub_range_size, last_blob_id)) {
+        while (blob_start <= last_blob_id) {
+            // Calculate end of current range (inclusive), ensuring it doesn't exceed last_blob_id
+            uint64_t blob_end = std::min(blob_start + blob_scrub_range_size - 1, last_blob_id);
             ++blob_range_count;
             SCRUBLOGD(pg_id, task_id, "Scrubbing blob range {}: [{}, {}]", blob_range_count, blob_start, blob_end);
 
             auto blob_req = std::make_shared< blob_scrub_req >(task_id, scrub_ctx->req_id.fetch_add(1), scrub_lsn,
                                                                my_uuid, pg_id, blob_start, blob_end, is_deep_scrub);
             scrub_ctx->reset_for_new_req();
-            scrub_ctx->current_req = blob_req;
+            // Protect current_req update with lock to avoid race with add_scrub_map
+            {
+                std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
+                scrub_ctx->current_req = blob_req;
+            }
 
             // locally scrub this blob range async (runs in parallel with remote requests)
             m_scrub_req_executor->add([this, blob_req, scrub_ctx, pg_id, task_id, is_deep_scrub]() {
@@ -877,14 +894,10 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
             });
 
             // request remote peers to scrub this blob range and wait for responses
-            auto flatbuffer = blob_req->build_flat_buffer();
-            sisl::io_blob_list_t req_blob_list;
+            auto flatbuffer = std::make_shared< flatbuffers::DetachedBuffer >(blob_req->build_flat_buffer());
             const auto scrub_type = is_deep_scrub ? SCRUB_TYPE::DEEP_BLOB : SCRUB_TYPE::SHALLOW_BLOB;
-            req_blob_list.emplace_back(reinterpret_cast< const uint8_t* >(&scrub_type), sizeof(scrub_type), false);
-            req_blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false);
-
-            if (!send_scrub_req_and_wait(pg_id, task_id, all_member_peer_ids, my_uuid, pg_repl_dev, req_blob_list,
-                                         scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT, "blob")) {
+            if (!send_scrub_req_and_wait(pg_id, task_id, pg_repl_dev, all_member_peer_ids, my_uuid, flatbuffer,
+                                         scrub_type, scrub_ctx, MAX_RETRIES, SM_REQUEST_TIMEOUT)) {
                 SCRUBLOGE(pg_id, task_id, "blob scrub failed or was cancelled");
                 return;
             }
@@ -894,6 +907,9 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
                 std::lock_guard< std::mutex > lock(scrub_ctx->mtx_);
                 pg_scrub_report->merge(scrub_ctx->peer_sm_map_);
             }
+
+            // Move to next range
+            blob_start = blob_end + 1;
         }
         SCRUBLOGD(pg_id, task_id, "blob scrub completed, total ranges scrubbed: {}", blob_range_count);
     }
@@ -901,6 +917,11 @@ void ScrubManager::handle_pg_scrub_task(scrub_task task) {
     // only if pg is successfully scrubbed, we persist scrub metablk.
     save_scrub_superblk(pg_id, is_deep_scrub, true);
     SCRUBLOGD(pg_id, task_id, "successfully complete {} scrub task!", is_deep_scrub ? "deep" : "shallow");
+
+#ifdef _PRERELEASE
+    // Trigger the callback flip to delete missing blob during scrub if enabled
+    iomgr_flip::instance()->callback_flip("delete_missing_blob_through_raft");
+#endif
 }
 
 void ScrubManager::add_pg(const pg_id_t pg_id) {
diff --git a/src/lib/homestore_backend/scrub_manager.hpp b/src/lib/homestore_backend/scrub_manager.hpp
index f6bf37276..c4e79d46b 100644
--- a/src/lib/homestore_backend/scrub_manager.hpp
+++ b/src/lib/homestore_backend/scrub_manager.hpp
@@ -445,11 +445,11 @@ class ScrubManager {
     void scan_pg_for_scrub();
     void handle_pg_scrub_task(scrub_task task);
 
-    bool send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id,
+    bool send_scrub_req_and_wait(pg_id_t pg_id, uint64_t task_id, shared< homestore::ReplDev > pg_repl_dev,
                                  const std::unordered_set< peer_id_t >& all_member_peer_ids, const peer_id_t& my_uuid,
-                                 shared< homestore::ReplDev > pg_repl_dev, const sisl::io_blob_list_t& req_blob_list,
+                                 std::shared_ptr< flatbuffers::DetachedBuffer > flat_buffer, SCRUB_TYPE scrub_type,
                                  std::shared_ptr< PGScrubContext > scrub_ctx, uint32_t max_retries,
-                                 std::chrono::seconds timeout, const std::string& scrub_type_name);
+                                 std::chrono::seconds timeout);
 
     bool is_eligible_for_deep_scrub(const pg_id_t& pg_id);
     bool is_eligible_for_shallow_scrub(const pg_id_t& pg_id);
diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
index 499968ab3..955884847 100644
--- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp
+++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp
@@ -49,7 +49,7 @@ class HomeObjectFixture : public ::testing::Test {
 
         HSHomeObject::_hs_chunk_size = SISL_OPTIONS["chunk_size"].as< uint64_t >() * Mi;
         _obj_inst = std::dynamic_pointer_cast< HSHomeObject >(g_helper->build_new_homeobject());
-        
+
         // Used to export metrics, it should be called after init_homeobject
         if (SISL_OPTIONS["enable_http"].as< bool >()) { g_helper->app->start_http_server(); }
         if (!g_helper->is_current_testcase_restarted()) {
@@ -906,6 +906,27 @@ class HomeObjectFixture : public ::testing::Test {
         LOGINFO("Flip {} set", flip_name);
     }
 
+    void set_callback_flip(const std::string flip_name, std::function< void() > callback, uint32_t count = 1,
+                           uint32_t percent = 100) {
+        flip::FlipFrequency frequency; // carries the count/percent settings for this flip
+        frequency.set_percent(percent);
+        frequency.set_count(count);
+        flip::FlipCondition unconditional; // default-constructed condition, passed as the only condition
+        m_fc.inject_callback_flip(flip_name, {unconditional}, frequency, callback);
+        LOGINFO("Flip {} with callback set", flip_name);
+    }
+
+    template < typename T >
+    void set_callback_retval_flip(const std::string flip_name, std::function< T() > callback, uint32_t count = 1,
+                                  uint32_t percent = 100) {
+        flip::FlipFrequency frequency; // carries the count/percent settings for this flip
+        frequency.set_percent(percent);
+        frequency.set_count(count);
+        flip::FlipCondition unconditional; // default-constructed condition, passed as the only condition
+        ASSERT_TRUE(m_fc.inject_callback_retval_flip(flip_name, {unconditional}, frequency, callback));
+        LOGINFO("Flip {} with callback retval set", flip_name);
+    }
+
     void remove_flip(const std::string flip_name) {
         m_fc.remove_flip(flip_name);
         LOGINFO("Flip {} removed", flip_name);
diff --git a/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp b/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp
index ab7636180..7b6881e37 100644
--- a/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp
+++ b/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp
@@ -247,8 +247,6 @@ TEST_F(HomeObjectFixture, BasicScrubTest) {
             << "Empty PG should have no missing shards in shallow scrub";
     });
 
-    g_helper->sync();
-
     // Create blobs in all shards
     shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs_per_shard, pg_blob_id);
     LOGINFO("Created {} blobs per shard, total {} blobs", num_blobs_per_shard, num_shards * num_blobs_per_shard);
@@ -289,6 +287,7 @@ TEST_F(HomeObjectFixture, BasicScrubTest) {
     });
 
     g_helper->sync();
+
     const auto hs_pg = _obj_inst->get_hs_pg(pg_id);
     ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id;
 
@@ -526,7 +525,6 @@ TEST_F(HomeObjectFixture, ScrubSuperblockPersistenceTest) {
 
     const uint64_t shard_size = 64 * Mi;
     create_shard(pg_id, shard_size, "shard_meta");
-
     auto scrub_mgr = _obj_inst->scrub_manager();
 
     run_on_pg_leader(pg_id, [&]() {
@@ -565,5 +563,446 @@ TEST_F(HomeObjectFixture, ScrubSuperblockPersistenceTest) {
             << "Shallow scrub timestamp should be updated";
     });
 
+    g_helper->sync();
+}
+
+// Verifies that cancelling a scrub task (whether one was submitted or not) does not crash
+TEST_F(HomeObjectFixture, CancelScrubTaskTest) {
+    const pg_id_t pg_id = 1;
+    create_pg(pg_id);
+    auto scrub_mgr = _obj_inst->scrub_manager();
+
+    const uint64_t shard_size = 64 * Mi;
+    auto shard_info = create_shard(pg_id, shard_size, "shard meta");
+
+    std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec;
+    std::map< pg_id_t, blob_id_t > pg_blob_id;
+    pg_shard_id_vec[pg_id].push_back(shard_info.id);
+    pg_blob_id[pg_id] = 0;
+
+    const uint64_t num_blobs = 10;
+    put_blobs(pg_shard_id_vec, num_blobs, pg_blob_id);
+    g_helper->sync();
+
+    // Submit a deep scrub, give it 100ms to get going, then cancel it
+    run_on_pg_leader(pg_id, [&]() {
+        auto scrub_future = scrub_mgr->submit_scrub_task(pg_id, true, false, SCRUB_TRIGGER_TYPE::MANUALLY);
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        scrub_mgr->cancel_scrub_task(pg_id);
+        LOGINFO("Cancelled scrub task for pg={}", pg_id);
+        auto scrub_report = std::move(scrub_future).get();
+
+        // The report might be null or have partial results due to cancellation (the scrub may also have
+        // finished before the cancel arrived), so we only verify that cancel doesn't cause a crash
+        LOGINFO("Scrub task cancelled, report: {}", scrub_report ? "present" : "null");
+    });
+
+    // Cancelling when no task has been submitted for this pg - should not crash
+    run_on_pg_leader(pg_id, [&]() {
+        scrub_mgr->cancel_scrub_task(pg_id);
+        LOGINFO("Cancel non-existent scrub task for pg={} - should not crash", pg_id);
+    });
+
+    g_helper->sync();
+}
+
+// Test concurrent scrubs on multiple PGs
+TEST_F(HomeObjectFixture, ConcurrentScrubsOnMultiplePGsTest) {
+    const uint64_t num_pgs = 3;
+    const uint64_t shard_size = 64 * Mi;
+
+    std::vector< pg_id_t > pg_ids;
+    std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec;
+    std::map< pg_id_t, blob_id_t > pg_blob_id;
+
+    // Create multiple PGs with shards and blobs
+    for (uint64_t i = 1; i <= num_pgs; ++i) {
+        pg_id_t pg_id = i;
+        pg_ids.push_back(pg_id);
+        create_pg(pg_id);
+        auto shard_info = create_shard(pg_id, shard_size, "shard meta " + std::to_string(pg_id));
+        pg_shard_id_vec[pg_id].push_back(shard_info.id);
+        pg_blob_id[pg_id] = 0;
+        put_blobs(pg_shard_id_vec, 5, pg_blob_id);
+    }
+
+    auto scrub_mgr = _obj_inst->scrub_manager();
+
+    // Submit scrub tasks for all PGs concurrently, recording which pg each future belongs to
+    std::vector< folly::SemiFuture< std::shared_ptr< ScrubManager::ShallowScrubReport > > > scrub_futures;
+    std::vector< pg_id_t > submitted_pg_ids;
+    for (const auto& pg_id : pg_ids) {
+        run_on_pg_leader(pg_id, [&]() {
+            scrub_futures.push_back(scrub_mgr->submit_scrub_task(pg_id, true, false, SCRUB_TRIGGER_TYPE::MANUALLY));
+            submitted_pg_ids.push_back(pg_id);
+            LOGINFO("Submitted deep scrub for pg={}", pg_id);
+        });
+    }
+
+    // Wait for all scrub tasks to complete (futures line up with submitted_pg_ids, not with pg_ids)
+    for (size_t i = 0; i < scrub_futures.size(); ++i) {
+        auto report = std::move(scrub_futures[i]).get();
+        if (report) {
+            LOGINFO("PG {} scrub completed, report present", submitted_pg_ids[i]);
+        } else {
+            LOGWARN("PG {} scrub returned null report", submitted_pg_ids[i]);
+        }
+    }
+
+    g_helper->sync();
+}
+
+// Test deleted blob filter in scrub report
+TEST_F(HomeObjectFixture, DeletedBlobFilterTest) {
+    const pg_id_t pg_id = 1;
+    create_pg(pg_id);
+    auto scrub_mgr = _obj_inst->scrub_manager();
+
+    const uint64_t shard_size = 64 * Mi;
+    auto shard_info = create_shard(pg_id, shard_size, "shard meta");
+
+    std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec;
+    std::map< pg_id_t, blob_id_t > pg_blob_id;
+    pg_shard_id_vec[pg_id].push_back(shard_info.id);
+    pg_blob_id[pg_id] = 0;
+
+    std::map< shard_id_t, std::map< blob_id_t, uint64_t > > shard_blob_ids_map;
+
+    // Create some blobs
+    const uint64_t num_blobs = 10;
+    shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs, pg_blob_id);
+    const auto hs_pg = _obj_inst->get_hs_pg(pg_id);
+    ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id;
+
+    const auto shard_id = shard_info.id;
+    auto& shard_blobs = shard_blob_ids_map[shard_id];
+
+    // Select blobs to test:
+    // - missing_blob_to_delete: will be missing from follower index AND deleted via blob delete
+    // - missing_blob_not_deleted: will be missing from follower index but NOT deleted
+    auto it = shard_blobs.begin();
+    const auto missing_blob_to_delete = it->first;       // First blob: will be deleted via blob delete
+    const auto missing_blob_not_deleted = (++it)->first; // Second blob: will NOT be deleted
+
+    // Delete both blobs from index table to simulate missing blobs on followers
+    run_on_pg_follower(pg_id, [&]() {
+        auto& pg_index_table = hs_pg->index_table_;
+        delete_blob_from_index(pg_index_table, shard_id, missing_blob_to_delete);
+        delete_blob_from_index(pg_index_table, shard_id, missing_blob_not_deleted);
+        LOGINFO("Deleted blobs {} and {} from follower index table", missing_blob_to_delete, missing_blob_not_deleted);
+    });
+
+    g_helper->sync();
+
+    run_on_pg_leader(pg_id, [&]() {
+        // only the blob that was deleted via blob delete should be filtered out, the other missing blob should be
+        // reported in the scrub report
+        std::set< peer_id_t > follower_peer_ids;
+        const auto& leader_uuid = _obj_inst->our_uuid();
+        const auto& members = (hs_pg->pg_info_).members;
+        for (const auto& member : members) {
+            if (member.id == leader_uuid) { continue; }
+            follower_peer_ids.insert(member.id);
+        }
+
+        auto scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, false /* shallow */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+        ASSERT_NE(scrub_report, nullptr) << "Scrub report should not be null";
+        auto missing_blobs = scrub_report->get_missing_blobs();
+        for (const auto& peer_id : follower_peer_ids) {
+            auto it = missing_blobs.find(peer_id);
+            ASSERT_TRUE(it != missing_blobs.end()) << "Missing blob for follower should be reported in scrub report";
+            EXPECT_TRUE(it->second.size() == 2) << "There should be two missing blobs for follower in scrub report";
+            EXPECT_TRUE(it->second.count(BlobRoute{shard_id, missing_blob_to_delete}) == 1)
+                << "The missing blob that will be deleted should be reported in scrub report";
+            EXPECT_TRUE(it->second.count(BlobRoute{shard_id, missing_blob_not_deleted}) == 1)
+                << "The missing blob that will NOT be deleted should be reported in scrub report";
+        }
+
+#ifdef _PRERELEASE
+        set_callback_flip(
+            "delete_missing_blob_through_raft", std::function< void() >([this, missing_blob_to_delete, shard_id]() {
+                auto ret =
+                    _obj_inst->blob_manager()->del(shard_id, missing_blob_to_delete, generateRandomTraceId()).get();
+                LOGINFO("Blob delete via callback flip completed, ret={}", ret.hasValue());
+            }));
+
+        scrub_report =
+            scrub_mgr->submit_scrub_task(pg_id, false /* shallow */, false /* force */, SCRUB_TRIGGER_TYPE::MANUALLY)
+                .get();
+
+        remove_flip("delete_missing_blob_through_raft");
+
+        // Verify the scrub report
+        ASSERT_NE(scrub_report, nullptr) << "Scrub report should not be null";
+
+        missing_blobs = scrub_report->get_missing_blobs();
+        for (const auto& peer_id : follower_peer_ids) {
+            auto it = missing_blobs.find(peer_id);
+            ASSERT_TRUE(it != missing_blobs.end()) << "Missing blob for follower should be reported in scrub report";
+            EXPECT_TRUE(it->second.size() == 1) << "There should be one missing blob for follower in scrub report";
+            EXPECT_TRUE(it->second.count(BlobRoute{shard_id, missing_blob_not_deleted}) == 1)
+                << "The missing blob that was not deleted should be reported in scrub report";
+        }
+#endif
+    });
+
+    g_helper->sync();
+    LOGINFO("DeletedBlobFilterTest completed successfully");
+}
+
+// Test add and remove PG from scrub manager
+TEST_F(HomeObjectFixture, AddRemovePGScrubTest) {
+    const pg_id_t pg_id = 1;
+    const uint64_t shard_size = 64 * Mi;
+
+    // Create PG and verify scrub superblock is created
+    create_pg(pg_id);
+    create_shard(pg_id, shard_size, "shard meta");
+
+    auto scrub_mgr = _obj_inst->scrub_manager();
+
+    // Verify scrub superblock exists
+    run_on_pg_leader(pg_id, [&]() {
+        auto sb = scrub_mgr->get_scrub_superblk(pg_id);
+        ASSERT_TRUE(sb.has_value()) << "Scrub superblock should exist after PG creation";
+        LOGINFO("Scrub superblock created for pg={}", pg_id);
+    });
+
+    // Run a scrub to update timestamps
+    run_on_pg_leader(pg_id, [&]() {
+        // Get initial timestamp before scrub
+        auto sb_before = scrub_mgr->get_scrub_superblk(pg_id);
+        ASSERT_TRUE(sb_before.has_value()) << "Scrub superblock should exist before scrub";
+        uint64_t timestamp_before = sb_before->last_shallow_scrub_timestamp;
+        LOGINFO("Timestamp before scrub: {}", timestamp_before);
+
+        // Wait a bit to ensure timestamp will be different
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+        auto report = scrub_mgr->submit_scrub_task(pg_id, false, false, SCRUB_TRIGGER_TYPE::MANUALLY).get();
+        ASSERT_NE(report, nullptr) << "Scrub report should not be null";
+
+        // Verify timestamp was updated after scrub
+        auto sb_after = scrub_mgr->get_scrub_superblk(pg_id);
+        ASSERT_TRUE(sb_after.has_value()) << "Scrub superblock should exist after scrub";
+        uint64_t timestamp_after = sb_after->last_shallow_scrub_timestamp;
+        EXPECT_GT(timestamp_after, timestamp_before) << "Shallow scrub timestamp should be updated after scrub";
+        LOGINFO("Timestamp after scrub: {} (updated from {})", timestamp_after, timestamp_before);
+    });
+
+    // Now delete the PG - this should cancel any running scrub and remove superblock
+    run_on_pg_leader(pg_id, [&]() {
+        _obj_inst->pg_manager()->destroy_pg(pg_id);
+        LOGINFO("Deleted pg={}", pg_id);
+    });
+
+    // Submitting a scrub for the destroyed PG is expected to return a null report
+    run_on_pg_leader(pg_id, [&]() {
+        auto report = scrub_mgr->submit_scrub_task(pg_id, false, false, SCRUB_TRIGGER_TYPE::MANUALLY).get();
+        ASSERT_EQ(report, nullptr) << "Scrub report should be null after PG deletion";
+        LOGINFO("Scrub task for deleted pg={} returned null report as expected", pg_id);
+    });
+
+    // Wait for PG to be deleted
+    std::this_thread::sleep_for(std::chrono::seconds(2));
+    g_helper->sync();
+
+    // Verify scrub superblock is cleaned up - get_scrub_superblk should return nullopt
+    // Note: This might not be directly testable without internal access, so we just verify no crash
+    LOGINFO("PG deleted, scrub manager should have cleaned up");
+}
+
+// Exercise the node-local scrub entry points (shard, PG meta, blob) directly, in shallow and deep mode
+TEST_F(HomeObjectFixture, LocalScrubMethodsTest) {
+    const pg_id_t pg_id = 1;
+    create_pg(pg_id);
+    auto scrub_mgr = _obj_inst->scrub_manager();
+
+    const uint64_t shard_size = 64 * Mi;
+    auto shard_info = create_shard(pg_id, shard_size, "shard meta");
+
+    std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec;
+    std::map< pg_id_t, blob_id_t > pg_blob_id;
+    pg_shard_id_vec[pg_id].push_back(shard_info.id);
+    pg_blob_id[pg_id] = 0;
+
+    // Populate the shard so the blob scrubs below have data to walk
+    const uint64_t num_blobs = 10;
+    auto shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs, pg_blob_id);
+    LOGINFO("Created {} blobs for local scrub test", num_blobs);
+
+    g_helper->sync();
+
+    const auto hs_pg = _obj_inst->get_hs_pg(pg_id);
+    ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id;
+
+    const auto shard_id = shard_info.id;
+    auto& shard_blobs = shard_blob_ids_map[shard_id];
+
+    // Use the shard's first blob as the corruption target; begin() must not be dereferenced on an empty container
+    ASSERT_FALSE(shard_blobs.empty()) << "No blobs were created for the shard under test";
+    const auto corrupted_blob_id = shard_blobs.begin()->first;
+
+    // Corrupt the selected blob on the leader
+    run_on_pg_leader(pg_id, [&]() {
+        auto& pg_index_table = hs_pg->index_table_;
+
+        // corrupt_blob_data is a helper defined outside this chunk; presumably it damages the blob's stored data
+        corrupt_blob_data(pg_index_table, shard_id, corrupted_blob_id);
+        LOGINFO("Corrupted blob {} on leader", corrupted_blob_id);
+    });
+
+    g_helper->sync();
+
+    run_on_pg_leader(pg_id, [&]() {
+        // Create a shallow shard scrub request (last argument false)
+        auto shard_req =
+            std::make_shared< ScrubManager::shard_scrub_req >(1, 1, 0, _obj_inst->our_uuid(), pg_id, 0, 100, false);
+
+        // Test local_scrub_shard (shallow)
+        auto shallow_shard_map = scrub_mgr->local_scrub_shard(shard_req);
+        ASSERT_NE(shallow_shard_map, nullptr);
+        EXPECT_EQ(shallow_shard_map->get_scrub_type(), SCRUB_TYPE::SHALLOW_SHARD);
+        LOGINFO("Shallow shard scrub returned map with {} shards", shallow_shard_map->shards.size());
+
+        // Create a deep shard scrub request (last argument true)
+        auto deep_shard_req =
+            std::make_shared< ScrubManager::shard_scrub_req >(1, 1, 0, _obj_inst->our_uuid(), pg_id, 0, 100, true);
+
+        // Test local_scrub_shard (deep)
+        auto deep_shard_map = scrub_mgr->local_scrub_shard(deep_shard_req);
+        ASSERT_NE(deep_shard_map, nullptr);
+        EXPECT_EQ(deep_shard_map->get_scrub_type(), SCRUB_TYPE::DEEP_SHARD);
+        LOGINFO("Deep shard scrub returned map with {} shards", deep_shard_map->shards.size());
+
+        // Test scrub_pg_meta
+        auto pg_meta_req =
+            std::make_shared< ScrubManager::base_scrub_req >(1, 1, 0, _obj_inst->our_uuid(), pg_id, true);
+        auto pg_meta_map = scrub_mgr->scrub_pg_meta(pg_meta_req);
+        ASSERT_NE(pg_meta_map, nullptr);
+        EXPECT_EQ(pg_meta_map->get_scrub_type(), SCRUB_TYPE::PG_META);
+        LOGINFO("PG meta scrub completed");
+
+        // Test local_scrub_blob (shallow)
+        auto shallow_blob_req =
+            std::make_shared< ScrubManager::blob_scrub_req >(1, 1, 0, _obj_inst->our_uuid(), pg_id, 0, 100, false);
+        auto shallow_blob_map = scrub_mgr->local_scrub_blob(shallow_blob_req);
+        // May be null if no blobs exist in range
+        if (shallow_blob_map) {
+            EXPECT_EQ(shallow_blob_map->get_scrub_type(), SCRUB_TYPE::SHALLOW_BLOB);
+            LOGINFO("Shallow blob scrub completed");
+        }
+
+        // Test local_scrub_blob (deep) - should flag the blob corrupted above
+        auto deep_blob_req =
+            std::make_shared< ScrubManager::blob_scrub_req >(1, 1, 0, _obj_inst->our_uuid(), pg_id, 0, 100, true);
+        auto deep_blob_map = scrub_mgr->local_scrub_blob(deep_blob_req);
+        ASSERT_NE(deep_blob_map, nullptr);
+        EXPECT_EQ(deep_blob_map->get_scrub_type(), SCRUB_TYPE::DEEP_BLOB);
+        auto deep_blob_map_cast = std::dynamic_pointer_cast< ScrubManager::DeepBlobScrubMap >(deep_blob_map);
+        // A failed downcast yields nullptr; stop here instead of dereferencing it below
+        ASSERT_NE(deep_blob_map_cast, nullptr) << "Deep blob scrub map should be a DeepBlobScrubMap";
+        LOGINFO("Deep blob scrub completed, found {} blobs", deep_blob_map_cast->blobs.size());
+
+        // The corrupted blob must be present in the result and carry a ScrubResult of MISMATCH
+        auto corrupted_it = deep_blob_map_cast->blobs.find(BlobRoute{shard_id, corrupted_blob_id});
+        ASSERT_TRUE(corrupted_it != deep_blob_map_cast->blobs.end()) << "Corrupted blob should be in deep scrub result";
+        auto result = std::get_if< ScrubResult >(&corrupted_it->second);
+        ASSERT_TRUE(result != nullptr) << "Corrupted blob result should be ScrubResult";
+        EXPECT_EQ(*result, ScrubResult::MISMATCH) << "Corrupted blob should have MISMATCH result";
+        LOGINFO("Deep scrub correctly detected corrupted blob {}", corrupted_blob_id);
+    });
+
+    g_helper->sync();
+}
+
+// Round-trip each scrub request type through build_flat_buffer()/load() and check that the fields survive
+TEST_F(HomeObjectFixture, ScrubRequestSerializationTest) {
+    const pg_id_t pg_id = 1;
+    create_pg(pg_id);
+    const uint64_t shard_size = 64 * Mi;
+    create_shard(pg_id, shard_size, "shard meta");
+
+    // No scrub is executed here: each request is built in memory, serialized, reloaded and compared
+    run_on_pg_leader(pg_id, [&]() {
+        auto my_uuid = _obj_inst->our_uuid();
+
+        // Test base_scrub_req serialization
+        {
+            auto req = std::make_shared< ScrubManager::base_scrub_req >(1, 1, 100, my_uuid, pg_id, true);
+
+            // Serialize (unsigned literal keeps the size comparison free of sign-compare warnings)
+            auto buffer = req->build_flat_buffer();
+            EXPECT_GT(buffer.size(), 0U) << "Serialized buffer should not be empty";
+
+            // Deserialize; a failed load is fatal because the field checks below would inspect an unloaded request
+            auto req_loaded = std::make_shared< ScrubManager::base_scrub_req >();
+            bool load_success = req_loaded->load(buffer.data(), buffer.size());
+            ASSERT_TRUE(load_success) << "Deserialization should succeed";
+
+            // Verify fields
+            EXPECT_EQ(req_loaded->pg_id, pg_id);
+            EXPECT_EQ(req_loaded->task_id, 1);
+            EXPECT_EQ(req_loaded->req_id, 1);
+            EXPECT_EQ(req_loaded->scrub_lsn, 100);
+
+            LOGINFO("base_scrub_req serialization test passed");
+        }
+
+        // Test blob_scrub_req serialization
+        {
+            auto req = std::make_shared< ScrubManager::blob_scrub_req >(1, 2, 200, my_uuid, pg_id, 100, 200, true);
+
+            // Serialize
+            auto buffer = req->build_flat_buffer();
+            EXPECT_GT(buffer.size(), 0U);
+
+            // Deserialize; a failed load is fatal because the field checks below would inspect an unloaded request
+            auto req_loaded = std::make_shared< ScrubManager::blob_scrub_req >();
+            bool load_success = req_loaded->load(buffer.data(), buffer.size());
+            ASSERT_TRUE(load_success);
+
+            // Verify fields, including the range and the deep flag carried by the last constructor argument
+            EXPECT_EQ(req_loaded->pg_id, pg_id);
+            EXPECT_EQ(req_loaded->task_id, 1);
+            EXPECT_EQ(req_loaded->req_id, 2);
+            EXPECT_EQ(req_loaded->scrub_lsn, 200);
+            EXPECT_EQ(req_loaded->start, 100);
+            EXPECT_EQ(req_loaded->end, 200);
+            EXPECT_TRUE(req_loaded->is_deep_scrub());
+            EXPECT_EQ(req_loaded->get_scrub_type(), SCRUB_TYPE::DEEP_BLOB);
+
+            LOGINFO("blob_scrub_req serialization test passed");
+        }
+
+        // Test shard_scrub_req serialization
+        {
+            auto req = std::make_shared< ScrubManager::shard_scrub_req >(1, 3, 300, my_uuid, pg_id, 0, 100, false);
+
+            // Serialize
+            auto buffer = req->build_flat_buffer();
+            EXPECT_GT(buffer.size(), 0U);
+
+            // Deserialize; a failed load is fatal because the field checks below would inspect an unloaded request
+            auto req_loaded = std::make_shared< ScrubManager::shard_scrub_req >();
+            bool load_success = req_loaded->load(buffer.data(), buffer.size());
+            ASSERT_TRUE(load_success);
+
+            // Verify fields, including the range and the shallow flag carried by the last constructor argument
+            EXPECT_EQ(req_loaded->pg_id, pg_id);
+            EXPECT_EQ(req_loaded->task_id, 1);
+            EXPECT_EQ(req_loaded->req_id, 3);
+            EXPECT_EQ(req_loaded->scrub_lsn, 300);
+            EXPECT_EQ(req_loaded->start, 0);
+            EXPECT_EQ(req_loaded->end, 100);
+            EXPECT_FALSE(req_loaded->is_deep_scrub());
+            EXPECT_EQ(req_loaded->get_scrub_type(), SCRUB_TYPE::SHALLOW_SHARD);
+
+            LOGINFO("shard_scrub_req serialization test passed");
+        }
+    });
+
     g_helper->sync();
 }
\ No newline at end of file