From 98c43256eae708c582b6c51e66fe460dafae051a Mon Sep 17 00:00:00 2001 From: root Date: Sat, 16 May 2026 19:53:24 +0800 Subject: [PATCH] [fix](fe) Fix stale timestamp in CatalogRecycleBin erase daemon ### What problem does this PR solve? Issue Number: close #xxx Problem Summary: In CatalogRecycleBin.runAfterCatalogReady(), a single currentTimeMs timestamp is captured at the start and shared across erasePartition, eraseTable, and eraseDatabase. Since each erase operation may take significant time (I/O, log writes, lock acquisition), the subsequent erase methods use a stale timestamp for expiry checks, causing cleanup delay for tables and databases. ### Release note Fix stale timestamp issue in CatalogRecycleBin that causes delayed cleanup of tables and databases in the recycle bin. ### Check List (For Author) - Test: Unit Test - Behavior changed: No - Does this need documentation: No --- .../doris/catalog/CatalogRecycleBin.java | 9 +- .../doris/catalog/CatalogRecycleBinTest.java | 132 ++++++++++++++++++ 2 files changed, 137 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java index 60af9c64591fa7..4f5b1e3d85f6ca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java @@ -1394,13 +1394,14 @@ public void addTabletToInvertedIndex() { @Override protected void runAfterCatalogReady() { - long currentTimeMs = System.currentTimeMillis(); // should follow the partition/table/db order // in case of partition(table) is still in recycle bin but table(db) is missing + // Each erase method gets its own currentTimeMs to avoid using a stale timestamp, + // since previous erase operations may take significant time. int keepNum = Config.max_same_name_catalog_trash_num; - erasePartition(currentTimeMs, keepNum); - eraseTable(currentTimeMs, keepNum); - eraseDatabase(currentTimeMs, keepNum); + erasePartition(System.currentTimeMillis(), keepNum); + eraseTable(System.currentTimeMillis(), keepNum); + eraseDatabase(System.currentTimeMillis(), keepNum); } public List> getInfo() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogRecycleBinTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogRecycleBinTest.java index 99831c65578064..180d6d75542d83 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogRecycleBinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogRecycleBinTest.java @@ -28,6 +28,7 @@ import org.apache.doris.common.util.URI; import org.apache.doris.nereids.trees.expressions.functions.FunctionBuilder; import org.apache.doris.thrift.TStorageMedium; +import org.apache.doris.thrift.TStorageType; import org.apache.doris.utframe.UtFrameUtils; import com.google.common.collect.Lists; @@ -1058,4 +1059,135 @@ public void testMicrobatchEraseReleasesLockBetweenItems() throws Exception { CatalogTestUtil.testTableId1, 9000)); } } + + @Test + public void testEraseUsesFreshCurrentTime() throws Exception { + CatalogRecycleBin recycleBin = Env.getCurrentRecycleBin(); + Config.max_same_name_catalog_trash_num = -1; + long origExpireSecond = Config.catalog_trash_expire_second; + boolean origIgnoreMinErase = Config.catalog_trash_ignore_min_erase_latency; + Config.catalog_trash_ignore_min_erase_latency = true; + try { + // Set a short expire time so items expire quickly + Config.catalog_trash_expire_second = 1; + + long dbId = 90001; + long tableId = 90002; + long basePartitionId = 100000; + long baseIndexId = 200000; + long baseTabletId = 300000; + int numPartitions = 1000; + + // Create a table with many partitions + List columns = new ArrayList<>(); + Column k1 = new Column("k1", PrimitiveType.INT); + k1.setIsKey(true); + columns.add(k1); + Column v = new Column("v", ScalarType.createType(PrimitiveType.DOUBLE), false, + AggregateType.SUM, "0", ""); + columns.add(v); + + List keysColumn = new ArrayList<>(); + Column kk1 = new Column("k1", PrimitiveType.INT); + kk1.setIsKey(true); + keysColumn.add(kk1); + + HashDistributionInfo distributionInfo = new HashDistributionInfo(10, keysColumn); + PartitionInfo partitionInfo = new SinglePartitionInfo(); + OlapTable table = new OlapTable(tableId, "test_many_parts_table", columns, + KeysType.AGG_KEYS, partitionInfo, distributionInfo); + + for (int i = 0; i < numPartitions; i++) { + long partitionId = basePartitionId + i; + long indexId = baseIndexId + i; + long tabletId = baseTabletId + i; + + Tablet tablet = new LocalTablet(tabletId); + MaterializedIndex index = new MaterializedIndex(indexId, IndexState.NORMAL); + TabletMeta tabletMeta = new TabletMeta(dbId, tableId, partitionId, indexId, 0, + TStorageMedium.HDD); + index.addTablet(tablet, tabletMeta); + + Replica replica1 = new LocalReplica(400001 + i, CatalogTestUtil.testBackendId1, + CatalogTestUtil.testStartVersion, 0, 0L, 0L, 0L, + Replica.ReplicaState.NORMAL, -1, 0); + Replica replica2 = new LocalReplica(500001 + i, CatalogTestUtil.testBackendId2, + CatalogTestUtil.testStartVersion, 0, 0L, 0L, 0L, + Replica.ReplicaState.NORMAL, -1, 0); + Replica replica3 = new LocalReplica(600001 + i, CatalogTestUtil.testBackendId3, + CatalogTestUtil.testStartVersion, 0, 0L, 0L, 0L, + Replica.ReplicaState.NORMAL, -1, 0); + tablet.addReplica(replica1); + tablet.addReplica(replica2); + tablet.addReplica(replica3); + + Partition partition = new Partition(partitionId, "p" + i, index, distributionInfo); + partition.updateVisibleVersion(CatalogTestUtil.testStartVersion); + partition.setNextVersion(CatalogTestUtil.testStartVersion + 1); + + partitionInfo.setDataProperty(partitionId, + new DataProperty(DataProperty.DEFAULT_STORAGE_MEDIUM)); + partitionInfo.setReplicaAllocation(partitionId, new ReplicaAllocation((short) 3)); + table.addPartition(partition); + } + table.setIndexMeta(baseIndexId, CatalogTestUtil.testIndex1, columns, 0, + CatalogTestUtil.testSchemaHash1, (short) 1, TStorageType.COLUMN, KeysType.AGG_KEYS); + table.setBaseIndexId(baseIndexId); + + // Force drop all partitions (recycleTime=0, immediately erasable) + for (Partition partition : table.getAllPartitions()) { + recycleBin.recyclePartition(dbId, tableId, table.getName(), partition, + null, null, new DataProperty(TStorageMedium.HDD), + new ReplicaAllocation((short) 3), false, false); + // Set recycleTime to 0 to simulate force drop + recycleBin.setRecycleTimeByIdForReplay(partition.getId(), 0L); + } + + // Recycle a table and a database with current time as recycleTime + // They will expire after catalog_trash_expire_second (1s) + Database db2 = CatalogTestUtil.createSimpleDb( + CatalogTestUtil.testDbId1, CatalogTestUtil.testTableId1, + CatalogTestUtil.testPartitionId1, CatalogTestUtil.testIndexId1, + CatalogTestUtil.testTabletId1, CatalogTestUtil.testStartVersion); + Optional tableOpt = db2.getTable(CatalogTestUtil.testTableId1); + Assert.assertTrue(tableOpt.isPresent()); + Assert.assertTrue(tableOpt.get() instanceof OlapTable); + OlapTable olapTable = (OlapTable) tableOpt.get(); + db2.unregisterTable(CatalogTestUtil.testTableId1); + recycleBin.recycleTable(CatalogTestUtil.testDbId1, olapTable, false, false, 0); + + Database emptyDb = new Database(CatalogTestUtil.testDbId1, CatalogTestUtil.testDb1); + recycleBin.recycleDatabase(emptyDb, Sets.newHashSet(), Sets.newHashSet(), false, false, 0); + + // Verify all items are in the recycle bin + Set partitionIds = Sets.newHashSet(); + recycleBin.getRecycleIds(partitionIds, Sets.newHashSet(), Sets.newHashSet()); + Assert.assertFalse(partitionIds.isEmpty()); + Assert.assertTrue(recycleBin.isRecycleTable( + CatalogTestUtil.testDbId1, CatalogTestUtil.testTableId1)); + Assert.assertTrue(recycleBin.isRecycleDatabase(CatalogTestUtil.testDbId1)); + + // Wait for table and database to expire (catalog_trash_expire_second = 1s) + Thread.sleep(1500); + + // Run the erase daemon. With 1000 force-dropped partitions, erasePartition + // takes significant time. If eraseTable/eraseDatabase used a stale shared + // startTime captured before erasePartition, the table and database would + // appear not-yet-expired (latency < expire_second at that earlier time). + // With the fix, each erase method gets its own fresh currentTimeMs, so + // the table and database are correctly identified as expired and cleaned up. + recycleBin.runAfterCatalogReady(); + + // All items should be cleaned up + Set remainingPartitionIds = Sets.newHashSet(); + recycleBin.getRecycleIds(remainingPartitionIds, Sets.newHashSet(), Sets.newHashSet()); + Assert.assertTrue(remainingPartitionIds.isEmpty()); + Assert.assertFalse(recycleBin.isRecycleTable( + CatalogTestUtil.testDbId1, CatalogTestUtil.testTableId1)); + Assert.assertFalse(recycleBin.isRecycleDatabase(CatalogTestUtil.testDbId1)); + } finally { + Config.catalog_trash_expire_second = origExpireSecond; + Config.catalog_trash_ignore_min_erase_latency = origIgnoreMinErase; + } + } }