@@ -191,4 +191,108 @@ TEST(AggregateMergeFunctionTest, TestSequenceFields) {
191191 KeyValueChecker::CheckResult (expected, result_kv, /* key_arity=*/ 1 , /* value_arity=*/ 4 );
192192}
193193
194+ TEST (AggregateMergeFunctionTest, TestRemoveRecordOnDelete) {
195+ arrow::FieldVector fields = {arrow::field (" k0" , arrow::int32 ()),
196+ arrow::field (" v0" , arrow::int32 ())};
197+ auto value_schema = arrow::schema (fields);
198+ ASSERT_OK_AND_ASSIGN (
199+ CoreOptions core_options,
200+ CoreOptions::FromMap ({{Options::FIELDS_DEFAULT_AGG_FUNC, " sum" },
201+ {Options::AGGREGATION_REMOVE_RECORD_ON_DELETE, " true" }}));
202+ ASSERT_OK_AND_ASSIGN (
203+ std::unique_ptr<AggregateMergeFunction> merge_func,
204+ AggregateMergeFunction::Create (value_schema, /* primary_keys=*/ {" k0" }, core_options));
205+
206+ auto pool = GetDefaultPool ();
207+
208+ // Case 1: INSERT + INSERT, then DELETE -> result should be RowKind::Delete
209+ {
210+ merge_func->Reset ();
211+ KeyValue kv1 (RowKind::Insert (), /* sequence_number=*/ 0 , /* level=*/ 0 ,
212+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
213+ BinaryRowGenerator::GenerateRowPtr ({10 , 100 }, pool.get ()));
214+ KeyValue kv2 (RowKind::Insert (), /* sequence_number=*/ 1 , /* level=*/ 0 ,
215+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
216+ BinaryRowGenerator::GenerateRowPtr ({10 , 200 }, pool.get ()));
217+ KeyValue kv3 (RowKind::Delete (), /* sequence_number=*/ 2 , /* level=*/ 0 ,
218+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
219+ BinaryRowGenerator::GenerateRowPtr ({10 , 300 }, pool.get ()));
220+ ASSERT_OK (merge_func->Add (std::move (kv1)));
221+ ASSERT_OK (merge_func->Add (std::move (kv2)));
222+ ASSERT_OK (merge_func->Add (std::move (kv3)));
223+ auto result_kv = std::move (merge_func->GetResult ().value ().value ());
224+ // Should return DELETE row kind with the original values from the delete record
225+ KeyValue expected (RowKind::Delete (), /* sequence_number=*/ 2 ,
226+ /* level=*/ KeyValue::UNKNOWN_LEVEL,
227+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
228+ BinaryRowGenerator::GenerateRowPtr ({10 , 300 }, pool.get ()));
229+ KeyValueChecker::CheckResult (expected, result_kv, /* key_arity=*/ 1 , /* value_arity=*/ 2 );
230+ }
231+
232+ // Case 2: Only INSERT rows, no DELETE -> result should be RowKind::Insert with aggregated values
233+ {
234+ merge_func->Reset ();
235+ KeyValue kv1 (RowKind::Insert (), /* sequence_number=*/ 0 , /* level=*/ 0 ,
236+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
237+ BinaryRowGenerator::GenerateRowPtr ({10 , 100 }, pool.get ()));
238+ KeyValue kv2 (RowKind::Insert (), /* sequence_number=*/ 1 , /* level=*/ 0 ,
239+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
240+ BinaryRowGenerator::GenerateRowPtr ({10 , 200 }, pool.get ()));
241+ ASSERT_OK (merge_func->Add (std::move (kv1)));
242+ ASSERT_OK (merge_func->Add (std::move (kv2)));
243+ auto result_kv = std::move (merge_func->GetResult ().value ().value ());
244+ // Should return INSERT with sum aggregation: 100 + 200 = 300
245+ KeyValue expected (RowKind::Insert (), /* sequence_number=*/ 1 ,
246+ /* level=*/ KeyValue::UNKNOWN_LEVEL,
247+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
248+ BinaryRowGenerator::GenerateRowPtr ({10 , 300 }, pool.get ()));
249+ KeyValueChecker::CheckResult (expected, result_kv, /* key_arity=*/ 1 , /* value_arity=*/ 2 );
250+ }
251+
252+ // Case 3: DELETE only -> result should be RowKind::Delete
253+ {
254+ merge_func->Reset ();
255+ KeyValue kv1 (RowKind::Delete (), /* sequence_number=*/ 0 , /* level=*/ 0 ,
256+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
257+ BinaryRowGenerator::GenerateRowPtr ({10 , 100 }, pool.get ()));
258+ ASSERT_OK (merge_func->Add (std::move (kv1)));
259+ auto result_kv = std::move (merge_func->GetResult ().value ().value ());
260+ KeyValue expected (RowKind::Delete (), /* sequence_number=*/ 0 ,
261+ /* level=*/ KeyValue::UNKNOWN_LEVEL,
262+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
263+ BinaryRowGenerator::GenerateRowPtr ({10 , 100 }, pool.get ()));
264+ KeyValueChecker::CheckResult (expected, result_kv, /* key_arity=*/ 1 , /* value_arity=*/ 2 );
265+ }
266+ }
267+
268+ TEST (AggregateMergeFunctionTest, TestDeleteWithoutRemoveRecordOnDelete) {
269+ // Without removeRecordOnDelete, DELETE row should be treated as retract (subtract)
270+ arrow::FieldVector fields = {arrow::field (" k0" , arrow::int32 ()),
271+ arrow::field (" v0" , arrow::int32 ())};
272+ auto value_schema = arrow::schema (fields);
273+ ASSERT_OK_AND_ASSIGN (CoreOptions core_options,
274+ CoreOptions::FromMap ({{Options::FIELDS_DEFAULT_AGG_FUNC, " sum" }}));
275+ ASSERT_OK_AND_ASSIGN (
276+ std::unique_ptr<AggregateMergeFunction> merge_func,
277+ AggregateMergeFunction::Create (value_schema, /* primary_keys=*/ {" k0" }, core_options));
278+
279+ auto pool = GetDefaultPool ();
280+ merge_func->Reset ();
281+ KeyValue kv1 (RowKind::Insert (), /* sequence_number=*/ 0 , /* level=*/ 0 ,
282+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
283+ BinaryRowGenerator::GenerateRowPtr ({10 , 200 }, pool.get ()));
284+ KeyValue kv2 (RowKind::Delete (), /* sequence_number=*/ 1 , /* level=*/ 0 ,
285+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
286+ BinaryRowGenerator::GenerateRowPtr ({10 , 300 }, pool.get ()));
287+ ASSERT_OK (merge_func->Add (std::move (kv1)));
288+ ASSERT_OK (merge_func->Add (std::move (kv2)));
289+ auto result_kv = std::move (merge_func->GetResult ().value ().value ());
290+ // Without removeRecordOnDelete, DELETE is retract: 200 - 300 = -100, result is INSERT
291+ KeyValue expected (RowKind::Insert (), /* sequence_number=*/ 1 ,
292+ /* level=*/ KeyValue::UNKNOWN_LEVEL,
293+ BinaryRowGenerator::GenerateRowPtr ({10 }, pool.get ()),
294+ BinaryRowGenerator::GenerateRowPtr ({10 , -100 }, pool.get ()));
295+ KeyValueChecker::CheckResult (expected, result_kv, /* key_arity=*/ 1 , /* value_arity=*/ 2 );
296+ }
297+
194298} // namespace paimon::test
0 commit comments