diff --git a/be/src/exprs/aggregate/aggregate_function_group_array_set_op_impl.h b/be/src/exprs/aggregate/aggregate_function_group_array_set_op_impl.h index b6fc8082538380..cc992137f6f253 100644 --- a/be/src/exprs/aggregate/aggregate_function_group_array_set_op_impl.h +++ b/be/src/exprs/aggregate/aggregate_function_group_array_set_op_impl.h @@ -19,6 +19,7 @@ #include +#include "core/call_on_type_index.h" #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" #include "exprs/aggregate/aggregate_function.h" @@ -35,81 +36,26 @@ inline AggregateFunctionPtr create_aggregate_function_group_array_impl( const AggregateFunctionAttr& attr) { const auto& nested_type = remove_nullable( assert_cast(*(argument_types[0])).get_nested_type()); + auto pt = nested_type->get_primitive_type(); - switch (nested_type->get_primitive_type()) { - case doris::PrimitiveType::TYPE_BOOLEAN: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_TINYINT: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_SMALLINT: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_INT: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_BIGINT: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_LARGEINT: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DATEV2: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case 
PrimitiveType::TYPE_DATETIMEV2: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DOUBLE: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_FLOAT: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL32: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL64: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL128I: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL256: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_IPV4: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_IPV6: - return creator_without_type::create< - AggregateFunctionGroupArraySetOp>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_STRING: - case PrimitiveType::TYPE_VARCHAR: - case PrimitiveType::TYPE_CHAR: + AggregateFunctionPtr result; + auto call = [&](const auto& dispatch_type) -> bool { + using DispatchType = std::decay_t; + constexpr auto PT = DispatchType::PType; + result = + creator_without_type::create>>( + argument_types, result_is_nullable, attr); + return true; + }; + if (dispatch_switch_scalar(pt, call)) { + return result; + } + if (is_string_type(pt)) { return creator_without_type::create>( argument_types, result_is_nullable, attr); - default: - LOG(WARNING) 
<< " got invalid of nested type: " << nested_type->get_name(); - return nullptr; } + LOG(WARNING) << " got invalid of nested type: " << nested_type->get_name(); + return nullptr; } } // namespace doris diff --git a/be/src/exprs/aggregate/aggregate_function_histogram.cpp b/be/src/exprs/aggregate/aggregate_function_histogram.cpp index 82a55e7d6b00e0..696d711fc6503d 100644 --- a/be/src/exprs/aggregate/aggregate_function_histogram.cpp +++ b/be/src/exprs/aggregate/aggregate_function_histogram.cpp @@ -44,7 +44,7 @@ AggregateFunctionPtr create_aggregate_function_histogram(const std::string& name creator_with_type_list; + TYPE_DATEV2, TYPE_DATETIMEV2, TYPE_TIMESTAMPTZ>; if (argument_types.size() == 2) { return creator::create( argument_types, result_is_nullable, attr); diff --git a/be/src/exprs/aggregate/aggregate_function_map_v2.cpp b/be/src/exprs/aggregate/aggregate_function_map_v2.cpp index d8f81909956879..309c079a198472 100644 --- a/be/src/exprs/aggregate/aggregate_function_map_v2.cpp +++ b/be/src/exprs/aggregate/aggregate_function_map_v2.cpp @@ -55,6 +55,7 @@ AggregateFunctionPtr create_aggregate_function_map_agg_v2(const std::string& nam case PrimitiveType::TYPE_DATEV2: case PrimitiveType::TYPE_DATETIMEV2: case PrimitiveType::TYPE_TIMEV2: + case PrimitiveType::TYPE_TIMESTAMPTZ: return create_agg_function_map_agg_v2(argument_types, result_is_nullable, attr); default: LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}", diff --git a/be/src/exprs/aggregate/aggregate_function_min_max_by.cpp b/be/src/exprs/aggregate/aggregate_function_min_max_by.cpp index fda6effb1b74d1..fd2a6ec611919d 100644 --- a/be/src/exprs/aggregate/aggregate_function_min_max_by.cpp +++ b/be/src/exprs/aggregate/aggregate_function_min_max_by.cpp @@ -17,52 +17,39 @@ #include "exprs/aggregate/aggregate_function_min_max_by.h" +#include "core/call_on_type_index.h" +#include "core/data_type/primitive_type.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" 
namespace doris { #include "common/compile_check_begin.h" -std::unique_ptr create_max_min_value(const DataTypePtr& type) { +std::unique_ptr create_max_min_value(const DataTypePtr& type, int be_version) { + std::unique_ptr result; + auto call = [&](const auto& dispatch_type) -> bool { + using DispatchType = std::decay_t; + constexpr auto PT = DispatchType::PType; + if constexpr (is_decimal(PT)) { + result = std::make_unique>>(); + } else { + result = std::make_unique>>(); + } + return true; + }; + if (dispatch_switch_scalar(type->get_primitive_type(), call)) { + return result; + } switch (type->get_primitive_type()) { - case PrimitiveType::TYPE_BOOLEAN: - return std::make_unique>>(); - case PrimitiveType::TYPE_TINYINT: - return std::make_unique>>(); - case PrimitiveType::TYPE_SMALLINT: - return std::make_unique>>(); - case PrimitiveType::TYPE_INT: - return std::make_unique>>(); - case PrimitiveType::TYPE_BIGINT: - return std::make_unique>>(); - case PrimitiveType::TYPE_LARGEINT: - return std::make_unique>>(); - case PrimitiveType::TYPE_FLOAT: - return std::make_unique>>(); - case PrimitiveType::TYPE_DOUBLE: - return std::make_unique>>(); - case PrimitiveType::TYPE_DECIMAL32: - return std::make_unique>>(); - case PrimitiveType::TYPE_DECIMAL64: - return std::make_unique>>(); - case PrimitiveType::TYPE_DECIMAL128I: - return std::make_unique>>(); - case PrimitiveType::TYPE_DECIMALV2: - return std::make_unique>>(); - case PrimitiveType::TYPE_DECIMAL256: - return std::make_unique>>(); case PrimitiveType::TYPE_STRING: case PrimitiveType::TYPE_CHAR: case PrimitiveType::TYPE_VARCHAR: return std::make_unique>(); - case PrimitiveType::TYPE_DATE: - return std::make_unique>>(); - case PrimitiveType::TYPE_DATETIME: - return std::make_unique>>(); - case PrimitiveType::TYPE_DATEV2: - return std::make_unique>>(); - case PrimitiveType::TYPE_DATETIMEV2: - return std::make_unique>>(); case PrimitiveType::TYPE_BITMAP: return std::make_unique>(); + case PrimitiveType::TYPE_ARRAY: + case 
PrimitiveType::TYPE_MAP: + case PrimitiveType::TYPE_STRUCT: + return std::make_unique>(DataTypes {type}, + be_version); default: throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Illegal type {} of argument of aggregate function min/max_by", @@ -71,6 +58,15 @@ std::unique_ptr create_max_min_value(const DataTypePtr& type) { } } +void register_aggregate_function_max_min_by(AggregateFunctionSimpleFactory& factory) { + factory.register_function_both( + "min_by", create_aggregate_function_min_max_by); + factory.register_function_both( + "max_by", create_aggregate_function_min_max_by); +} + } // namespace doris #include "common/compile_check_end.h" diff --git a/be/src/exprs/aggregate/aggregate_function_min_max_by.h b/be/src/exprs/aggregate/aggregate_function_min_max_by.h index a67e3d62511c18..30fd4b27aece01 100644 --- a/be/src/exprs/aggregate/aggregate_function_min_max_by.h +++ b/be/src/exprs/aggregate/aggregate_function_min_max_by.h @@ -17,12 +17,13 @@ #pragma once -#include "common/logging.h" #include "core/assert_cast.h" +#include "core/call_on_type_index.h" #include "core/column/column_complex.h" #include "core/column/column_decimal.h" #include "core/column/column_vector.h" #include "core/data_type/data_type_bitmap.h" +#include "core/data_type/primitive_type.h" #include "core/value/bitmap_value.h" #include "exprs/aggregate/aggregate_function.h" #include "exprs/aggregate/aggregate_function_min_max.h" @@ -47,6 +48,9 @@ struct MaxMinValue : public MaxMinValueBase { MaxMinValue() = default; + MaxMinValue(const DataTypes& argument_types, int be_version) + : value(argument_types, be_version) {} + ~MaxMinValue() override = default; void write(BufferWritable& buf) const override { value.write(buf); } @@ -67,7 +71,7 @@ struct MaxMinValue : public MaxMinValueBase { } }; -std::unique_ptr create_max_min_value(const DataTypePtr& type); +std::unique_ptr create_max_min_value(const DataTypePtr& type, int be_version); /// For bitmap value struct BitmapValueData { @@ -120,6 +124,25 
@@ struct BitmapValueData { } }; +/** + * The template parameter KT is introduced here primarily for performance reasons. + * + * Without using a template parameter, the key type would have to be + * std::unique_ptr. Since MaxMinValueBase is a polymorphic base + * class with virtual methods, comparing keys would inevitably involve virtual + * function calls, which can introduce significant runtime overhead. + * + * By making KT a template parameter, the concrete key type is known at compile + * time, allowing static dispatch and avoiding virtual function calls. This + * substantially reduces the cost of key comparisons. + * + * In contrast, the value type VT is intentionally not made a template parameter. + * On one hand, templating both key and value types would lead to an n x n + * explosion in template instantiations, increasing compile time and code size. + * On the other hand, value objects typically only invoke the change method; for + * random data, this method is called approximately log(x) times (where x is the + * data size), making the overhead acceptable. 
+ */ template struct AggregateFunctionMinMaxByBaseData { protected: @@ -127,8 +150,18 @@ struct AggregateFunctionMinMaxByBaseData { KT key; public: - AggregateFunctionMinMaxByBaseData(const DataTypes argument_types) { - value = create_max_min_value(argument_types[0]); + AggregateFunctionMinMaxByBaseData() = default; + + AggregateFunctionMinMaxByBaseData(const DataTypes argument_types, int be_version) + requires(std::is_same_v) + : key(SingleValueDataComplexType(DataTypes {argument_types[1]}, be_version)) { + value = create_max_min_value(argument_types[0], be_version); + } + + AggregateFunctionMinMaxByBaseData(const DataTypes argument_types, int be_version) + requires(!std::is_same_v) + { + value = create_max_min_value(argument_types[0], be_version); } void insert_result_into(IColumn& to) const { value->insert_result_into(to); } @@ -152,8 +185,10 @@ template struct AggregateFunctionMaxByData : public AggregateFunctionMinMaxByBaseData { using Self = AggregateFunctionMaxByData; - AggregateFunctionMaxByData(const DataTypes argument_types) - : AggregateFunctionMinMaxByBaseData(argument_types) {} + AggregateFunctionMaxByData() = default; + + AggregateFunctionMaxByData(const DataTypes argument_types, int be_version) + : AggregateFunctionMinMaxByBaseData(argument_types, be_version) {} void change_if_better(const IColumn& value_column, const IColumn& key_column, size_t row_num, Arena& arena) { @@ -188,8 +223,11 @@ template struct AggregateFunctionMinByData : public AggregateFunctionMinMaxByBaseData { using Self = AggregateFunctionMinByData; - AggregateFunctionMinByData(const DataTypes argument_types) - : AggregateFunctionMinMaxByBaseData(argument_types) {} + AggregateFunctionMinByData() = default; + + AggregateFunctionMinByData(const DataTypes argument_types, int be_version) + : AggregateFunctionMinMaxByBaseData(argument_types, be_version) {} + void change_if_better(const IColumn& value_column, const IColumn& key_column, size_t row_num, Arena& arena) { if 
(this->key.change_if_less(key_column, row_num, arena)) { @@ -221,7 +259,7 @@ struct AggregateFunctionMinByData : public AggregateFunctionMinMaxByBaseData template class AggregateFunctionsMinMaxBy final - : public IAggregateFunctionDataHelper, true>, + : public IAggregateFunctionDataHelper>, MultiExpression, NullableAggregateFunction { private: @@ -230,11 +268,15 @@ class AggregateFunctionsMinMaxBy final public: AggregateFunctionsMinMaxBy(const DataTypes& arguments) - : IAggregateFunctionDataHelper, true>( + : IAggregateFunctionDataHelper>( {arguments[0], arguments[1]}), value_type(this->argument_types[0]), key_type(this->argument_types[1]) {} + void create(AggregateDataPtr __restrict place) const override { + new (place) Data(IAggregateFunction::argument_types, IAggregateFunction::version); + } + String get_name() const override { return Data::name(); } DataTypePtr get_return_type() const override { return value_type; } @@ -280,80 +322,37 @@ AggregateFunctionPtr create_aggregate_function_min_max_by(const String& name, return nullptr; } + AggregateFunctionPtr result; + auto call = [&](const auto& dispatch_type) -> bool { + using DispatchType = std::decay_t; + constexpr auto PT = DispatchType::PType; + if constexpr (is_decimal(PT)) { + result = creator_without_type::create_multi_arguments< + AggregateFunctionTemplate>>>( + argument_types, result_is_nullable, attr); + } else { + result = creator_without_type::create_multi_arguments< + AggregateFunctionTemplate>>>( + argument_types, result_is_nullable, attr); + } + return true; + }; + if (dispatch_switch_scalar(argument_types[1]->get_primitive_type(), call)) { + return result; + } + switch (argument_types[1]->get_primitive_type()) { - case PrimitiveType::TYPE_BOOLEAN: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_TINYINT: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( 
- argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_SMALLINT: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_INT: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_BIGINT: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_LARGEINT: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_FLOAT: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DOUBLE: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL32: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL64: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL128I: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMALV2: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DECIMAL256: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); case PrimitiveType::TYPE_CHAR: case PrimitiveType::TYPE_VARCHAR: case PrimitiveType::TYPE_STRING: 
return creator_without_type::create_multi_arguments< AggregateFunctionTemplate>>(argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DATE: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DATETIME: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DATEV2: - return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( - argument_types, result_is_nullable, attr); - case PrimitiveType::TYPE_DATETIMEV2: + case PrimitiveType::TYPE_ARRAY: + case PrimitiveType::TYPE_MAP: + case PrimitiveType::TYPE_STRUCT: return creator_without_type::create_multi_arguments< - AggregateFunctionTemplate>>>( + AggregateFunctionTemplate>>( argument_types, result_is_nullable, attr); default: return nullptr; diff --git a/be/src/exprs/function/array/function_array_index.h b/be/src/exprs/function/array/function_array_index.h index 488f1c27226577..67c08d92283a19 100644 --- a/be/src/exprs/function/array/function_array_index.h +++ b/be/src/exprs/function/array/function_array_index.h @@ -417,105 +417,19 @@ class FunctionArrayIndex : public IFunction { is_string_type(left_element_type->get_primitive_type())) { return_column = _execute_string(offsets, nested_null_map, *nested_column, *right_column, right_nested_null_map, array_null_map); - } else if (is_number(right_type->get_primitive_type()) && - is_number(left_element_type->get_primitive_type())) { - switch (left_element_type->get_primitive_type()) { - case TYPE_BOOLEAN: - return_column = _execute_number_expanded( + } else if (right_type->get_primitive_type() == left_element_type->get_primitive_type()) { + auto call = [&](const auto& type) -> bool { + using DispatchType = std::decay_t; + auto col = _execute_number_expanded( offsets, nested_null_map, *nested_column, *right_column, 
right_nested_null_map, array_null_map); - break; - case TYPE_TINYINT: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_SMALLINT: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_INT: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_BIGINT: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_LARGEINT: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_FLOAT: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_DOUBLE: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_DECIMAL32: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_DECIMAL64: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_DECIMAL128I: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case TYPE_DECIMALV2: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - case 
TYPE_DECIMAL256: - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - break; - default: - break; - } - } else if ((is_date_v2_or_datetime_v2(right_type->get_primitive_type()) || - right_type->get_primitive_type() == TYPE_TIMEV2) && - (is_date_v2_or_datetime_v2(left_element_type->get_primitive_type()) || - left_element_type->get_primitive_type() == TYPE_TIMEV2)) { - if (left_element_type->get_primitive_type() == TYPE_DATEV2) { - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } else if (left_element_type->get_primitive_type() == TYPE_DATETIMEV2) { - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } else if (left_element_type->get_primitive_type() == TYPE_TIMEV2) { - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } - } else if (is_ip(right_type->get_primitive_type()) && - is_ip(left_element_type->get_primitive_type())) { - if (left_element_type->get_primitive_type() == TYPE_IPV4) { - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } else if (left_element_type->get_primitive_type() == TYPE_IPV6) { - return_column = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } + if (col) { + return_column = std::move(col); + return true; + } + return false; + }; + dispatch_switch_scalar(right_type->get_primitive_type(), call); } if (return_column) { diff --git a/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out new 
file mode 100644 index 00000000000000..850cbe14a980d5 --- /dev/null +++ b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !map_agg_v2 -- +3 + +-- !histogram_not_null -- +true + +-- !group_array_intersect -- +1 + +-- !group_array_union -- +3 + diff --git a/regression-test/data/datatype_p0/timestamptz/test_timestamptz_array_index.out b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_array_index.out new file mode 100644 index 00000000000000..b6edf44ae3a417 --- /dev/null +++ b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_array_index.out @@ -0,0 +1,19 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !array_position -- +1 1 +2 0 +3 0 + +-- !array_contains -- +1 true +2 false +3 false + +-- !countequal -- +1 2 +2 0 +3 0 + +-- !array_position_literal -- +1 1 + diff --git a/regression-test/data/datatype_p0/timestamptz/test_timestamptz_max_min_by.out b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_max_min_by.out new file mode 100644 index 00000000000000..d1d853d0ab25b9 --- /dev/null +++ b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_max_min_by.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !max_by_value -- +2024-01-01 00:00:00.000000+00:00 2024-01-01 00:00:00.000000+00:00 + +-- !max_by_key -- +beta alpha + diff --git a/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy new file mode 100644 index 00000000000000..89126b5a284772 --- /dev/null +++ b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_timestamptz_agg_functions", "datatype_p0") { + sql "SET enable_nereids_planner = true" + sql "SET enable_fallback_to_original_planner = false" + sql "SET time_zone = '+00:00'" + + sql "DROP TABLE IF EXISTS test_tz_agg" + sql """ + CREATE TABLE test_tz_agg ( + id INT, + ts TIMESTAMPTZ(6), + arr ARRAY, + w INT + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES('replication_num' = '1') + """ + + sql """ + INSERT INTO test_tz_agg VALUES + (1, CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6)), + ARRAY(CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6)), CAST('2024-01-02 00:00:00 +00:00' AS TIMESTAMPTZ(6))), 1), + (2, CAST('2024-01-02 00:00:00 +00:00' AS TIMESTAMPTZ(6)), + ARRAY(CAST('2024-01-02 00:00:00 +00:00' AS TIMESTAMPTZ(6)), CAST('2024-01-03 00:00:00 +00:00' AS TIMESTAMPTZ(6))), 2), + (3, CAST('2024-01-03 00:00:00 +00:00' AS TIMESTAMPTZ(6)), + ARRAY(CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6)), CAST('2024-01-02 00:00:00 +00:00' AS TIMESTAMPTZ(6))), 3) + """ + + // map_agg_v2 with TIMESTAMPTZ as key + qt_map_agg_v2 "SELECT size(map_agg_v2(ts, id)) FROM test_tz_agg" + + // histogram on TIMESTAMPTZ — just check it returns a non-null result + qt_histogram_not_null "SELECT histogram(ts) IS NOT NULL FROM test_tz_agg" + + // group_array_intersect on Array + qt_group_array_intersect "SELECT size(group_array_intersect(arr)) FROM test_tz_agg" + + // group_array_union on Array + qt_group_array_union "SELECT size(group_array_union(arr)) FROM test_tz_agg" + + sql "DROP TABLE IF EXISTS test_tz_agg" +} diff --git a/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_array_index.groovy b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_array_index.groovy new file mode 100644 index 00000000000000..efa8c0284583fa --- /dev/null +++ b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_array_index.groovy @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_timestamptz_array_index", "datatype_p0") { + sql "SET enable_nereids_planner = true" + sql "SET enable_fallback_to_original_planner = false" + sql "SET time_zone = '+00:00'" + + sql "DROP TABLE IF EXISTS test_tz_array_index" + sql """ + CREATE TABLE test_tz_array_index ( + id INT, + arr ARRAY, + probe TIMESTAMPTZ(6) + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES('replication_num' = '1') + """ + + sql """ + INSERT INTO test_tz_array_index VALUES ( + 1, + ARRAY( + CAST('2024-03-31 01:15:00 +00:00' AS TIMESTAMPTZ(6)), + CAST('2024-03-31 09:15:00 +08:00' AS TIMESTAMPTZ(6)), + CAST('2024-03-31 02:15:00 +00:00' AS TIMESTAMPTZ(6)) + ), + CAST('2024-03-31 01:15:00 +00:00' AS TIMESTAMPTZ(6)) + ) + """ + sql """ + INSERT INTO test_tz_array_index VALUES ( + 2, + ARRAY( + CAST('2024-06-01 10:00:00 +00:00' AS TIMESTAMPTZ(6)), + CAST('2024-06-01 20:00:00 +00:00' AS TIMESTAMPTZ(6)) + ), + CAST('2024-06-01 12:00:00 +00:00' AS TIMESTAMPTZ(6)) + ) + """ + sql """ + INSERT INTO test_tz_array_index VALUES ( + 3, + ARRAY( + CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6)) + ), + NULL + ) + """ + + // array_position: first occurrence (1-based), or 0 if not found + qt_array_position "SELECT id, array_position(arr, probe) 
AS pos FROM test_tz_array_index ORDER BY id" + + // array_contains: true/false + qt_array_contains "SELECT id, array_contains(arr, probe) AS contained FROM test_tz_array_index ORDER BY id" + + // countequal: count of matching elements + qt_countequal "SELECT id, countequal(arr, probe) AS cnt FROM test_tz_array_index ORDER BY id" + + // array_position with literal probe value (different UTC offset, same UTC instant as the first element) + qt_array_position_literal """ + SELECT id, array_position(arr, CAST('2024-03-31 09:15:00 +08:00' AS TIMESTAMPTZ(6))) AS pos + FROM test_tz_array_index + WHERE id = 1 + """ + + sql "DROP TABLE IF EXISTS test_tz_array_index" +} diff --git a/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_max_min_by.groovy b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_max_min_by.groovy new file mode 100644 index 00000000000000..5de10671c0eb75 --- /dev/null +++ b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_max_min_by.groovy @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ + +suite("test_timestamptz_max_min_by") { + + sql "set time_zone = '+00:00';" + + // Bug 7: TIMESTAMPTZ as return value (first argument of max_by / min_by) + sql "DROP TABLE IF EXISTS t_max_by_value;" + sql """ + CREATE TABLE t_max_by_value ( + score INT, + ts_value TIMESTAMPTZ(6) + ) + DUPLICATE KEY(score) + DISTRIBUTED BY HASH(score) BUCKETS 1 + PROPERTIES('replication_num' = '1'); + """ + sql """ + INSERT INTO t_max_by_value VALUES + (1, CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6))), + (2, CAST('2024-01-01 08:00:00 +08:00' AS TIMESTAMPTZ(6))); + """ + // CAST to VARCHAR for deterministic string output across sessions + order_qt_max_by_value """ + SELECT CAST(max_by(ts_value, score) AS VARCHAR(64)), + CAST(min_by(ts_value, score) AS VARCHAR(64)) + FROM t_max_by_value; + """ + + // Bug 8: TIMESTAMPTZ as order key (second argument of max_by / min_by) + sql "DROP TABLE IF EXISTS t_max_by_key;" + sql """ + CREATE TABLE t_max_by_key ( + payload VARCHAR(64), + ts_key TIMESTAMPTZ(6) + ) + DUPLICATE KEY(payload) + DISTRIBUTED BY HASH(payload) BUCKETS 1 + PROPERTIES('replication_num' = '1'); + """ + sql """ + INSERT INTO t_max_by_key VALUES + ('alpha', CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6))), + ('beta', CAST('2024-01-01 02:00:00 +00:00' AS TIMESTAMPTZ(6))); + """ + order_qt_max_by_key """ + SELECT max_by(payload, ts_key), + min_by(payload, ts_key) + FROM t_max_by_key; + """ +}