diff --git a/be/src/exprs/function/function_hash.cpp b/be/src/exprs/function/function_hash.cpp index 19145a80ba35c1..f07819c1f869f4 100644 --- a/be/src/exprs/function/function_hash.cpp +++ b/be/src/exprs/function/function_hash.cpp @@ -46,6 +46,8 @@ struct MurmurHash3Impl { static constexpr auto get_name() { if constexpr (ReturnType == TYPE_INT) { return "murmur_hash3_32"; + } else if constexpr (ReturnType == TYPE_LARGEINT) { + return "murmur_hash3_u64_v2"; } else if constexpr (is_mmh64_v2) { return "murmur_hash3_64_v2"; } else { @@ -98,7 +100,7 @@ struct MurmurHash3Impl { } else { col_to_data[i] = HashUtil::murmur_hash3_64( reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset, col_to_data[i]); + offsets[i] - current_offset, static_cast(col_to_data[i])); } current_offset = offsets[i]; } @@ -111,7 +113,7 @@ struct MurmurHash3Impl { HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); } else { col_to_data[i] = HashUtil::murmur_hash3_64( - value.data(), value.size(), col_to_data[i]); + value.data(), value.size(), static_cast(col_to_data[i])); } } } else { @@ -129,6 +131,8 @@ using FunctionMurmurHash3_64 = FunctionVariadicArgumentsBase>; using FunctionMurmurHash3_64_V2 = FunctionVariadicArgumentsBase>; +using FunctionMurmurHash3U64V2 = + FunctionVariadicArgumentsBase>; #ifdef BE_TEST const char* murmur_hash3_get_name_type_int_for_test() { @@ -230,6 +234,7 @@ void register_function_hash(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); factory.register_function(); + factory.register_function(); factory.register_function(); factory.register_function(); factory.register_alias("xxhash_64", "xxhash3_64"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index c8719dbaeb5832..f7b21c7dfbf095 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -374,6 +374,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332; import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364; import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364V2; +import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash3U64V2; import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative; import org.apache.doris.nereids.trees.expressions.functions.scalar.NextDay; import org.apache.doris.nereids.trees.expressions.functions.scalar.NgramSearch; @@ -942,6 +943,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(MurmurHash332.class, "murmur_hash3_32"), scalar(MurmurHash364.class, "murmur_hash3_64"), scalar(MurmurHash364V2.class, "murmur_hash3_64_v2"), + scalar(MurmurHash3U64V2.class, "murmur_hash3_u64_v2"), scalar(Negative.class, "negative"), scalar(NextDay.class, "next_day"), scalar(NonNullable.class, "non_nullable"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java new file mode 100644 index 00000000000000..9ef87a50820cca --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.LargeIntType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.util.ExpressionUtils; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'murmur_hash3_u64_v2'. + */ +public class MurmurHash3U64V2 extends ScalarFunction + implements ExplicitlyCastableSignature, PropagateNullable { + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(StringType.INSTANCE) + ); + + /** + * constructor with 1 or more arguments. + */ + public MurmurHash3U64V2(Expression arg, Expression... varArgs) { + super("murmur_hash3_u64_v2", ExpressionUtils.mergeArguments(arg, varArgs)); + } + + /** constructor for withChildren and reuse signature */ + private MurmurHash3U64V2(ScalarFunctionParams functionParams) { + super(functionParams); + } + + /** + * withChildren. + */ + @Override + public MurmurHash3U64V2 withChildren(List children) { + Preconditions.checkArgument(!children.isEmpty()); + return new MurmurHash3U64V2(getFunctionParams(children)); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitMurmurHash3U64V2(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index c32d2157d50c50..a20abfeae853c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -396,6 +396,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332; import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364; import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364V2; +import org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash3U64V2; import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative; import org.apache.doris.nereids.trees.expressions.functions.scalar.NextDay; import org.apache.doris.nereids.trees.expressions.functions.scalar.NgramSearch; @@ -2000,6 +2001,10 @@ default R visitMurmurHash364V2(MurmurHash364V2 murmurHash364V2, C context) { return visitScalarFunction(murmurHash364V2, context); } + default R visitMurmurHash3U64V2(MurmurHash3U64V2 murmurHash3U64V2, C context) { + return visitScalarFunction(murmurHash3U64V2, context); + } + default R visitXxHash32(XxHash32 xxHash32, C context) { return visitScalarFunction(xxHash32, context); } diff --git a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out index e4d755e029e7d0..764d533fcc7de3 100644 --- a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out +++ b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out @@ -29,6 +29,76 @@ -- !mmh3_64_v2_4 -- 3669213779466221743 +-- !mmh3_64_v2_5 -- +-2648103510258542450 + +-- !mmh3_64_v2_6 -- +-5640908359072688302 + +-- !mmh3_u64_v2_1 -- +\N + +-- !mmh3_u64_v2_2 -- +4038800892574899471 + +-- !mmh3_u64_v2_3 -- +5998619086395760910 + +-- !mmh3_u64_v2_4 -- +3669213779466221743 + +-- !mmh3_u64_v2_5 -- +15798640563451009166 + +-- !mmh3_u64_v2_6 -- +12805835714636863314 + +-- !mmh3_u64_v2_7 -- +0 + +-- !mmh3_u64_v2_8 -- +9607679276477937801 + +-- !mmh3_u64_v2_9 -- +17783800982478351481 + +-- !mmh3_u64_v2_10 -- +10490885898849282672 + +-- !mmh3_64_v2_table -- +1 4038800892574899471 +2 5998619086395760910 +3 \N +4 0 +5 3669213779466221743 +6 -2648103510258542450 +7 -5640908359072688302 +8 5163374697039953916 +9 -1516026088323099476 + +-- !mmh3_u64_v2_table -- +1 4038800892574899471 +2 5998619086395760910 +3 \N +4 0 +5 3669213779466221743 +6 15798640563451009166 +7 12805835714636863314 +8 5163374697039953916 +9 16930717985386452140 + +-- !mmh3_64_v2_fold_1 -- +-6017608668500074082 + +-- !mmh3_64_v2_fold_2 -- +-4107623306750946434 + +-- !mmh3_u64_v2_fold_1 -- +12429135405209477534 + +-- !mmh3_u64_v2_fold_2 -- +14339120766958605182 + -- !sql -- \N @@ -46,3 +116,4 @@ -- !sql -- 7001965798170371843 + diff --git a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy index 74acd20a9982ce..ace3379c99e652 100644 --- a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy @@ -32,6 +32,73 @@ suite("test_hash_function", "arrow_flight_sql") { qt_mmh3_64_v2_2 "SELECT MURMUR_HASH3_64_V2('1000209601_1756808272');" qt_mmh3_64_v2_3 "SELECT MURMUR_HASH3_64_V2('hello world');" qt_mmh3_64_v2_4 "SELECT MURMUR_HASH3_64_V2('apache doris');" + qt_mmh3_64_v2_5 "SELECT MURMUR_HASH3_64_V2('1013199993_1756808272');" + qt_mmh3_64_v2_6 "SELECT MURMUR_HASH3_64_V2('1020273884_1756808272');" + + // murmur_hash3_u64_v2 tests + qt_mmh3_u64_v2_1 "SELECT MURMUR_HASH3_U64_V2(NULL);" + qt_mmh3_u64_v2_2 "SELECT MURMUR_HASH3_U64_V2('1000209601_1756808272');" + qt_mmh3_u64_v2_3 "SELECT MURMUR_HASH3_U64_V2('hello world');" + qt_mmh3_u64_v2_4 "SELECT MURMUR_HASH3_U64_V2('apache doris');" + qt_mmh3_u64_v2_5 "SELECT MURMUR_HASH3_U64_V2('1013199993_1756808272');" + qt_mmh3_u64_v2_6 "SELECT MURMUR_HASH3_U64_V2('1020273884_1756808272');" + qt_mmh3_u64_v2_7 "SELECT MURMUR_HASH3_U64_V2('');" + qt_mmh3_u64_v2_8 "SELECT MURMUR_HASH3_U64_V2('a');" + qt_mmh3_u64_v2_9 "SELECT MURMUR_HASH3_U64_V2('hello', 'world');" + qt_mmh3_u64_v2_10 "SELECT MURMUR_HASH3_U64_V2('hello', 'world', '!');" + + // Validation: murmur_hash3_u64_v2 should equal (murmur_hash3_64_v2 & 2^64-1) + def validate_mmh3_u64_v2 = { String... args -> + def argList = args.collect { "'${it}'" }.join(', ') + def u64_res = sql "SELECT MURMUR_HASH3_U64_V2(${argList});" + def v2_masked = sql "SELECT CAST(MURMUR_HASH3_64_V2(${argList}) AS LARGEINT) & 18446744073709551615;" + assertEquals(u64_res, v2_masked); + } + + validate_mmh3_u64_v2('1000209601_1756808272'); + validate_mmh3_u64_v2('hello world'); + validate_mmh3_u64_v2('apache doris'); + validate_mmh3_u64_v2('1013199993_1756808272'); + validate_mmh3_u64_v2('1020273884_1756808272'); + validate_mmh3_u64_v2(''); + validate_mmh3_u64_v2('a'); + validate_mmh3_u64_v2('你好🤣'); + validate_mmh3_u64_v2('アパッチドリス'); + + // Table-based tests for mmh3_64_v2 and mmh3_u64_v2 + sql "DROP TABLE IF EXISTS test_hash_tbl;" + sql """ + CREATE TABLE test_hash_tbl ( + id INT, + str_col VARCHAR(100) + ) DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_num" = "1"); + """ + + sql """ + INSERT INTO test_hash_tbl VALUES + (1, '1000209601_1756808272'), + (2, 'hello world'), + (3, NULL), + (4, ''), + (5, 'apache doris'), + (6, '1013199993_1756808272'), + (7, '1020273884_1756808272'), + (8, '你好🤣'), + (9, 'アパッチドリス'); + """ + + qt_mmh3_64_v2_table "SELECT id, MURMUR_HASH3_64_V2(str_col) FROM test_hash_tbl ORDER BY id;" + qt_mmh3_u64_v2_table "SELECT id, MURMUR_HASH3_U64_V2(str_col) FROM test_hash_tbl ORDER BY id;" + + sql "DROP TABLE IF EXISTS test_hash_tbl;" + + // Constant folding tests + qt_mmh3_64_v2_fold_1 "SELECT MURMUR_HASH3_64_V2('test') + 1;" + qt_mmh3_64_v2_fold_2 "SELECT MURMUR_HASH3_64_V2('a', 'b') * 2;" + qt_mmh3_u64_v2_fold_1 "SELECT MURMUR_HASH3_U64_V2('test') + 1;" + qt_mmh3_u64_v2_fold_2 "SELECT MURMUR_HASH3_U64_V2('a', 'b') * 2;" qt_sql "SELECT xxhash_32(null);" qt_sql "SELECT xxhash_32(\"hello\");"