From c39f9ff76e87dca465338c7aef1284107e0faa05 Mon Sep 17 00:00:00 2001 From: Terran Date: Wed, 4 Mar 2026 11:07:57 +0800 Subject: [PATCH 1/5] Add regexp function (enabled in Hive library) --- .../java/org/apache/calcite/sql/SqlKind.java | 5 ++++- .../calcite/sql/dialect/HiveSqlDialect.java | 3 +++ .../calcite/sql/fun/SqlLibraryOperators.java | 7 +++---- .../calcite/util/RelToSqlConverterUtil.java | 17 +++++++++++++++++ .../rel/rel2sql/RelToSqlConverterTest.java | 10 ++++++++++ site/_docs/reference.md | 2 +- 6 files changed, 38 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java b/core/src/main/java/org/apache/calcite/sql/SqlKind.java index d2c57864b85c..aca201cb8759 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java @@ -1357,7 +1357,10 @@ public enum SqlKind { * commands for them. Use OTHER_DDL in the short term, but we are happy to add * new enum values for your object types. Just ask! */ - OTHER_DDL; + OTHER_DDL, + + /** The {@code REGEXP} function. */ + REGEXP; //~ Static fields/initializers --------------------------------------------- diff --git a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java index db57e5affedc..1e32f0e6db32 100644 --- a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java +++ b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java @@ -129,6 +129,9 @@ public HiveSqlDialect(Context context) { case TRIM: RelToSqlConverterUtil.unparseHiveTrim(writer, call, leftPrec, rightPrec); break; + case REGEXP: + RelToSqlConverterUtil.unparseRegexp(writer, call, leftPrec, rightPrec); + break; default: super.unparseCall(writer, call, leftPrec, rightPrec); } diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index 947223b682a8..f34bfc249a7a 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -705,11 +705,10 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, public static final SqlFunction REGEXP_SUBSTR = REGEXP_EXTRACT.withName("REGEXP_SUBSTR"); /** The "REGEXP(value, regexp)" function, equivalent to {@link #RLIKE}. */ - @LibraryOperator(libraries = {SPARK}) + @LibraryOperator(libraries = {SPARK, HIVE}) public static final SqlFunction REGEXP = - SqlBasicFunction.create("REGEXP", ReturnTypes.BOOLEAN_NULLABLE, - OperandTypes.STRING_STRING, - SqlFunctionCategory.STRING); + SqlBasicFunction.create(SqlKind.REGEXP, ReturnTypes.BOOLEAN_NULLABLE, + OperandTypes.STRING_STRING); /** The "REGEXP_LIKE(value, regexp)" function, equivalent to {@link #RLIKE}. */ @LibraryOperator(libraries = {SPARK, MYSQL, POSTGRESQL, ORACLE}) diff --git a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java index ddb37e8dac3e..ed4740602302 100644 --- a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java +++ b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java @@ -445,4 +445,21 @@ public ClickHouseSqlArrayTypeNameSpec(SqlTypeNameSpec elementTypeName, writer.endList(frame); } } + + /** + * Unparses REGEXP function calls by converting from function call format + * (e.g., REGEXP(column, pattern)) to infix operator format (e.g., column REGEXP pattern). + */ + public static void unparseRegexp(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { + if (call.operandCount() != 2) { + throw new IllegalArgumentException("REGEXP operator requires exactly 2 operands"); + } + + final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SIMPLE, "", ""); + call.operand(0).unparse(writer, leftPrec, rightPrec); + writer.sep("REGEXP", true); + call.operand(1).unparse(writer, leftPrec, rightPrec); + writer.endList(frame); + } + } diff --git a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java index a98f7fd1f8ff..ea81178268d7 100644 --- a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java @@ -11761,4 +11761,14 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { .ok(expected); } + /** Test case for + * [CALCITE-7428] + * Add regexp function (enabled in Hive library). */ + @Test void testRegexpWithHive() { + final String query = "select \"brand_name\"\n" + + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') "; + final String expectedSpark = "SELECT `brand_name`\nFROM " + + "`foodmart`.`product`\nWHERE `brand_name` REGEXP '[a-zA-Z]'"; + sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); + } } diff --git a/site/_docs/reference.md b/site/_docs/reference.md index e1ea8e720697..ee3d2d739f8a 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -3004,7 +3004,7 @@ In the following: | b s | POW(numeric1, numeric2) | Returns *numeric1* raised to the power *numeric2* | b c h q m o f s p r | POWER(numeric1, numeric2) | Returns *numeric1* raised to the power of *numeric2* | p r | RANDOM() | Generates a random double between 0 and 1 inclusive -| s | REGEXP(string, regexp) | Equivalent to `string1 RLIKE string2` +| s h | REGEXP(string, regexp) | Equivalent to `string1 RLIKE string2` | b | REGEXP_CONTAINS(string, regexp) | Returns whether *string* is a partial match for the *regexp* | b | REGEXP_EXTRACT(string, regexp [, position [, occurrence]]) | Returns the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Returns NULL if there is no match | b | REGEXP_EXTRACT_ALL(string, regexp) | Returns an array of all substrings in *string* that matches the *regexp*. Returns an empty array if there is no match From e2c80f3136ffec043128f498ddcf5ae1c742dedf Mon Sep 17 00:00:00 2001 From: Terran Date: Wed, 4 Mar 2026 18:07:37 +0800 Subject: [PATCH 2/5] Add testCase --- .../calcite/util/RelToSqlConverterUtil.java | 2 +- .../rel/rel2sql/RelToSqlConverterTest.java | 24 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java index ed4740602302..8b276c1dc7d5 100644 --- a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java +++ b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java @@ -455,7 +455,7 @@ public static void unparseRegexp(SqlWriter writer, SqlCall call, int leftPrec, i throw new IllegalArgumentException("REGEXP operator requires exactly 2 operands"); } - final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SIMPLE, "", ""); + final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SIMPLE, "(", ")"); call.operand(0).unparse(writer, leftPrec, rightPrec); writer.sep("REGEXP", true); call.operand(1).unparse(writer, leftPrec, rightPrec); diff --git a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java index ea81178268d7..8e80ef639fd7 100644 --- a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java @@ -11768,7 +11768,29 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { final String query = "select \"brand_name\"\n" + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') "; final String expectedSpark = "SELECT `brand_name`\nFROM " - + "`foodmart`.`product`\nWHERE `brand_name` REGEXP '[a-zA-Z]'"; + + "`foodmart`.`product`\nWHERE (`brand_name` REGEXP '[a-zA-Z]')"; + sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); + } + + /** Test case for + * [CALCITE-7428] + * Add regexp function (enabled in Hive library). */ + @Test void testRegexpWithHiveIsNotNull() { + final String query = "select \"brand_name\"\n" + + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') is not null "; + final String expectedSpark = "SELECT `brand_name`\nFROM " + + "`foodmart`.`product`\nWHERE (`brand_name` REGEXP '[a-zA-Z]') IS NOT NULL"; + sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); + } + + /** Test case for + * [CALCITE-7428] + * Add regexp function (enabled in Hive library). */ + @Test void testSelectRegexpWithHiveIsNotNull() { + final String query = "select REGEXP(\"brand_name\",'[a-zA-Z]') is not null \n" + + "from \"product\""; + final String expectedSpark = "SELECT (`brand_name` REGEXP '[a-zA-Z]') IS NOT NULL\n" + + "FROM `foodmart`.`product`"; sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); } } From a8cd4bd00f769ffc9658eee9ee58543838e57620 Mon Sep 17 00:00:00 2001 From: Terran Date: Thu, 12 Mar 2026 20:04:44 +0800 Subject: [PATCH 3/5] Del REGEXP SqlKind --- .../main/java/org/apache/calcite/sql/SqlKind.java | 5 +---- .../apache/calcite/sql/dialect/HiveSqlDialect.java | 2 +- .../apache/calcite/sql/fun/SqlLibraryOperators.java | 2 +- .../apache/calcite/util/RelToSqlConverterUtil.java | 13 +++++++------ 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java b/core/src/main/java/org/apache/calcite/sql/SqlKind.java index aca201cb8759..d2c57864b85c 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java @@ -1357,10 +1357,7 @@ public enum SqlKind { * commands for them. Use OTHER_DDL in the short term, but we are happy to add * new enum values for your object types. Just ask! */ - OTHER_DDL, - - /** The {@code REGEXP} function. */ - REGEXP; + OTHER_DDL; //~ Static fields/initializers --------------------------------------------- diff --git a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java index 1e32f0e6db32..689ecc1ecf60 100644 --- a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java +++ b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java @@ -129,7 +129,7 @@ public HiveSqlDialect(Context context) { case TRIM: RelToSqlConverterUtil.unparseHiveTrim(writer, call, leftPrec, rightPrec); break; - case REGEXP: + case RLIKE: RelToSqlConverterUtil.unparseRegexp(writer, call, leftPrec, rightPrec); break; default: diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index f34bfc249a7a..2479a92ba247 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -707,7 +707,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, /** The "REGEXP(value, regexp)" function, equivalent to {@link #RLIKE}. */ @LibraryOperator(libraries = {SPARK, HIVE}) public static final SqlFunction REGEXP = - SqlBasicFunction.create(SqlKind.REGEXP, ReturnTypes.BOOLEAN_NULLABLE, + SqlBasicFunction.create("REGEXP", SqlKind.RLIKE, ReturnTypes.BOOLEAN_NULLABLE, OperandTypes.STRING_STRING); /** The "REGEXP_LIKE(value, regexp)" function, equivalent to {@link #RLIKE}. */ diff --git a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java index 8b276c1dc7d5..1c97f55ea1ce 100644 --- a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java +++ b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java @@ -454,12 +454,13 @@ public static void unparseRegexp(SqlWriter writer, SqlCall call, int leftPrec, i if (call.operandCount() != 2) { throw new IllegalArgumentException("REGEXP operator requires exactly 2 operands"); } - - final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SIMPLE, "(", ")"); - call.operand(0).unparse(writer, leftPrec, rightPrec); - writer.sep("REGEXP", true); - call.operand(1).unparse(writer, leftPrec, rightPrec); - writer.endList(frame); + if ("REGEXP".equals(call.getOperator().getName())) { + final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SIMPLE, "(", ")"); + call.operand(0).unparse(writer, leftPrec, rightPrec); + writer.sep("REGEXP", true); + call.operand(1).unparse(writer, leftPrec, rightPrec); + writer.endList(frame); + } } } From 41b5ac4c536d54657fe9b860b603a14c71da98d2 Mon Sep 17 00:00:00 2001 From: Terran Date: Thu, 12 Mar 2026 20:20:50 +0800 Subject: [PATCH 4/5] Change variable name --- .../calcite/rel/rel2sql/RelToSqlConverterTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java index 8e80ef639fd7..0f4bb5a8adbb 100644 --- a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java @@ -11767,9 +11767,9 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { @Test void testRegexpWithHive() { final String query = "select \"brand_name\"\n" + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') "; - final String expectedSpark = "SELECT `brand_name`\nFROM " + final String expectedHive = "SELECT `brand_name`\nFROM " + "`foodmart`.`product`\nWHERE (`brand_name` REGEXP '[a-zA-Z]')"; - sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); + sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedHive); } /** Test case for @@ -11778,9 +11778,9 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { @Test void testRegexpWithHiveIsNotNull() { final String query = "select \"brand_name\"\n" + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') is not null "; - final String expectedSpark = "SELECT `brand_name`\nFROM " + final String expectedHive = "SELECT `brand_name`\nFROM " + "`foodmart`.`product`\nWHERE (`brand_name` REGEXP '[a-zA-Z]') IS NOT NULL"; - sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); + sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedHive); } /** Test case for @@ -11789,8 +11789,8 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { @Test void testSelectRegexpWithHiveIsNotNull() { final String query = "select REGEXP(\"brand_name\",'[a-zA-Z]') is not null \n" + "from \"product\""; - final String expectedSpark = "SELECT (`brand_name` REGEXP '[a-zA-Z]') IS NOT NULL\n" + final String expectedHive = "SELECT (`brand_name` REGEXP '[a-zA-Z]') IS NOT NULL\n" + "FROM `foodmart`.`product`"; - sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedSpark); + sql(query).withLibrary(SqlLibrary.HIVE).withHive().ok(expectedHive); } } From ee4188451856322442090ca66990920bdb4f7b87 Mon Sep 17 00:00:00 2001 From: Terran Date: Fri, 13 Mar 2026 14:25:03 +0800 Subject: [PATCH 5/5] Remove redundant judgments from the unparseRegexp function and modify comments --- .../java/org/apache/calcite/util/RelToSqlConverterUtil.java | 3 --- .../apache/calcite/rel/rel2sql/RelToSqlConverterTest.java | 6 +++--- .../main/java/org/apache/calcite/test/SqlOperatorTest.java | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java index 1c97f55ea1ce..975bfc8505bb 100644 --- a/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java +++ b/core/src/main/java/org/apache/calcite/util/RelToSqlConverterUtil.java @@ -451,9 +451,6 @@ public ClickHouseSqlArrayTypeNameSpec(SqlTypeNameSpec elementTypeName, * (e.g., REGEXP(column, pattern)) to infix operator format (e.g., column REGEXP pattern). */ public static void unparseRegexp(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { - if (call.operandCount() != 2) { - throw new IllegalArgumentException("REGEXP operator requires exactly 2 operands"); - } if ("REGEXP".equals(call.getOperator().getName())) { final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.SIMPLE, "(", ")"); call.operand(0).unparse(writer, leftPrec, rightPrec); diff --git a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java index 0f4bb5a8adbb..7f9f10e55782 100644 --- a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java @@ -11763,7 +11763,7 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { /** Test case for * [CALCITE-7428] - * Add regexp function (enabled in Hive library). */ + * Support regexp function change regexp operator for Hive library. */ @Test void testRegexpWithHive() { final String query = "select \"brand_name\"\n" + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') "; @@ -11774,7 +11774,7 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { /** Test case for * [CALCITE-7428] - * Add regexp function (enabled in Hive library). */ + * Support regexp function change regexp operator for Hive library. */ @Test void testRegexpWithHiveIsNotNull() { final String query = "select \"brand_name\"\n" + "from \"product\" where REGEXP(\"brand_name\",'[a-zA-Z]') is not null "; @@ -11785,7 +11785,7 @@ public Sql schema(CalciteAssert.SchemaSpec schemaSpec) { /** Test case for * [CALCITE-7428] - * Add regexp function (enabled in Hive library). */ + * Support regexp function change regexp operator for Hive library. */ @Test void testSelectRegexpWithHiveIsNotNull() { final String query = "select REGEXP(\"brand_name\",'[a-zA-Z]') is not null \n" + "from \"product\""; diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index 41b45d3c761a..b6731f9e63f1 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -4232,6 +4232,7 @@ void checkIsNull(SqlOperatorFixture f, SqlOperator operator) { checkRlikeFunc(f, SqlLibrary.HIVE, SqlLibraryOperators.RLIKE); checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.RLIKE); checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP); + checkRlikeFunc(f, SqlLibrary.HIVE, SqlLibraryOperators.REGEXP); checkRlikeFunc(f, SqlLibrary.MYSQL, SqlLibraryOperators.RLIKE); checkNotRlikeFunc(f.withLibrary(SqlLibrary.HIVE)); checkNotRlikeFunc(f.withLibrary(SqlLibrary.SPARK));