@@ -33,7 +33,7 @@ use regex::Regex;
3333use std:: any:: Any ;
3434use std:: sync:: { Arc , OnceLock } ;
3535
36- #[ derive( Debug ) ]
36+ #[ derive( Debug , PartialEq , Eq , Hash ) ]
3737pub struct RegexpSubstrFunc {
3838 signature : Signature ,
3939}
@@ -335,7 +335,7 @@ fn compile_regex(regex: &str, flags: Option<&str>) -> Result<Regex, ArrowError>
335335mod tests {
336336 use crate :: regex:: regexpsubstr:: { regexp_substr, RegexpSubstrFunc } ;
337337 use arrow:: array:: { Array , ArrayRef , Int64Array , LargeStringArray , StringArray } ;
338- use arrow:: datatypes:: DataType ;
338+ use arrow:: datatypes:: { DataType , Field } ;
339339 use datafusion_common:: ScalarValue ;
340340 use datafusion_expr:: { ScalarFunctionArgs , ScalarUDFImpl } ;
341341 use datafusion_expr_common:: columnar_value:: ColumnarValue ;
@@ -371,14 +371,30 @@ mod tests {
371371 ScalarValue :: LargeUtf8 as fn ( Option < String > ) -> ScalarValue ,
372372 ) ,
373373 ] {
374+ let args_vec = vec ! [
375+ ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
376+ ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
377+ ] ;
378+ let arg_fields = args_vec
379+ . iter ( )
380+ . enumerate ( )
381+ . map ( |( idx, arg) | {
382+ Field :: new ( format ! ( "f_{idx}" ) , arg. data_type ( ) , true ) . into ( )
383+ } )
384+ . collect ( ) ;
374385 let result =
375386 RegexpSubstrFunc :: new ( ) . invoke_with_args ( ScalarFunctionArgs {
376- args : vec ! [
377- ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
378- ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
379- ] ,
387+ args : args_vec,
388+ arg_fields,
380389 number_rows : 1 ,
381- return_type : data_type,
390+ return_field : Arc :: new ( Field :: new (
391+ "f" ,
392+ data_type. clone ( ) ,
393+ true ,
394+ ) ) ,
395+ config_options : Arc :: new (
396+ datafusion_common:: config:: ConfigOptions :: default ( ) ,
397+ ) ,
382398 } ) ;
383399 match result {
384400 Ok ( ColumnarValue :: Scalar (
@@ -422,14 +438,26 @@ mod tests {
422438 ) ,
423439 _ => unreachable ! ( ) ,
424440 } ;
441+ let args_vec = vec ! [
442+ ColumnarValue :: Array ( Arc :: new( array_values) ) ,
443+ ColumnarValue :: Scalar ( regex) ,
444+ ] ;
445+ let arg_fields = args_vec
446+ . iter ( )
447+ . enumerate ( )
448+ . map ( |( idx, arg) | {
449+ Field :: new ( format ! ( "f_{idx}" ) , arg. data_type ( ) , true ) . into ( )
450+ } )
451+ . collect ( ) ;
425452 let result =
426453 RegexpSubstrFunc :: new ( ) . invoke_with_args ( ScalarFunctionArgs {
427- args : vec ! [
428- ColumnarValue :: Array ( Arc :: new( array_values) ) ,
429- ColumnarValue :: Scalar ( regex) ,
430- ] ,
454+ args : args_vec,
455+ arg_fields,
431456 number_rows : 1 ,
432- return_type : data_type,
457+ return_field : Arc :: new ( Field :: new ( "f" , data_type. clone ( ) , true ) ) ,
458+ config_options : Arc :: new (
459+ datafusion_common:: config:: ConfigOptions :: default ( ) ,
460+ ) ,
433461 } ) ;
434462 match result {
435463 Ok ( ColumnarValue :: Array ( array) ) => {
@@ -511,22 +539,34 @@ mod tests {
511539 ScalarValue :: LargeUtf8 as fn ( Option < String > ) -> ScalarValue ,
512540 ) ,
513541 ] {
542+ let args_vec = vec ! [
543+ ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
544+ ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
545+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
546+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
547+ ColumnarValue :: Scalar ( scalar( Some ( flags[ spos] . to_string( ) ) ) ) ,
548+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( group_num[ spos] ) ) ) ,
549+ ] ;
550+ let arg_fields = args_vec
551+ . iter ( )
552+ . enumerate ( )
553+ . map ( |( idx, arg) | {
554+ Field :: new ( format ! ( "f_{idx}" ) , arg. data_type ( ) , true ) . into ( )
555+ } )
556+ . collect ( ) ;
514557 let result =
515558 RegexpSubstrFunc :: new ( ) . invoke_with_args ( ScalarFunctionArgs {
516- args : vec ! [
517- ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
518- ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
519- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
520- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
521- ColumnarValue :: Scalar ( scalar( Some (
522- flags[ spos] . to_string( ) ,
523- ) ) ) ,
524- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some (
525- group_num[ spos] ,
526- ) ) ) ,
527- ] ,
559+ args : args_vec,
560+ arg_fields,
528561 number_rows : 1 ,
529- return_type : data_type,
562+ return_field : Arc :: new ( Field :: new (
563+ "f" ,
564+ data_type. clone ( ) ,
565+ true ,
566+ ) ) ,
567+ config_options : Arc :: new (
568+ datafusion_common:: config:: ConfigOptions :: default ( ) ,
569+ ) ,
530570 } ) ;
531571 match result {
532572 Ok ( ColumnarValue :: Scalar (
0 commit comments