From e2a2ac4fcb44fb3dc91444a233477cf4e6e76622 Mon Sep 17 00:00:00 2001 From: Sean Doherty Date: Sat, 16 May 2026 16:22:19 -0500 Subject: [PATCH 1/2] Fix time interval arithmetic wrapping --- .../expr-common/src/type_coercion/binary.rs | 43 +++++-- .../physical-expr/src/expressions/binary.rs | 115 ++++++++++++++++++ .../datetime/arith_time_interval.slt | 49 ++++---- 3 files changed, 176 insertions(+), 31 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index aec87ec5ff853..2debeec77504a 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -260,6 +260,30 @@ impl<'a> BinaryTypeCoercer<'a> { ) }) } + Plus if is_time_interval(lhs, rhs) => { + let time_type = if is_time(lhs) { lhs } else { rhs }; + let interval_type = Interval(MonthDayNano); + if is_time(lhs) { + return Ok(Signature { + lhs: time_type.clone(), + rhs: interval_type, + ret: time_type.clone(), + }); + } else { + return Ok(Signature { + lhs: interval_type, + rhs: time_type.clone(), + ret: time_type.clone(), + }); + } + } + Minus if is_time(lhs) && is_interval(rhs) => { + return Ok(Signature { + lhs: lhs.clone(), + rhs: Interval(MonthDayNano), + ret: lhs.clone(), + }); + } Minus if is_date_minus_date(lhs, rhs) => { return Ok(Signature { lhs: lhs.clone(), @@ -362,6 +386,18 @@ fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool { ) } +fn is_time(data_type: &DataType) -> bool { + matches!(data_type, DataType::Time32(_) | DataType::Time64(_)) +} + +fn is_interval(data_type: &DataType) -> bool { + matches!(data_type, DataType::Interval(_)) +} + +fn is_time_interval(lhs: &DataType, rhs: &DataType) -> bool { + (is_time(lhs) && is_interval(rhs)) || (is_interval(lhs) && is_time(rhs)) +} + /// Coercion rules for mathematics operators between decimal and non-decimal types. fn math_decimal_coercion( lhs_type: &DataType, @@ -1985,13 +2021,6 @@ fn temporal_math_coercion( (Time32(_) | Time64(_), Time32(_) | Time64(_)) => { Some((Interval(MonthDayNano), Interval(MonthDayNano))) } - // time + interval -> Interval - (Time32(_) | Time64(_), Interval(_)) => { - Some((Interval(MonthDayNano), Interval(MonthDayNano))) - } - (Interval(_), Time32(_) | Time64(_)) => { - Some((Interval(MonthDayNano), Interval(MonthDayNano))) - } // Interval * number => Interval ( Interval(_), diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index b92668fe9bd0d..524961fdfc53d 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -177,6 +177,117 @@ fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool { ) } +/// Apply PostgreSQL-compatible `time +/- interval` arithmetic. +/// +/// Arrow's kernels currently coerce these expressions through `Interval`, +/// which can produce values outside the 24-hour time-of-day domain. SQL `time` +/// values should remain times, wrapping around midnight. +fn apply_time_interval_arithmetic( + lhs: &ColumnarValue, + op: &Operator, + rhs: &ColumnarValue, +) -> Result> { + let (time, interval, subtract) = match (lhs.data_type(), op, rhs.data_type()) { + ( + DataType::Time32(_) | DataType::Time64(_), + Operator::Plus, + DataType::Interval(IntervalUnit::MonthDayNano), + ) => (lhs, rhs, false), + ( + DataType::Time32(_) | DataType::Time64(_), + Operator::Minus, + DataType::Interval(IntervalUnit::MonthDayNano), + ) => (lhs, rhs, true), + ( + DataType::Interval(IntervalUnit::MonthDayNano), + Operator::Plus, + DataType::Time32(_) | DataType::Time64(_), + ) => (rhs, lhs, false), + _ => return Ok(None), + }; + + let is_scalar = matches!( + (time, interval), + (ColumnarValue::Scalar(_), ColumnarValue::Scalar(_)) + ); + + let arrays = ColumnarValue::values_to_arrays(&[time.clone(), interval.clone()])?; + let result = apply_time_interval_arrays(&arrays[0], &arrays[1], subtract)?; + + if is_scalar { + Ok(Some(ColumnarValue::Scalar(ScalarValue::try_from_array( + result.as_ref(), + 0, + )?))) + } else { + Ok(Some(ColumnarValue::Array(result))) + } +} + +fn apply_time_interval_arrays( + time: &ArrayRef, + interval: &ArrayRef, + subtract: bool, +) -> Result { + macro_rules! apply_time_unit { + ($time_type:ty, $native:ty, $units_per_day:expr, $nanos_per_unit:expr) => {{ + let time = time.as_primitive::<$time_type>(); + let interval = interval.as_primitive::(); + let result = (0..time.len()) + .map(|i| { + if time.is_null(i) || interval.is_null(i) { + None + } else { + Some(wrap_time_value( + time.value(i) as i64, + interval.value(i), + subtract, + $units_per_day, + $nanos_per_unit, + ) as $native) + } + }) + .collect::>(); + Ok(Arc::new(result) as ArrayRef) + }}; + } + + match time.data_type() { + DataType::Time32(TimeUnit::Second) => { + apply_time_unit!(Time32SecondType, i32, 86_400, 1_000_000_000) + } + DataType::Time32(TimeUnit::Millisecond) => { + apply_time_unit!(Time32MillisecondType, i32, 86_400_000, 1_000_000) + } + DataType::Time64(TimeUnit::Microsecond) => { + apply_time_unit!(Time64MicrosecondType, i64, 86_400_000_000, 1_000) + } + DataType::Time64(TimeUnit::Nanosecond) => { + apply_time_unit!(Time64NanosecondType, i64, 86_400_000_000_000, 1) + } + other => internal_err!("Unexpected time data type for time arithmetic: {other}"), + } +} + +fn wrap_time_value( + time_value: i64, + interval: IntervalMonthDayNano, + subtract: bool, + units_per_day: i64, + nanos_per_unit: i64, +) -> i64 { + // Month and day interval fields are whole-day-or-larger components, so + // they have no effect after wrapping to a 24-hour time-of-day value. + let delta = (interval.nanoseconds / nanos_per_unit).rem_euclid(units_per_day); + let signed_delta = if subtract { + -(delta as i128) + } else { + delta as i128 + }; + + (time_value as i128 + signed_delta).rem_euclid(units_per_day as i128) as i64 +} + /// Computes the difference between two dates and returns the result as Int64 (days) /// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date returns an integer /// @@ -337,6 +448,10 @@ impl PhysicalExpr for BinaryExpr { let schema = batch.schema(); let input_schema = schema.as_ref(); + if let Some(result) = apply_time_interval_arithmetic(&lhs, self.op(), &rhs)? { + return Ok(result); + } + match self.op { Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add), Operator::Plus => return apply(&lhs, &rhs, add_wrapping), diff --git a/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt b/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt index 997eae9b1bd8b..a8c73a0dd8763 100644 --- a/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt +++ b/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt @@ -3,50 +3,51 @@ # time + interval → time # Add an interval to a time # time '01:00' + interval '3 hours' → 04:00:00 -# -# note that while the above reflects what postgresql does -# in the case of datafusion/arrow that is not the case. The -# result will be an interval, not a time. -query ? +query D SELECT '01:00'::time + interval '3 hours' ---- -4 hours +04:00:00 query T SELECT arrow_typeof('01:00'::time + interval '3 hours') ---- -Interval(MonthDayNano) +Time64(ns) -query ? +query D SELECT '22:00'::time + interval '3 hours' ---- -25 hours +01:00:00 + +query D +SELECT time '23:30' + interval '2 hours' +---- +01:30:00 -query ? +query D SELECT interval '3 hours' + '22:00'::time ---- -25 hours +01:00:00 -query ? +query D SELECT arrow_cast('22:00', 'Time32(Second)') + interval '3 hours' ---- -25 hours +01:00:00 -query ? +query D SELECT arrow_cast('22:00', 'Time32(Millisecond)') + interval '3 hours' ---- -25 hours +01:00:00 -query ? +query D SELECT arrow_cast('22:00', 'Time64(Microsecond)') + interval '3 hours' ---- -25 hours +01:00:00 -query ? +query D SELECT arrow_cast('22:00', 'Time64(Nanosecond)') + interval '3 hours' ---- -25 hours +01:00:00 # postgresql behavior # @@ -54,17 +55,17 @@ SELECT arrow_cast('22:00', 'Time64(Nanosecond)') + interval '3 hours' # Subtract an interval from a time # time '05:00' - interval '2 hours' → 03:00:00 -query ? +query D SELECT '05:00'::time - interval '2 hours' ---- -3 hours +03:00:00 query T SELECT arrow_typeof('05:00'::time - interval '2 hours') ---- -Interval(MonthDayNano) +Time64(ns) -query ? +query D SELECT '02:00'::time - interval '3 hours' ---- --1 hours +23:00:00 From f5221f286b74b42f12fcbb4771cadfad581e90e9 Mon Sep 17 00:00:00 2001 From: Sean Doherty Date: Sat, 16 May 2026 17:14:41 -0500 Subject: [PATCH 2/2] Handle subsecond time interval wrapping --- datafusion/physical-expr/src/expressions/binary.rs | 13 ++++++------- .../test_files/datetime/arith_time_interval.slt | 5 +++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 524961fdfc53d..7900c307746df 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -278,14 +278,13 @@ fn wrap_time_value( ) -> i64 { // Month and day interval fields are whole-day-or-larger components, so // they have no effect after wrapping to a 24-hour time-of-day value. - let delta = (interval.nanoseconds / nanos_per_unit).rem_euclid(units_per_day); - let signed_delta = if subtract { - -(delta as i128) - } else { - delta as i128 - }; + let nanos_per_unit = nanos_per_unit as i128; + let nanos_per_day = units_per_day as i128 * nanos_per_unit; + let delta = (interval.nanoseconds as i128).rem_euclid(nanos_per_day); + let signed_delta = if subtract { -delta } else { delta }; - (time_value as i128 + signed_delta).rem_euclid(units_per_day as i128) as i64 + ((time_value as i128 * nanos_per_unit + signed_delta).rem_euclid(nanos_per_day) + / nanos_per_unit) as i64 } /// Computes the difference between two dates and returns the result as Int64 (days) diff --git a/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt b/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt index a8c73a0dd8763..029b3467a5ec5 100644 --- a/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt +++ b/datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt @@ -49,6 +49,11 @@ SELECT arrow_cast('22:00', 'Time64(Nanosecond)') + interval '3 hours' ---- 01:00:00 +query D +SELECT arrow_cast('00:00:00', 'Time32(Second)') - interval '500 milliseconds' +---- +23:59:59 + # postgresql behavior # # time - interval → time