Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 36 additions & 7 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,30 @@ impl<'a> BinaryTypeCoercer<'a> {
)
})
}
Plus if is_time_interval(lhs, rhs) => {
let time_type = if is_time(lhs) { lhs } else { rhs };
let interval_type = Interval(MonthDayNano);
if is_time(lhs) {
return Ok(Signature {
lhs: time_type.clone(),
rhs: interval_type,
ret: time_type.clone(),
});
} else {
return Ok(Signature {
lhs: interval_type,
rhs: time_type.clone(),
ret: time_type.clone(),
});
}
}
Minus if is_time(lhs) && is_interval(rhs) => {
return Ok(Signature {
lhs: lhs.clone(),
rhs: Interval(MonthDayNano),
ret: lhs.clone(),
});
}
Minus if is_date_minus_date(lhs, rhs) => {
return Ok(Signature {
lhs: lhs.clone(),
Expand Down Expand Up @@ -362,6 +386,18 @@ fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
)
}

fn is_time(data_type: &DataType) -> bool {
matches!(data_type, DataType::Time32(_) | DataType::Time64(_))
}

fn is_interval(data_type: &DataType) -> bool {
matches!(data_type, DataType::Interval(_))
}

fn is_time_interval(lhs: &DataType, rhs: &DataType) -> bool {
(is_time(lhs) && is_interval(rhs)) || (is_interval(lhs) && is_time(rhs))
}

/// Coercion rules for mathematics operators between decimal and non-decimal types.
fn math_decimal_coercion(
lhs_type: &DataType,
Expand Down Expand Up @@ -1985,13 +2021,6 @@ fn temporal_math_coercion(
(Time32(_) | Time64(_), Time32(_) | Time64(_)) => {
Some((Interval(MonthDayNano), Interval(MonthDayNano)))
}
// time + interval -> Interval
(Time32(_) | Time64(_), Interval(_)) => {
Some((Interval(MonthDayNano), Interval(MonthDayNano)))
}
(Interval(_), Time32(_) | Time64(_)) => {
Some((Interval(MonthDayNano), Interval(MonthDayNano)))
}
// Interval * number => Interval
(
Interval(_),
Expand Down
114 changes: 114 additions & 0 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,116 @@ fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
)
}

/// Apply PostgreSQL-compatible `time +/- interval` arithmetic.
///
/// Arrow's kernels currently coerce these expressions through `Interval`,
/// which can produce values outside the 24-hour time-of-day domain. SQL `time`
/// values should remain times, wrapping around midnight.
fn apply_time_interval_arithmetic(
lhs: &ColumnarValue,
op: &Operator,
rhs: &ColumnarValue,
) -> Result<Option<ColumnarValue>> {
let (time, interval, subtract) = match (lhs.data_type(), op, rhs.data_type()) {
(
DataType::Time32(_) | DataType::Time64(_),
Operator::Plus,
DataType::Interval(IntervalUnit::MonthDayNano),
) => (lhs, rhs, false),
(
DataType::Time32(_) | DataType::Time64(_),
Operator::Minus,
DataType::Interval(IntervalUnit::MonthDayNano),
) => (lhs, rhs, true),
(
DataType::Interval(IntervalUnit::MonthDayNano),
Operator::Plus,
DataType::Time32(_) | DataType::Time64(_),
) => (rhs, lhs, false),
_ => return Ok(None),
};

let is_scalar = matches!(
(time, interval),
(ColumnarValue::Scalar(_), ColumnarValue::Scalar(_))
);

let arrays = ColumnarValue::values_to_arrays(&[time.clone(), interval.clone()])?;
let result = apply_time_interval_arrays(&arrays[0], &arrays[1], subtract)?;

if is_scalar {
Ok(Some(ColumnarValue::Scalar(ScalarValue::try_from_array(
result.as_ref(),
0,
)?)))
} else {
Ok(Some(ColumnarValue::Array(result)))
}
}

fn apply_time_interval_arrays(
time: &ArrayRef,
interval: &ArrayRef,
subtract: bool,
) -> Result<ArrayRef> {
macro_rules! apply_time_unit {
($time_type:ty, $native:ty, $units_per_day:expr, $nanos_per_unit:expr) => {{
let time = time.as_primitive::<$time_type>();
let interval = interval.as_primitive::<IntervalMonthDayNanoType>();
let result = (0..time.len())
.map(|i| {
if time.is_null(i) || interval.is_null(i) {
None
} else {
Some(wrap_time_value(
time.value(i) as i64,
interval.value(i),
subtract,
$units_per_day,
$nanos_per_unit,
) as $native)
}
})
.collect::<PrimitiveArray<$time_type>>();
Ok(Arc::new(result) as ArrayRef)
}};
}

match time.data_type() {
DataType::Time32(TimeUnit::Second) => {
apply_time_unit!(Time32SecondType, i32, 86_400, 1_000_000_000)
}
DataType::Time32(TimeUnit::Millisecond) => {
apply_time_unit!(Time32MillisecondType, i32, 86_400_000, 1_000_000)
}
DataType::Time64(TimeUnit::Microsecond) => {
apply_time_unit!(Time64MicrosecondType, i64, 86_400_000_000, 1_000)
}
DataType::Time64(TimeUnit::Nanosecond) => {
apply_time_unit!(Time64NanosecondType, i64, 86_400_000_000_000, 1)
}
other => internal_err!("Unexpected time data type for time arithmetic: {other}"),
}
}

fn wrap_time_value(
time_value: i64,
interval: IntervalMonthDayNano,
subtract: bool,
units_per_day: i64,
nanos_per_unit: i64,
) -> i64 {
// Month and day interval fields are whole-day-or-larger components, so
// they have no effect after wrapping to a 24-hour time-of-day value.
let nanos_per_unit = nanos_per_unit as i128;
let nanos_per_day = units_per_day as i128 * nanos_per_unit;
let delta = (interval.nanoseconds as i128).rem_euclid(nanos_per_day);
let signed_delta = if subtract { -delta } else { delta };

((time_value as i128 * nanos_per_unit + signed_delta).rem_euclid(nanos_per_day)
/ nanos_per_unit) as i64
}

/// Computes the difference between two dates and returns the result as Int64 (days)
/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date returns an integer
///
Expand Down Expand Up @@ -337,6 +447,10 @@ impl PhysicalExpr for BinaryExpr {
let schema = batch.schema();
let input_schema = schema.as_ref();

if let Some(result) = apply_time_interval_arithmetic(&lhs, self.op(), &rhs)? {
return Ok(result);
}

match self.op {
Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add),
Operator::Plus => return apply(&lhs, &rhs, add_wrapping),
Expand Down
54 changes: 30 additions & 24 deletions datafusion/sqllogictest/test_files/datetime/arith_time_interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3,68 +3,74 @@
# time + interval → time
# Add an interval to a time
# time '01:00' + interval '3 hours' → 04:00:00
#
# note that while the above reflects what postgresql does
# in the case of datafusion/arrow that is not the case. The
# result will be an interval, not a time.

query ?
query D
SELECT '01:00'::time + interval '3 hours'
----
4 hours
04:00:00

query T
SELECT arrow_typeof('01:00'::time + interval '3 hours')
----
Interval(MonthDayNano)
Time64(ns)

query ?
query D
SELECT '22:00'::time + interval '3 hours'
----
25 hours
01:00:00

query D
SELECT time '23:30' + interval '2 hours'
----
01:30:00

query ?
query D
SELECT interval '3 hours' + '22:00'::time
----
25 hours
01:00:00

query ?
query D
SELECT arrow_cast('22:00', 'Time32(Second)') + interval '3 hours'
----
25 hours
01:00:00

query ?
query D
SELECT arrow_cast('22:00', 'Time32(Millisecond)') + interval '3 hours'
----
25 hours
01:00:00

query ?
query D
SELECT arrow_cast('22:00', 'Time64(Microsecond)') + interval '3 hours'
----
25 hours
01:00:00

query ?
query D
SELECT arrow_cast('22:00', 'Time64(Nanosecond)') + interval '3 hours'
----
25 hours
01:00:00

query D
SELECT arrow_cast('00:00:00', 'Time32(Second)') - interval '500 milliseconds'
----
23:59:59

# postgresql behavior
#
# time - interval → time
# Subtract an interval from a time
# time '05:00' - interval '2 hours' → 03:00:00

query ?
query D
SELECT '05:00'::time - interval '2 hours'
----
3 hours
03:00:00

query T
SELECT arrow_typeof('05:00'::time - interval '2 hours')
----
Interval(MonthDayNano)
Time64(ns)

query ?
query D
SELECT '02:00'::time - interval '3 hours'
----
-1 hours
23:00:00