From f27bf1f334f077a222520628d7679ab6b23582b1 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 24 Apr 2025 13:40:23 +0100 Subject: [PATCH 1/3] NAN, -NAN, -Inf and Inf cannot be min/max values of a primitive array There's a annoying side problem here that min == max might still mean that array is non constant but I think it's better than min/max being useless. We might need a function to ask if there's any indefinite values in the array --- encodings/alp/src/alp/compute/compare.rs | 4 +- .../src/arrays/primitive/compute/min_max.rs | 38 ++++++++++++++++++- vortex-dtype/src/ptype.rs | 14 +++++++ 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/encodings/alp/src/alp/compute/compare.rs b/encodings/alp/src/alp/compute/compare.rs index c0346ae9487..920edf7138d 100644 --- a/encodings/alp/src/alp/compute/compare.rs +++ b/encodings/alp/src/alp/compute/compare.rs @@ -78,7 +78,7 @@ where Operator::Gt | Operator::Gte => { // Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan. // All values in the encoded array are definitely finite - let is_not_finite = value.is_infinite() || NativePType::is_nan(value); + let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value); if is_not_finite { Ok(Some( ConstantArray::new(value.is_sign_negative(), alp.len()).into_array(), @@ -97,7 +97,7 @@ where Operator::Lt | Operator::Lte => { // Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan. // All values in the encoded array are definitely finite - let is_not_finite = value.is_infinite() || NativePType::is_nan(value); + let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value); if is_not_finite { Ok(Some( ConstantArray::new(value.is_sign_positive(), alp.len()).into_array(), diff --git a/vortex-array/src/arrays/primitive/compute/min_max.rs b/vortex-array/src/arrays/primitive/compute/min_max.rs index cdd541ab728..a412f9e1bc8 100644 --- a/vortex-array/src/arrays/primitive/compute/min_max.rs +++ b/vortex-array/src/arrays/primitive/compute/min_max.rs @@ -38,10 +38,13 @@ where fn compute_min_max<'a, T>(iter: impl Iterator, dtype: &DType) -> Option where - T: Into + NativePType + Copy, + T: Into + NativePType, { // this `compare` function provides a total ordering (even for NaN values) - match iter.minmax_by(|a, b| a.total_compare(**b)) { + match iter + .filter(|v| !v.is_nan() && !v.is_infinite()) + .minmax_by(|a, b| a.total_compare(**b)) + { itertools::MinMaxResult::NoElements => None, itertools::MinMaxResult::OneElement(&x) => { let scalar = Scalar::new(dtype.clone(), x.into()); @@ -56,3 +59,34 @@ where }), } } + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + + use crate::arrays::PrimitiveArray; + use crate::compute::min_max; + use crate::validity::Validity; + + #[test] + fn min_max_nan() { + let array = PrimitiveArray::new( + buffer![f32::NAN, -1.0 / 0.0, -1.0, 1.0], + Validity::NonNullable, + ); + let min_max = min_max(&array).unwrap().unwrap(); + assert_eq!(f32::try_from(min_max.min).unwrap(), -1.0); + assert_eq!(f32::try_from(min_max.max).unwrap(), 1.0); + } + + #[test] + fn min_max_inf() { + let array = PrimitiveArray::new( + buffer![f32::INFINITY, f32::NEG_INFINITY, -1.0, 1.0], + Validity::NonNullable, + ); + let min_max = min_max(&array).unwrap().unwrap(); + assert_eq!(f32::try_from(min_max.min).unwrap(), -1.0); + assert_eq!(f32::try_from(min_max.max).unwrap(), 1.0); + } +} diff --git a/vortex-dtype/src/ptype.rs b/vortex-dtype/src/ptype.rs index b6e5af4df57..e74cc809b4d 100644 --- a/vortex-dtype/src/ptype.rs +++ b/vortex-dtype/src/ptype.rs @@ -80,6 +80,10 @@ pub trait NativePType: /// For integer types, this is always `false` fn is_nan(self) -> bool; + /// Whether this instance (`self`) is Infinite + /// For integer types, this is always `false` + fn is_infinite(self) -> bool; + /// Compare another instance of this type to `self`, providing a total ordering fn total_compare(self, other: Self) -> Ordering; @@ -121,6 +125,11 @@ macro_rules! native_ptype { false } + #[inline] + fn is_infinite(self) -> bool { + false + } + #[inline] fn total_compare(self, other: Self) -> Ordering { self.cmp(&other) @@ -144,6 +153,11 @@ macro_rules! native_float_ptype { <$T>::is_nan(self) } + #[inline] + fn is_infinite(self) -> bool { + <$T>::is_infinite(self) + } + #[inline] fn total_compare(self, other: Self) -> Ordering { self.total_cmp(&other) From b69dc104db89372df2b99c31a5841174a5293b31 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 25 Apr 2025 08:11:42 +0100 Subject: [PATCH 2/3] just nan --- vortex-array/src/arrays/primitive/compute/min_max.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/primitive/compute/min_max.rs b/vortex-array/src/arrays/primitive/compute/min_max.rs index a412f9e1bc8..befad724424 100644 --- a/vortex-array/src/arrays/primitive/compute/min_max.rs +++ b/vortex-array/src/arrays/primitive/compute/min_max.rs @@ -40,9 +40,10 @@ fn compute_min_max<'a, T>(iter: impl Iterator, dtype: &DType) -> O where T: Into + NativePType, { - // this `compare` function provides a total ordering (even for NaN values) + // `total_compare` function provides a total ordering (even for NaN values). + // However, we exclude NaNs from min max as they're not useful for any purpose where min/max would be used match iter - .filter(|v| !v.is_nan() && !v.is_infinite()) + .filter(|v| !v.is_nan()) .minmax_by(|a, b| a.total_compare(**b)) { itertools::MinMaxResult::NoElements => None, From 7ff17d63292676b2d7acb61bdd9216e69a929b4e Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 25 Apr 2025 08:40:14 +0100 Subject: [PATCH 3/3] tests --- vortex-array/src/arrays/primitive/compute/min_max.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vortex-array/src/arrays/primitive/compute/min_max.rs b/vortex-array/src/arrays/primitive/compute/min_max.rs index befad724424..1f28a364be2 100644 --- a/vortex-array/src/arrays/primitive/compute/min_max.rs +++ b/vortex-array/src/arrays/primitive/compute/min_max.rs @@ -72,7 +72,7 @@ mod tests { #[test] fn min_max_nan() { let array = PrimitiveArray::new( - buffer![f32::NAN, -1.0 / 0.0, -1.0, 1.0], + buffer![f32::NAN, -f32::NAN, -1.0, 1.0], Validity::NonNullable, ); let min_max = min_max(&array).unwrap().unwrap(); @@ -87,7 +87,7 @@ mod tests { Validity::NonNullable, ); let min_max = min_max(&array).unwrap().unwrap(); - assert_eq!(f32::try_from(min_max.min).unwrap(), -1.0); - assert_eq!(f32::try_from(min_max.max).unwrap(), 1.0); + assert_eq!(f32::try_from(min_max.min).unwrap(), f32::NEG_INFINITY); + assert_eq!(f32::try_from(min_max.max).unwrap(), f32::INFINITY); } }