From c9f32852f13a6e9cffebbbca8195481bc04a7ae8 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sun, 17 May 2026 16:46:41 +0800 Subject: [PATCH 1/2] fix generate_series table function overflows Co-authored-by: Copilot --- .../functions-table/src/generate_series.rs | 48 +++++++++++++------ .../test_files/table_functions.slt | 8 ++++ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index 175a6b3bff06c..d809b7af820ac 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -77,7 +77,7 @@ pub trait SeriesValue: fmt::Debug + Clone + Send + Sync + 'static { fn should_stop(&self, end: Self, step: &Self::StepType, include_end: bool) -> bool; /// Advance to the next value in the series - fn advance(&mut self, step: &Self::StepType) -> Result<()>; + fn advance(&mut self, end: &mut Self, step: &Self::StepType) -> Result<()>; /// Create an Arrow array from a vector of values fn create_array(&self, values: Vec) -> Result; @@ -97,8 +97,16 @@ impl SeriesValue for i64 { reach_end_int64(*self, end, *step, include_end) } - fn advance(&mut self, step: &Self::StepType) -> Result<()> { - *self += step; + fn advance(&mut self, end: &mut Self, step: &Self::StepType) -> Result<()> { + if let Some(next) = self.checked_add(*step) { + *self = next; + } else { + *end = if *step > 0 { + self.saturating_sub(1) + } else { + self.saturating_add(1) + }; + } Ok(()) } @@ -152,7 +160,7 @@ impl SeriesValue for TimestampValue { } } - fn advance(&mut self, step: &Self::StepType) -> Result<()> { + fn advance(&mut self, _end: &mut Self, step: &Self::StepType) -> Result<()> { let tz = self .parsed_tz .unwrap_or_else(|| Tz::from_str("+00:00").unwrap()); @@ -250,16 +258,18 @@ impl GenerateSeriesTable { step, include_end, name, - } => Arc::new(RwLock::new(GenericSeriesState { - schema: self.schema(), - start: *start, - end: *end, - step: *step, - current: *start, - batch_size, - include_end: *include_end, - name, - })), + } => { + Arc::new(RwLock::new(GenericSeriesState { + schema: self.schema(), + start: *start, + end: *end, + step: *step, + current: *start, + batch_size, + include_end: *include_end, + name, + })) + } GenSeriesArgs::TimestampArgs { start, end, @@ -391,7 +401,15 @@ impl LazyBatchGenerator for GenericSeriesState { .should_stop(self.end.clone(), &self.step, self.include_end) { buf.push(self.current.to_value_type()); - self.current.advance(&self.step)?; + if self + .current + .should_stop(self.end.clone(), &self.step, false) + { + self.current.advance(&mut self.end, &self.step)?; + break; + } + + self.current.advance(&mut self.end, &self.step)?; } if buf.is_empty() { diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index 3d654c4195feb..47417cc25a522 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -183,6 +183,14 @@ SELECT * FROM generate_series(1, 2, 3, 4) statement error DataFusion error: Error during planning: Argument \#1 must be an INTEGER, TIMESTAMP, DATE or NULL, got Utf8 SELECT * FROM generate_series('foo', 'bar') +# Regression test: generate_series with a step that would overflow i64 after the last +# included value must return the reachable values rather than an error, matching +# PostgreSQL/DuckDB behavior. +query I +SELECT * FROM generate_series(9223372036854775806, 9223372036854775807, 2) +---- +9223372036854775806 + # UDF and UDTF `generate_series` can be used simultaneously query ? rowsort SELECT generate_series(1, t1.end) FROM generate_series(3, 5) as t1(end) From d27cd10096e81d02eedfaa6f1101f18613005924 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sun, 17 May 2026 17:00:54 +0800 Subject: [PATCH 2/2] fmt Co-authored-by: Copilot --- .../functions-table/src/generate_series.rs | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index d809b7af820ac..184a8ff394bd1 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -258,18 +258,16 @@ impl GenerateSeriesTable { step, include_end, name, - } => { - Arc::new(RwLock::new(GenericSeriesState { - schema: self.schema(), - start: *start, - end: *end, - step: *step, - current: *start, - batch_size, - include_end: *include_end, - name, - })) - } + } => Arc::new(RwLock::new(GenericSeriesState { + schema: self.schema(), + start: *start, + end: *end, + step: *step, + current: *start, + batch_size, + include_end: *include_end, + name, + })), GenSeriesArgs::TimestampArgs { start, end,