From b423c80a2ec99cab74dfc627b9c81486c865dcb7 Mon Sep 17 00:00:00 2001 From: "William T. Nelson" <35801+wtn@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:12:54 -0500 Subject: [PATCH] Added support for non-UTC time zones in datetime conversion - #130 Assisted-by: Claude --- CHANGELOG.md | 1 + Cargo.lock | 1 + ext/polars/Cargo.toml | 1 + ext/polars/src/conversion/any_value.rs | 4 +- ext/polars/src/conversion/chunked_array.rs | 21 +++----- ext/polars/src/conversion/datetime.rs | 59 +++++++++++++++++++--- lib/polars.rb | 1 + lib/polars/utils/convert.rb | 14 ++--- polars-df.gemspec | 1 + test/data_frame_test.rb | 9 ++++ test/types_test.rb | 20 ++++++-- 11 files changed, 100 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 825c469a30..f175ada4e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.25.2 (unreleased) - Fixed `unpivot` method when `on` option is `nil` +- Added support for non-UTC time zones when converting datetime values to Ruby ## 0.25.1 (2026-03-23) diff --git a/Cargo.lock b/Cargo.lock index d7c7aca602..063f3d552a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2165,6 +2165,7 @@ dependencies = [ "ahash", "bytes", "chrono", + "chrono-tz", "either", "magnus", "mimalloc", diff --git a/ext/polars/Cargo.toml b/ext/polars/Cargo.toml index 401392adb5..29d9c36ed8 100644 --- a/ext/polars/Cargo.toml +++ b/ext/polars/Cargo.toml @@ -16,6 +16,7 @@ ahash = "0.8" arrow = { package = "polars-arrow", version = "=0.53.0" } bytes = "1" chrono = "0.4" +chrono-tz = "0.10" either = "1.8" magnus = { version = "0.8", features = ["chrono"] } num-traits = "0.2" diff --git a/ext/polars/src/conversion/any_value.rs b/ext/polars/src/conversion/any_value.rs index 115038f477..95910621f4 100644 --- a/ext/polars/src/conversion/any_value.rs +++ b/ext/polars/src/conversion/any_value.rs @@ -55,10 +55,10 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value { }, AnyValue::Date(v) => pl_utils(ruby).funcall("_to_ruby_date", (v,)).unwrap(), AnyValue::Datetime(v, time_unit, time_zone) => { - datetime_to_rb_object(v, time_unit, time_zone) + datetime_to_rb_object(v, time_unit, time_zone).unwrap() } AnyValue::DatetimeOwned(v, time_unit, time_zone) => { - datetime_to_rb_object(v, time_unit, time_zone.as_ref().map(AsRef::as_ref)) + datetime_to_rb_object(v, time_unit, time_zone.as_ref().map(AsRef::as_ref)).unwrap() } AnyValue::Duration(v, time_unit) => { let time_unit = time_unit.to_ascii(); diff --git a/ext/polars/src/conversion/chunked_array.rs b/ext/polars/src/conversion/chunked_array.rs index 3720b256a1..5f94a43446 100644 --- a/ext/polars/src/conversion/chunked_array.rs +++ b/ext/polars/src/conversion/chunked_array.rs @@ -2,6 +2,7 @@ use magnus::{IntoValue, RString, Ruby, TryConvert, Value, prelude::*}; use polars::prelude::*; use polars_compute::decimal::DecimalFmtBuffer; +use super::datetime::datetime_to_rb_object; use super::{Wrap, get_rbseq, struct_dict}; use crate::RbResult; @@ -89,20 +90,12 @@ impl IntoValue for Wrap<&DurationChunked> { impl IntoValue for Wrap<&DatetimeChunked> { fn into_value_with(self, ruby: &Ruby) -> Value { - let utils = pl_utils(ruby); - let time_unit = Wrap(self.0.time_unit()).into_value_with(ruby); - let time_zone = self - .0 - .time_zone() - .as_deref() - .map(|v| v.into_value_with(ruby)); - let iter = self.0.physical().into_iter().map(|opt_v| { - opt_v.map(|v| { - utils - .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone)) - .unwrap() - }) - }); + let time_zone = self.0.time_zone().as_ref(); + let time_unit = self.0.time_unit(); + let iter = + self.0.physical().iter().map(|opt_v| { + opt_v.map(|v| datetime_to_rb_object(v, time_unit, time_zone).unwrap()) + }); ruby.ary_from_iter(iter).as_value() } } diff --git a/ext/polars/src/conversion/datetime.rs b/ext/polars/src/conversion/datetime.rs index 05ec4c0ef5..ce1afe8eba 100644 --- a/ext/polars/src/conversion/datetime.rs +++ b/ext/polars/src/conversion/datetime.rs @@ -1,12 +1,59 @@ -use magnus::{Ruby, Value, prelude::*}; +//! Utilities for converting dates, times, datetimes, and so on. + +use std::str::FromStr; + +use chrono::{DateTime, Datelike, FixedOffset, NaiveDateTime, TimeDelta, TimeZone as _}; +use chrono_tz::Tz; +use magnus::{IntoValue, Ruby, Value, prelude::*}; use polars::prelude::*; use crate::rb_modules::pl_utils; +use crate::{RbPolarsErr, RbResult}; + +pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta { + let (in_second, nano_multiplier) = match time_unit { + TimeUnit::Nanoseconds => (1_000_000_000, 1), + TimeUnit::Microseconds => (1_000_000, 1_000), + TimeUnit::Milliseconds => (1_000, 1_000_000), + }; + let mut elapsed_sec = elapsed / in_second; + let mut elapsed_nanos = nano_multiplier * (elapsed % in_second); + if elapsed_nanos < 0 { + // TimeDelta expects nanos to always be positive. + elapsed_sec -= 1; + elapsed_nanos += 1_000_000_000; + } + TimeDelta::new(elapsed_sec, elapsed_nanos as u32).unwrap() +} + +/// Convert time-units-since-epoch to a more structured object. +pub fn timestamp_to_naive_datetime(since_epoch: i64, time_unit: TimeUnit) -> NaiveDateTime { + DateTime::UNIX_EPOCH.naive_utc() + elapsed_offset_to_timedelta(since_epoch, time_unit) +} -pub fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> Value { +pub fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> RbResult { let ruby = Ruby::get().unwrap(); - let tu = tu.to_ascii(); - pl_utils(&ruby) - .funcall("_to_ruby_datetime", (v, tu, tz.map(|v| v.to_string()))) - .unwrap() + if let Some(time_zone) = tz { + if let Ok(tz) = Tz::from_str(time_zone) { + let utc_datetime = DateTime::UNIX_EPOCH + elapsed_offset_to_timedelta(v, tu); + if utc_datetime.year() >= 2100 { + // chrono-tz does not support dates after 2100 + // https://github.com/chronotope/chrono-tz/issues/135 + pl_utils(&ruby).funcall("_to_ruby_datetime", (v, tu.to_ascii(), time_zone.as_str())) + } else { + let datetime = utc_datetime.with_timezone(&tz); + Ok(datetime.fixed_offset().into_value_with(&ruby)) + } + } else if let Ok(tz) = FixedOffset::from_str(time_zone) { + let naive_datetime = timestamp_to_naive_datetime(v, tu); + let datetime = tz.from_utc_datetime(&naive_datetime); + Ok(datetime.into_value_with(&ruby)) + } else { + Err(RbPolarsErr::Other(format!("Could not parse timezone: {time_zone}")).into()) + } + } else { + Ok(timestamp_to_naive_datetime(v, tu) + .and_utc() + .into_value_with(&ruby)) + } } diff --git a/lib/polars.rb b/lib/polars.rb index 7bc5db878b..26db32b9b1 100644 --- a/lib/polars.rb +++ b/lib/polars.rb @@ -9,6 +9,7 @@ require "bigdecimal" require "date" require "stringio" +require "tzinfo" # modules require_relative "polars/expr_dispatch" diff --git a/lib/polars/utils/convert.rb b/lib/polars/utils/convert.rb index bb22ef6efa..df819a9e6a 100644 --- a/lib/polars/utils/convert.rb +++ b/lib/polars/utils/convert.rb @@ -63,19 +63,19 @@ def self._to_ruby_time(value) end def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil) - if time_zone.nil? || time_zone == "" || time_zone == "UTC" + utc_time = if time_unit == "ns" - ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc + ::Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc elsif time_unit == "us" - ::Time.at(value / 1000000, value % 1000000, :usec).utc + ::Time.at(value / 1_000_000, value % 1_000_000, :usec).utc elsif time_unit == "ms" - ::Time.at(value / 1000, value % 1000, :millisecond).utc + ::Time.at(value / 1_000, value % 1_000, :millisecond).utc else raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}" end - else - raise Todo - end + return utc_time if time_zone.nil? + + utc_time.getlocal(::TZInfo::Timezone.get(time_zone)) end def self._to_ruby_duration(value, time_unit = "ns") diff --git a/polars-df.gemspec b/polars-df.gemspec index 648fef5db4..235066ac45 100644 --- a/polars-df.gemspec +++ b/polars-df.gemspec @@ -18,4 +18,5 @@ Gem::Specification.new do |spec| spec.add_dependency "bigdecimal" spec.add_dependency "rb_sys" + spec.add_dependency "tzinfo", "~> 2.0" end diff --git a/test/data_frame_test.rb b/test/data_frame_test.rb index 7c0b12f0a7..570fb43ee0 100644 --- a/test/data_frame_test.rb +++ b/test/data_frame_test.rb @@ -364,6 +364,15 @@ def test_describe assert df.describe end + def test_describe_tz_aware_datetime + df = Polars::DataFrame.new({ + "a" => [1, 2, 3], + "ts" => Polars::Series.new("ts", [Time.utc(2026, 1, 1), Time.utc(2026, 1, 2), Time.utc(2026, 1, 3)]) + .cast(Polars::Datetime.new("ms", "Europe/London")) + }) + assert df.describe + end + def test_sort df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) assert_frame ({"a" => [1, 3, 2], "b" => ["one", "three", "two"]}), df.sort("b") diff --git a/test/types_test.rb b/test/types_test.rb index 342409531d..9d0c2595bc 100644 --- a/test/types_test.rb +++ b/test/types_test.rb @@ -122,9 +122,23 @@ def test_series_dtype_datetime_time_unit def test_series_dtype_datetime_time_zone s = Polars::Series.new([Time.utc(2020, 1, 1)], dtype: Polars::Datetime.new("us", "Europe/Amsterdam")) - assert_equal Polars::Datetime.new("us", "Europe/Amsterdam"), s.dtype - # TODO fix - # assert_series [Time.utc(2020, 1, 1)], s, dtype: Polars::Datetime.new("us", "Europe/Amsterdam") + assert_series [Time.utc(2020, 1, 1)], s, dtype: Polars::Datetime.new("us", "Europe/Amsterdam") + end + + def test_series_dtype_datetime_time_zone_dst + s = Polars::Series.new([Time.utc(2024, 7, 1)], dtype: Polars::Datetime.new("us", "Europe/London")) + assert_series [Time.utc(2024, 7, 1)], s, dtype: Polars::Datetime.new("us", "Europe/London") + end + + def test_series_dtype_datetime_fixed_offset + s = Polars::Series.new([Time.utc(2024, 1, 1)], dtype: Polars::Datetime.new("us", "+09:00")) + assert_series [Time.utc(2024, 1, 1)], s, dtype: Polars::Datetime.new("us", "Etc/GMT-9") + end + + # chrono-tz has no data past 2100; falls back to Ruby-side TZInfo. + def test_series_dtype_datetime_time_zone_post_2100 + s = Polars::Series.new([Time.utc(2150, 1, 1)], dtype: Polars::Datetime.new("us", "America/New_York")) + assert_series [Time.utc(2150, 1, 1)], s, dtype: Polars::Datetime.new("us", "America/New_York") end def test_series_dtype_duration