|
| 1 | +diff -ruN arrow-apache-arrow-17.0.0-after-paimon/cpp/src/parquet/arrow/schema.cc arrow-apache-arrow-17.0.0/cpp/src/parquet/arrow/schema.cc |
| 2 | +--- arrow-apache-arrow-17.0.0-after-paimon/cpp/src/parquet/arrow/schema.cc 2026-03-27 01:23:23.651831424 +0800 |
| 3 | ++++ arrow-apache-arrow-17.0.0/cpp/src/parquet/arrow/schema.cc 2026-03-27 01:28:36.855281965 +0800 |
| 4 | +@@ -178,7 +178,8 @@ |
| 5 | + |
| 6 | + // The user is explicitly asking for Impala int96 encoding, there is no |
| 7 | + // logical type. |
| 8 | +- if (arrow_properties.support_deprecated_int96_timestamps() && target_unit == ::arrow::TimeUnit::NANO) { |
| 9 | ++ if (arrow_properties.force_write_int96_timestamps() || |
| 10 | ++ (arrow_properties.support_deprecated_int96_timestamps() && target_unit == ::arrow::TimeUnit::NANO)) { |
| 11 | + *physical_type = ParquetType::INT96; |
| 12 | + return Status::OK(); |
| 13 | + } |
| 14 | +diff -ruN arrow-apache-arrow-17.0.0-after-paimon/cpp/src/parquet/properties.h arrow-apache-arrow-17.0.0/cpp/src/parquet/properties.h |
| 15 | +--- arrow-apache-arrow-17.0.0-after-paimon/cpp/src/parquet/properties.h 2026-03-27 01:23:23.643831362 +0800 |
| 16 | ++++ arrow-apache-arrow-17.0.0/cpp/src/parquet/properties.h 2026-03-27 01:27:47.717897537 +0800 |
| 17 | +@@ -980,6 +980,7 @@ |
| 18 | + public: |
| 19 | + Builder() |
| 20 | + : write_timestamps_as_int96_(false), |
| 21 | ++ force_write_int96_timestamps_(false), |
| 22 | + coerce_timestamps_enabled_(false), |
| 23 | + coerce_timestamps_unit_(::arrow::TimeUnit::SECOND), |
| 24 | + truncated_timestamps_allowed_(false), |
| 25 | +@@ -1005,6 +1006,21 @@ |
| 26 | + return this; |
| 27 | + } |
| 28 | + |
| 29 | ++ /// \brief Force writing legacy int96 timestamps. |
| 30 | ++ /// |
| 31 | ++ /// When set, INT96 is chosen as the physical type regardless of the target time |
| 32 | ++ /// unit; support_deprecated_int96_timestamps() alone applies only to NANO. |
| 33 | ++ Builder* enable_force_write_int96_timestamps() { |
| 34 | ++ force_write_int96_timestamps_ = true; |
| 35 | ++ return this; |
| 36 | ++ } |
| 37 | ++ |
| 38 | ++ /// \brief Disable forcing legacy int96 timestamps (default). |
| 39 | ++ Builder* disable_force_write_int96_timestamps() { |
| 40 | ++ force_write_int96_timestamps_ = false; |
| 41 | ++ return this; |
| 42 | ++ } |
| 43 | ++ |
| 44 | + /// \brief Coerce all timestamps to the specified time unit. |
| 45 | + /// \param unit time unit to truncate to. |
| 46 | + /// For Parquet versions 1.0 and 2.4, nanoseconds are casted to microseconds. |
| 47 | +@@ -1085,7 +1101,8 @@ |
| 48 | + /// Create the final properties. |
| 49 | + std::shared_ptr<ArrowWriterProperties> build() { |
| 50 | + return std::shared_ptr<ArrowWriterProperties>(new ArrowWriterProperties( |
| 51 | +- write_timestamps_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_, |
| 52 | ++ write_timestamps_as_int96_, force_write_int96_timestamps_, |
| 53 | ++ coerce_timestamps_enabled_, coerce_timestamps_unit_, |
| 54 | + truncated_timestamps_allowed_, store_schema_, compliant_nested_types_, |
| 55 | + engine_version_, use_threads_, executor_)); |
| 56 | + } |
| 57 | +@@ -1093,6 +1110,8 @@ |
| 58 | + private: |
| 59 | + bool write_timestamps_as_int96_; |
| 60 | + |
| 61 | ++ bool force_write_int96_timestamps_; |
| 62 | ++ |
| 63 | + bool coerce_timestamps_enabled_; |
| 64 | + ::arrow::TimeUnit::type coerce_timestamps_unit_; |
| 65 | + bool truncated_timestamps_allowed_; |
| 66 | +@@ -1107,6 +1126,8 @@ |
| 67 | + |
| 68 | + bool support_deprecated_int96_timestamps() const { return write_timestamps_as_int96_; } |
| 69 | + |
| 70 | ++ bool force_write_int96_timestamps() const { return force_write_int96_timestamps_; } |
| 71 | ++ |
| 72 | + bool coerce_timestamps_enabled() const { return coerce_timestamps_enabled_; } |
| 73 | + ::arrow::TimeUnit::type coerce_timestamps_unit() const { |
| 74 | + return coerce_timestamps_unit_; |
| 75 | +@@ -1138,6 +1159,7 @@ |
| 76 | + |
| 77 | + private: |
| 78 | + explicit ArrowWriterProperties(bool write_nanos_as_int96, |
| 79 | ++ bool force_write_int96_timestamps, |
| 80 | + bool coerce_timestamps_enabled, |
| 81 | + ::arrow::TimeUnit::type coerce_timestamps_unit, |
| 82 | + bool truncated_timestamps_allowed, bool store_schema, |
| 83 | +@@ -1145,6 +1167,7 @@ |
| 84 | + EngineVersion engine_version, bool use_threads, |
| 85 | + ::arrow::internal::Executor* executor) |
| 86 | + : write_timestamps_as_int96_(write_nanos_as_int96), |
| 87 | ++ force_write_int96_timestamps_(force_write_int96_timestamps), |
| 88 | + coerce_timestamps_enabled_(coerce_timestamps_enabled), |
| 89 | + coerce_timestamps_unit_(coerce_timestamps_unit), |
| 90 | + truncated_timestamps_allowed_(truncated_timestamps_allowed), |
| 91 | +@@ -1155,6 +1178,7 @@ |
| 92 | + executor_(executor) {} |
| 93 | + |
| 94 | + const bool write_timestamps_as_int96_; |
| 95 | ++ const bool force_write_int96_timestamps_; |
| 96 | + const bool coerce_timestamps_enabled_; |
| 97 | + const ::arrow::TimeUnit::type coerce_timestamps_unit_; |
| 98 | + const bool truncated_timestamps_allowed_; |
0 commit comments