From 59e5f366fe18c54f8d9e4f26742c02f6e7a9164a Mon Sep 17 00:00:00 2001
From: lani_karrot
Date: Wed, 7 Jan 2026 19:24:49 +0900
Subject: [PATCH] fix: rejecting inf as value

Handle infinite inputs alongside the existing NaN checks:

- update() silently ignores +/-infinity, as it already does for NaN.
- get_rank() throws std::invalid_argument for +/-infinity.
- check_split_points() throws std::invalid_argument when a split point
  is infinite.

The new tests expect update() to leave total weight, min and max
untouched, and expect get_rank(), get_CDF() and get_PMF() to throw
std::invalid_argument when given an infinite value.

---
 tdigest/include/tdigest_impl.hpp |  5 +++
 tdigest/test/tdigest_test.cpp    | 53 ++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/tdigest/include/tdigest_impl.hpp b/tdigest/include/tdigest_impl.hpp
index b8fab38d..75f2d9ee 100644
--- a/tdigest/include/tdigest_impl.hpp
+++ b/tdigest/include/tdigest_impl.hpp
@@ -37,6 +37,7 @@ tdigest(false, k, std::numeric_limits<T>::infinity(), -std::numeric_limits<T>::i
 template<typename T, typename A>
 void tdigest<T, A>::update(T value) {
   if (std::isnan(value)) return;
+  if (std::isinf(value)) return;
   if (buffer_.size() == centroids_capacity_ * BUFFER_MULTIPLIER) compress();
   buffer_.push_back(value);
   min_ = std::min(min_, value);
@@ -94,6 +95,7 @@ template<typename T, typename A>
 double tdigest<T, A>::get_rank(T value) const {
   if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
   if (std::isnan(value)) throw std::invalid_argument("operation is undefined for NaN");
+  if (std::isinf(value)) throw std::invalid_argument("operation is undefined for infinity");
   if (value < min_) return 0;
   if (value > max_) return 1;
   // one centroid and value == min_ == max_
@@ -621,6 +623,9 @@ void tdigest<T, A>::check_split_points(const T* values, uint32_t size) {
     if (std::isnan(values[i])) {
       throw std::invalid_argument("Values must not be NaN");
     }
+    if (std::isinf(values[i])) {
+      throw std::invalid_argument("Values must not be infinity");
+    }
     if ((i < (size - 1)) && !(values[i] < values[i + 1])) {
       throw std::invalid_argument("Values must be unique and monotonically increasing");
     }
diff --git a/tdigest/test/tdigest_test.cpp b/tdigest/test/tdigest_test.cpp
index 9f92094d..45c10822 100644
--- a/tdigest/test/tdigest_test.cpp
+++ b/tdigest/test/tdigest_test.cpp
@@ -470,4 +470,57 @@ TEST_CASE("iterate centroids", "[tdigest]") {
   REQUIRE(td.get_total_weight() == total_weight);
 }
 
+TEST_CASE("update rejects positive infinity", "[tdigest]") {
+  tdigest_double td(100);
+  td.update(1.0);
+  td.update(2.0);
+  td.update(std::numeric_limits<double>::infinity());
+  REQUIRE(td.get_total_weight() == 2);
+  REQUIRE(td.get_max_value() == 2.0);
+}
+
+TEST_CASE("update rejects negative infinity", "[tdigest]") {
+  tdigest_double td(100);
+  td.update(1.0);
+  td.update(2.0);
+  td.update(-std::numeric_limits<double>::infinity());
+  REQUIRE(td.get_total_weight() == 2);
+  REQUIRE(td.get_min_value() == 1.0);
+}
+
+TEST_CASE("get_rank rejects positive infinity", "[tdigest]") {
+  tdigest_double td(100);
+  td.update(1.0);
+  td.update(2.0);
+  REQUIRE_THROWS_AS(td.get_rank(std::numeric_limits<double>::infinity()), std::invalid_argument);
+}
+
+TEST_CASE("get_rank rejects negative infinity", "[tdigest]") {
+  tdigest_double td(100);
+  td.update(1.0);
+  td.update(2.0);
+  REQUIRE_THROWS_AS(td.get_rank(-std::numeric_limits<double>::infinity()), std::invalid_argument);
+}
+
+TEST_CASE("get_CDF rejects positive infinity in split points", "[tdigest]") {
+  tdigest_double td(100);
+  for (int i = 0; i < 100; ++i) td.update(i);
+  const double split_points[2] = {50.0, std::numeric_limits<double>::infinity()};
+  REQUIRE_THROWS_AS(td.get_CDF(split_points, 2), std::invalid_argument);
+}
+
+TEST_CASE("get_CDF rejects negative infinity in split points", "[tdigest]") {
+  tdigest_double td(100);
+  for (int i = 0; i < 100; ++i) td.update(i);
+  const double split_points[2] = {-std::numeric_limits<double>::infinity(), 50.0};
+  REQUIRE_THROWS_AS(td.get_CDF(split_points, 2), std::invalid_argument);
+}
+
+TEST_CASE("get_PMF rejects infinity in split points", "[tdigest]") {
+  tdigest_double td(100);
+  for (int i = 0; i < 100; ++i) td.update(i);
+  const double split_points[1] = {std::numeric_limits<double>::infinity()};
+  REQUIRE_THROWS_AS(td.get_PMF(split_points, 1), std::invalid_argument);
+}
+
 } /* namespace datasketches */