From def1b4b681fa8782310f79362a841e7406af9b4f Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Fri, 19 Dec 2025 15:27:08 +0100 Subject: [PATCH 1/8] fixed PR --- .../aggregation/aggregators/Aggregator.kt | 19 +++++++++++ .../AggregatorAggregationHandler.kt | 30 ++++++++++++++-- .../aggregation/aggregators/Aggregators.kt | 7 +++- .../dataframe/impl/columns/ValueColumnImpl.kt | 34 ++++++++++++++++++- 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt index a7cb2a8b14..a8cf1922ef 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.NoMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -42,6 +43,7 @@ public class Aggregator( public val inputHandler: AggregatorInputHandler, public val multipleColumnsHandler: AggregatorMultipleColumnsHandler, public val name: String, + public val statisticsParameters: Map, ) : AggregatorInputHandler by inputHandler, AggregatorMultipleColumnsHandler by multipleColumnsHandler, AggregatorAggregationHandler by aggregationHandler { @@ -75,6 +77,7 @@ public class Aggregator( aggregationHandler: AggregatorAggregationHandler, inputHandler: AggregatorInputHandler, multipleColumnsHandler: AggregatorMultipleColumnsHandler, + statisticsParameters: Map, ): AggregatorProvider = AggregatorProvider { name -> Aggregator( @@ -82,6 +85,22 @@ public class Aggregator( inputHandler = inputHandler, multipleColumnsHandler = multipleColumnsHandler, name = name, + statisticsParameters = statisticsParameters, + ) + } + + internal operator fun invoke( + aggregationHandler: AggregatorAggregationHandler, + inputHandler: AggregatorInputHandler, + multipleColumnsHandler: AggregatorMultipleColumnsHandler, + ): AggregatorProvider = + AggregatorProvider { name -> + Aggregator( + aggregationHandler = aggregationHandler, + inputHandler = inputHandler, + multipleColumnsHandler = multipleColumnsHandler, + name = name, + emptyMap(), ) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 7b1b0357eb..a7f65152c3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -3,6 +3,9 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.asSequence import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue +import org.jetbrains.kotlinx.dataframe.impl.columns.StatisticResult +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnInternal import kotlin.reflect.KType /** @@ -26,13 +29,34 @@ public interface AggregatorAggregationHandler /** * Aggregates the data in the given column and computes a single resulting value. - * Calls [aggregateSequence]. + * Calls [aggregateSequence]. It tries to exploit a cache for statistics which is proper of + * [ValueColumnInternal] */ - public fun aggregateSingleColumn(column: DataColumn): Return = - aggregateSequence( + public fun aggregateSingleColumn(column: DataColumn): Return { + if (column is ValueColumnInternal<*>) { + // cache check, cache is dynamically created + val aggregator = this.aggregator ?: throw IllegalStateException("Aggregator is required") + val desiredStatisticNotConsideringParameters = column.statistics.getOrPut(aggregator.name) { + mutableMapOf, StatisticResult>() + } + // can't compare maps whose Values are Any? -> ParameterValue instead + val desiredStatistic = desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] + // if desiredStatistic is null, statistic was never calculated + if (desiredStatistic != null) { + return desiredStatistic.value as Return + } + val statistic = aggregateSequence( + values = column.asSequence(), + valueType = column.type().toValueType(), + ) + desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] = StatisticResult(statistic) + return aggregateSingleColumn(column) + } + return aggregateSequence( values = column.asSequence(), valueType = column.type().toValueType(), ) + } /** * Function that can give the return type of [aggregateSequence] as [KType], given the type of the input. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 9648fed3ad..25660dd3d1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandlers.NumberInputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import org.jetbrains.kotlinx.dataframe.math.indexOfMax import org.jetbrains.kotlinx.dataframe.math.indexOfMedian import org.jetbrains.kotlinx.dataframe.math.indexOfMin @@ -35,10 +36,12 @@ public object Aggregators { getReturnType: CalculateReturnType, indexOfResult: IndexOfResult, stepOneSelector: Selector, + statisticsParameters: Map, ) = Aggregator( aggregationHandler = SelectingAggregationHandler(stepOneSelector, indexOfResult, getReturnType), inputHandler = AnyInputHandler(), multipleColumnsHandler = TwoStepMultipleColumnsHandler(), + statisticsParameters = statisticsParameters, ) private fun flattenHybridForAny( @@ -117,8 +120,9 @@ public object Aggregators { by withOneOption { skipNaN: Boolean -> twoStepSelectingForAny, Comparable?>( getReturnType = minTypeConversion, - stepOneSelector = { type -> minOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMin(type, skipNaN) }, + stepOneSelector = { type -> minOrNull(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } @@ -132,6 +136,7 @@ public object Aggregators { getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index f758360d1f..071801a5d8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,6 +8,32 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability +@JvmInline +internal value class StatisticResult(val value: Any?) + +public class ParameterValue(public val parameter: Any?) { + + override fun equals(other: Any?): Boolean { + val otherAsParameterValue = other as ParameterValue? + val that = otherAsParameterValue?.parameter + if (parameter is Boolean && that is Boolean) { + return this.parameter == that + } + return super.equals(other) + } + + override fun hashCode(): Int { + if (parameter is Boolean?) { + return this.parameter.hashCode() + } + return super.hashCode() + } +} + +internal interface ValueColumnInternal : ValueColumn { + val statistics: MutableMap, StatisticResult>> +} + internal open class ValueColumnImpl( values: List, name: String, @@ -15,7 +41,8 @@ internal open class ValueColumnImpl( val defaultValue: T? = null, distinct: Lazy>? = null, ) : DataColumnImpl(values, name, type, distinct), - ValueColumn { + ValueColumn, + ValueColumnInternal { override fun distinct() = ValueColumnImpl(toSet().toList(), name, type, defaultValue, distinct) @@ -48,10 +75,13 @@ internal open class ValueColumnImpl( override fun defaultValue() = defaultValue override fun forceResolve() = ResolvingValueColumn(this) + + override val statistics = mutableMapOf, StatisticResult>>() } internal class ResolvingValueColumn(override val source: ValueColumn) : ValueColumn by source, + ValueColumnInternal, ForceResolvedColumn { override fun resolve(context: ColumnResolutionContext) = super.resolve(context) @@ -70,4 +100,6 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun equals(other: Any?) = source.checkEquals(other) override fun hashCode(): Int = source.hashCode() + + override val statistics = mutableMapOf, StatisticResult>>() } From b3b1f8255304c46543d6c901ae01fe6a5ada3b9a Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Sun, 28 Dec 2025 20:22:43 +0100 Subject: [PATCH 2/8] each statistic is now exploiting dynamically allocated cache --- .../aggregation/aggregators/Aggregators.kt | 46 +++++++++++++++---- .../dataframe/impl/columns/ValueColumnImpl.kt | 14 +++++- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 25660dd3d1..de39e3bfb9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -48,10 +48,12 @@ public object Aggregators { getReturnType: CalculateReturnType, indexOfResult: IndexOfResult, reducer: Reducer, + statisticsParameters: Map, ) = Aggregator( aggregationHandler = HybridAggregationHandler(reducer, indexOfResult, getReturnType), inputHandler = AnyInputHandler(), multipleColumnsHandler = FlatteningMultipleColumnsHandler(), + statisticsParameters = statisticsParameters, ) private fun twoStepReducingForAny( @@ -86,20 +88,24 @@ public object Aggregators { private fun flattenReducingForNumbers( getReturnType: CalculateReturnType, + statisticsParameters: Map, reducer: Reducer, ) = Aggregator( aggregationHandler = ReducingAggregationHandler(reducer, getReturnType), inputHandler = NumberInputHandler(), multipleColumnsHandler = FlatteningMultipleColumnsHandler(), + statisticsParameters = statisticsParameters, ) private fun twoStepReducingForNumbers( getReturnType: CalculateReturnType, + statisticsParameters: Map, reducer: Reducer, ) = Aggregator( aggregationHandler = ReducingAggregationHandler(reducer, getReturnType), inputHandler = NumberInputHandler(), multipleColumnsHandler = TwoStepMultipleColumnsHandler(), + statisticsParameters = statisticsParameters, ) /** @include [AggregatorOptionSwitch1] */ @@ -145,17 +151,30 @@ public object Aggregators { skipNaN: Boolean, ddof: Int, -> - flattenReducingForNumbers(stdTypeConversion) { type -> - std(type, skipNaN, ddof) - } + flattenReducingForNumbers( + getReturnType = stdTypeConversion, + statisticsParameters = mapOf( + Pair("skipNaN", ParameterValue(skipNaN)), + Pair("ddof", ParameterValue(ddof)), + ), + reducer = { type -> + std(type, skipNaN, ddof) + }, + ) } // step one: T: Number? -> Double // step two: Double -> Double public val mean: AggregatorOptionSwitch1 by withOneOption { skipNaN: Boolean -> - twoStepReducingForNumbers(meanTypeConversion) { type -> - mean(type, skipNaN) - } + twoStepReducingForNumbers( + getReturnType = meanTypeConversion, + statisticsParameters = mapOf( + Pair("skipNaN", ParameterValue(skipNaN)), + ), + reducer = { type -> + mean(type, skipNaN) + }, + ) } // T: primitive Number? -> Double? @@ -192,6 +211,10 @@ public object Aggregators { getReturnType = percentileConversion, reducer = { type -> percentileOrNull(percentile, type, skipNaN) as Comparable? }, indexOfResult = { type -> indexOfPercentile(percentile, type, skipNaN) }, + statisticsParameters = mapOf( + Pair("skipNaN", ParameterValue(skipNaN)), + Pair("percentile", ParameterValue(percentile)), + ), ) } @@ -220,6 +243,7 @@ public object Aggregators { getReturnType = medianConversion, reducer = { type -> medianOrNull(type, skipNaN) as Comparable? }, indexOfResult = { type -> indexOfMedian(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } @@ -228,8 +252,12 @@ public object Aggregators { // Short -> Int // Nothing -> Double public val sum: AggregatorOptionSwitch1 by withOneOption { skipNaN: Boolean -> - twoStepReducingForNumbers(sumTypeConversion) { type -> - sum(type, skipNaN) - } + twoStepReducingForNumbers( + getReturnType = sumTypeConversion, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), + reducer = { type -> + sum(type, skipNaN) + }, + ) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 071801a5d8..1656d32b20 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -19,11 +19,23 @@ public class ParameterValue(public val parameter: Any?) { if (parameter is Boolean && that is Boolean) { return this.parameter == that } + if (parameter is Double && that is Double) { + return this.parameter == that + } + if (parameter is Int && that is Int) { + return this.parameter == that + } return super.equals(other) } override fun hashCode(): Int { - if (parameter is Boolean?) { + if (parameter is Boolean) { + return this.parameter.hashCode() + } + if (parameter is Double) { + return this.parameter.hashCode() + } + if (parameter is Int) { return this.parameter.hashCode() } return super.hashCode() From 53411f78d1e5fc0ec73403d3bb6f08bf5ae1180d Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Sun, 28 Dec 2025 20:26:39 +0100 Subject: [PATCH 3/8] apiDump --- core/api/core.api | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/core/api/core.api b/core/api/core.api index c788698ff0..630f3d9d1d 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -5756,7 +5756,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/Aggregations } public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator : org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler, org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorInputHandler, org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorMultipleColumnsHandler { - public fun (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorInputHandler;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorMultipleColumnsHandler;Ljava/lang/String;)V + public fun (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorInputHandler;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorMultipleColumnsHandler;Ljava/lang/String;Ljava/util/Map;)V public fun aggregateMultipleColumns (Lkotlin/sequences/Sequence;)Ljava/lang/Object; public fun aggregateSequence (Lkotlin/sequences/Sequence;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/ValueType;)Ljava/lang/Object; public fun aggregateSingleColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/Object; @@ -5769,6 +5769,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/ public final fun getInputHandler ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorInputHandler; public final fun getMultipleColumnsHandler ()Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorMultipleColumnsHandler; public final fun getName ()Ljava/lang/String; + public final fun getStatisticsParameters ()Ljava/util/Map; public fun indexOfAggregationResultSingleSequence (Lkotlin/sequences/Sequence;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/ValueType;)I public fun init (Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator;)V public fun preprocessAggregation (Lkotlin/sequences/Sequence;Lorg/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/ValueType;)Lkotlin/Pair; @@ -6040,6 +6041,13 @@ public final class org/jetbrains/kotlinx/dataframe/impl/columns/DataColumnIntern public static final fun forceResolve (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; } +public final class org/jetbrains/kotlinx/dataframe/impl/columns/ParameterValue { + public fun (Ljava/lang/Object;)V + public fun equals (Ljava/lang/Object;)Z + public final fun getParameter ()Ljava/lang/Object; + public fun hashCode ()I +} + public final class org/jetbrains/kotlinx/dataframe/impl/columns/UtilsKt { public static final fun asAnyFrameColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; public static final fun transform (Lorg/jetbrains/kotlinx/dataframe/columns/ColumnsResolver;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet; From ee23718ceb36265a4a2f858c2bf90bed049ae646 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Fri, 2 Jan 2026 18:44:59 +0100 Subject: [PATCH 4/8] ParameterValue -> Any --- .../aggregation/aggregators/Aggregator.kt | 5 +-- .../AggregatorAggregationHandler.kt | 3 +- .../aggregation/aggregators/Aggregators.kt | 33 ++++++++--------- .../dataframe/impl/columns/ValueColumnImpl.kt | 37 ++----------------- 4 files changed, 22 insertions(+), 56 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt index a8cf1922ef..719b2ad140 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt @@ -8,7 +8,6 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.NoMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler -import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -43,7 +42,7 @@ public class Aggregator( public val inputHandler: AggregatorInputHandler, public val multipleColumnsHandler: AggregatorMultipleColumnsHandler, public val name: String, - public val statisticsParameters: Map, + public val statisticsParameters: Map, ) : AggregatorInputHandler by inputHandler, AggregatorMultipleColumnsHandler by multipleColumnsHandler, AggregatorAggregationHandler by aggregationHandler { @@ -77,7 +76,7 @@ public class Aggregator( aggregationHandler: AggregatorAggregationHandler, inputHandler: AggregatorInputHandler, multipleColumnsHandler: AggregatorMultipleColumnsHandler, - statisticsParameters: Map, + statisticsParameters: Map, ): AggregatorProvider = AggregatorProvider { name -> Aggregator( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index a7f65152c3..d35df8d0d9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -3,7 +3,6 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.asSequence import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler -import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import org.jetbrains.kotlinx.dataframe.impl.columns.StatisticResult import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnInternal import kotlin.reflect.KType @@ -37,7 +36,7 @@ public interface AggregatorAggregationHandler // cache check, cache is dynamically created val aggregator = this.aggregator ?: throw IllegalStateException("Aggregator is required") val desiredStatisticNotConsideringParameters = column.statistics.getOrPut(aggregator.name) { - mutableMapOf, StatisticResult>() + mutableMapOf, StatisticResult>() } // can't compare maps whose Values are Any? -> ParameterValue instead val desiredStatistic = desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index de39e3bfb9..e6d171ec9e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -8,7 +8,6 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandlers.NumberInputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler -import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import org.jetbrains.kotlinx.dataframe.math.indexOfMax import org.jetbrains.kotlinx.dataframe.math.indexOfMedian import org.jetbrains.kotlinx.dataframe.math.indexOfMin @@ -36,7 +35,7 @@ public object Aggregators { getReturnType: CalculateReturnType, indexOfResult: IndexOfResult, stepOneSelector: Selector, - statisticsParameters: Map, + statisticsParameters: Map, ) = Aggregator( aggregationHandler = SelectingAggregationHandler(stepOneSelector, indexOfResult, getReturnType), inputHandler = AnyInputHandler(), @@ -48,7 +47,7 @@ public object Aggregators { getReturnType: CalculateReturnType, indexOfResult: IndexOfResult, reducer: Reducer, - statisticsParameters: Map, + statisticsParameters: Map, ) = Aggregator( aggregationHandler = HybridAggregationHandler(reducer, indexOfResult, getReturnType), inputHandler = AnyInputHandler(), @@ -88,7 +87,7 @@ public object Aggregators { private fun flattenReducingForNumbers( getReturnType: CalculateReturnType, - statisticsParameters: Map, + statisticsParameters: Map, reducer: Reducer, ) = Aggregator( aggregationHandler = ReducingAggregationHandler(reducer, getReturnType), @@ -99,7 +98,7 @@ public object Aggregators { private fun twoStepReducingForNumbers( getReturnType: CalculateReturnType, - statisticsParameters: Map, + statisticsParameters: Map, reducer: Reducer, ) = Aggregator( aggregationHandler = ReducingAggregationHandler(reducer, getReturnType), @@ -128,7 +127,7 @@ public object Aggregators { getReturnType = minTypeConversion, indexOfResult = { type -> indexOfMin(type, skipNaN) }, stepOneSelector = { type -> minOrNull(type, skipNaN) }, - statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), + statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), ) } @@ -142,7 +141,7 @@ public object Aggregators { getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, - statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), + statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), ) } @@ -153,9 +152,9 @@ public object Aggregators { -> flattenReducingForNumbers( getReturnType = stdTypeConversion, - statisticsParameters = mapOf( - Pair("skipNaN", ParameterValue(skipNaN)), - Pair("ddof", ParameterValue(ddof)), + statisticsParameters = mapOf( + Pair("skipNaN", skipNaN), + Pair("ddof", ddof), ), reducer = { type -> std(type, skipNaN, ddof) @@ -168,8 +167,8 @@ public object Aggregators { public val mean: AggregatorOptionSwitch1 by withOneOption { skipNaN: Boolean -> twoStepReducingForNumbers( getReturnType = meanTypeConversion, - statisticsParameters = mapOf( - Pair("skipNaN", ParameterValue(skipNaN)), + statisticsParameters = mapOf( + Pair("skipNaN", skipNaN), ), reducer = { type -> mean(type, skipNaN) @@ -211,9 +210,9 @@ public object Aggregators { getReturnType = percentileConversion, reducer = { type -> percentileOrNull(percentile, type, skipNaN) as Comparable? }, indexOfResult = { type -> indexOfPercentile(percentile, type, skipNaN) }, - statisticsParameters = mapOf( - Pair("skipNaN", ParameterValue(skipNaN)), - Pair("percentile", ParameterValue(percentile)), + statisticsParameters = mapOf( + Pair("skipNaN", skipNaN), + Pair("percentile", percentile), ), ) } @@ -243,7 +242,7 @@ public object Aggregators { getReturnType = medianConversion, reducer = { type -> medianOrNull(type, skipNaN) as Comparable? }, indexOfResult = { type -> indexOfMedian(type, skipNaN) }, - statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), + statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), ) } @@ -254,7 +253,7 @@ public object Aggregators { public val sum: AggregatorOptionSwitch1 by withOneOption { skipNaN: Boolean -> twoStepReducingForNumbers( getReturnType = sumTypeConversion, - statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), + statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), reducer = { type -> sum(type, skipNaN) }, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 1656d32b20..b7b2157e39 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -11,39 +11,8 @@ import kotlin.reflect.full.withNullability @JvmInline internal value class StatisticResult(val value: Any?) -public class ParameterValue(public val parameter: Any?) { - - override fun equals(other: Any?): Boolean { - val otherAsParameterValue = other as ParameterValue? - val that = otherAsParameterValue?.parameter - if (parameter is Boolean && that is Boolean) { - return this.parameter == that - } - if (parameter is Double && that is Double) { - return this.parameter == that - } - if (parameter is Int && that is Int) { - return this.parameter == that - } - return super.equals(other) - } - - override fun hashCode(): Int { - if (parameter is Boolean) { - return this.parameter.hashCode() - } - if (parameter is Double) { - return this.parameter.hashCode() - } - if (parameter is Int) { - return this.parameter.hashCode() - } - return super.hashCode() - } -} - internal interface ValueColumnInternal : ValueColumn { - val statistics: MutableMap, StatisticResult>> + val statistics: MutableMap, StatisticResult>> } internal open class ValueColumnImpl( @@ -88,7 +57,7 @@ internal open class ValueColumnImpl( override fun forceResolve() = ResolvingValueColumn(this) - override val statistics = mutableMapOf, StatisticResult>>() + override val statistics = mutableMapOf, StatisticResult>>() } internal class ResolvingValueColumn(override val source: ValueColumn) : @@ -113,5 +82,5 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun hashCode(): Int = source.hashCode() - override val statistics = mutableMapOf, StatisticResult>>() + override val statistics = mutableMapOf, StatisticResult>>() } From 2f509c12612fb958cd752489c1156e3b6d51541e Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Fri, 2 Jan 2026 18:55:28 +0100 Subject: [PATCH 5/8] apiDump --- core/api/core.api | 7 ------- 1 file changed, 7 deletions(-) diff --git a/core/api/core.api b/core/api/core.api index 630f3d9d1d..74f31e3dbd 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -6041,13 +6041,6 @@ public final class org/jetbrains/kotlinx/dataframe/impl/columns/DataColumnIntern public static final fun forceResolve (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; } -public final class org/jetbrains/kotlinx/dataframe/impl/columns/ParameterValue { - public fun (Ljava/lang/Object;)V - public fun equals (Ljava/lang/Object;)Z - public final fun getParameter ()Ljava/lang/Object; - public fun hashCode ()I -} - public final class org/jetbrains/kotlinx/dataframe/impl/columns/UtilsKt { public static final fun asAnyFrameColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; public static final fun transform (Lorg/jetbrains/kotlinx/dataframe/columns/ColumnsResolver;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet; From c4f19121f49db067f235e36ee31d1622b68e83eb Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Mon, 5 Jan 2026 19:21:09 +0100 Subject: [PATCH 6/8] changes after review --- .../aggregation/aggregators/Aggregator.kt | 15 ------------- .../aggregation/aggregators/Aggregators.kt | 21 +++++++++++-------- .../TwoStepMultipleColumnsHandler.kt | 1 + 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt index 719b2ad140..4746df815b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt @@ -87,21 +87,6 @@ public class Aggregator( statisticsParameters = statisticsParameters, ) } - - internal operator fun invoke( - aggregationHandler: AggregatorAggregationHandler, - inputHandler: AggregatorInputHandler, - multipleColumnsHandler: AggregatorMultipleColumnsHandler, - ): AggregatorProvider = - AggregatorProvider { name -> - Aggregator( - aggregationHandler = aggregationHandler, - inputHandler = inputHandler, - multipleColumnsHandler = multipleColumnsHandler, - name = name, - emptyMap(), - ) - } } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index e6d171ec9e..47b0f79eb7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -67,6 +67,7 @@ public object Aggregators { ReducingAggregationHandler(stepTwoReducer, getReturnType) }, ), + statisticsParameters = emptyMap(), ) private fun flattenReducingForAny(reducer: Reducer) = @@ -74,6 +75,7 @@ public object Aggregators { aggregationHandler = ReducingAggregationHandler(reducer, preserveReturnTypeNullIfEmpty), inputHandler = AnyInputHandler(), multipleColumnsHandler = FlatteningMultipleColumnsHandler(), + statisticsParameters = emptyMap(), ) private fun flattenReducingForAny( @@ -83,6 +85,7 @@ public object Aggregators { aggregationHandler = ReducingAggregationHandler(reducer, getReturnType), inputHandler = AnyInputHandler(), multipleColumnsHandler = FlatteningMultipleColumnsHandler(), + statisticsParameters = emptyMap(), ) private fun flattenReducingForNumbers( @@ -127,7 +130,7 @@ public object Aggregators { getReturnType = minTypeConversion, indexOfResult = { type -> indexOfMin(type, skipNaN) }, stepOneSelector = { type -> minOrNull(type, skipNaN) }, - statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), + statisticsParameters = mapOf("skipNaN" to skipNaN), ) } @@ -141,7 +144,7 @@ public object Aggregators { getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, - statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), + statisticsParameters = mapOf("skipNaN" to skipNaN), ) } @@ -153,8 +156,8 @@ public object Aggregators { flattenReducingForNumbers( getReturnType = stdTypeConversion, statisticsParameters = mapOf( - Pair("skipNaN", skipNaN), - Pair("ddof", ddof), + ("skipNaN" to skipNaN), + ("ddof" to ddof), ), reducer = { type -> std(type, skipNaN, ddof) @@ -168,7 +171,7 @@ public object Aggregators { twoStepReducingForNumbers( getReturnType = meanTypeConversion, statisticsParameters = mapOf( - Pair("skipNaN", skipNaN), + ("skipNaN" to skipNaN), ), reducer = { type -> mean(type, skipNaN) @@ -211,8 +214,8 @@ public object Aggregators { reducer = { type -> percentileOrNull(percentile, type, skipNaN) as Comparable? }, indexOfResult = { type -> indexOfPercentile(percentile, type, skipNaN) }, statisticsParameters = mapOf( - Pair("skipNaN", skipNaN), - Pair("percentile", percentile), + ("skipNaN" to skipNaN), + ("percentile" to percentile), ), ) } @@ -242,7 +245,7 @@ public object Aggregators { getReturnType = medianConversion, reducer = { type -> medianOrNull(type, skipNaN) as Comparable? }, indexOfResult = { type -> indexOfMedian(type, skipNaN) }, - statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), + statisticsParameters = mapOf("skipNaN" to skipNaN), ) } @@ -253,7 +256,7 @@ public object Aggregators { public val sum: AggregatorOptionSwitch1 by withOneOption { skipNaN: Boolean -> twoStepReducingForNumbers( getReturnType = sumTypeConversion, - statisticsParameters = mapOf(Pair("skipNaN", skipNaN)), + statisticsParameters = mapOf("skipNaN" to skipNaN), reducer = { type -> sum(type, skipNaN) }, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/multipleColumnsHandlers/TwoStepMultipleColumnsHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/multipleColumnsHandlers/TwoStepMultipleColumnsHandler.kt index c24a3e34a8..bb8fa1356d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/multipleColumnsHandlers/TwoStepMultipleColumnsHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/multipleColumnsHandlers/TwoStepMultipleColumnsHandler.kt @@ -47,6 +47,7 @@ internal class TwoStepMultipleColumnsHandler( ?: aggregator as AggregatorAggregationHandler, inputHandler = stepTwoInputHandler ?: aggregator as AggregatorInputHandler, multipleColumnsHandler = NoMultipleColumnsHandler(), + statisticsParameters = emptyMap(), ).create(aggregator!!.name) } From ab6dfacb32bda53accff5f587e5df4cc2781c3eb Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Wed, 7 Jan 2026 16:57:31 +0100 Subject: [PATCH 7/8] get/put statistics --- .../AggregatorAggregationHandler.kt | 14 ++++------ .../dataframe/impl/columns/ValueColumnImpl.kt | 28 +++++++++++++++++-- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index d35df8d0d9..d838b1b061 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -35,20 +35,18 @@ public interface AggregatorAggregationHandler if (column is ValueColumnInternal<*>) { // cache check, cache is dynamically created val aggregator = this.aggregator ?: throw IllegalStateException("Aggregator is required") - val desiredStatisticNotConsideringParameters = column.statistics.getOrPut(aggregator.name) { - mutableMapOf, StatisticResult>() - } - // can't compare maps whose Values are Any? -> ParameterValue instead - val desiredStatistic = desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] - // if desiredStatistic is null, statistic was never calculated + val statisticName = aggregator.name + val parameters = aggregator.statisticsParameters + val desiredStatistic = column.getStatisticCacheOrNull(statisticName, parameters) + // if desiredStatistic is null, statistic was never calculated. if (desiredStatistic != null) { return desiredStatistic.value as Return } - val statistic = aggregateSequence( + val statisticValue = aggregateSequence( values = column.asSequence(), valueType = column.type().toValueType(), ) - desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] = StatisticResult(statistic) + column.putStatisticCache(statisticName, parameters, StatisticResult(statisticValue)) return aggregateSingleColumn(column) } return aggregateSequence( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index b7b2157e39..6aa310108b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -12,9 +12,13 @@ import kotlin.reflect.full.withNullability internal value class StatisticResult(val value: Any?) internal interface ValueColumnInternal : ValueColumn { - val statistics: MutableMap, StatisticResult>> + fun putStatisticCache(statName: String, arguments: Map, value: StatisticResult) + + fun getStatisticCacheOrNull(statName: String, arguments: Map): StatisticResult? } +internal fun ValueColumn.internal() = this as ValueColumnInternal + internal open class ValueColumnImpl( values: List, name: String, @@ -57,7 +61,16 @@ internal open class ValueColumnImpl( override fun forceResolve() = ResolvingValueColumn(this) - override val statistics = mutableMapOf, StatisticResult>>() + private val statisticsCache = mutableMapOf, StatisticResult>>() + + override fun putStatisticCache(statName: String, arguments: Map, value: StatisticResult) { + statisticsCache.getOrPut(statName) { + mutableMapOf, StatisticResult>() + }[arguments] = value + } + + override fun getStatisticCacheOrNull(statName: String, arguments: Map): StatisticResult? = + statisticsCache[statName]?.get(arguments) } internal class ResolvingValueColumn(override val source: ValueColumn) : @@ -82,5 +95,14 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun hashCode(): Int = source.hashCode() - override val statistics = mutableMapOf, StatisticResult>>() + private val statisticsCache = mutableMapOf, StatisticResult>>() + + override fun putStatisticCache(statName: String, arguments: Map, value: StatisticResult) { + statisticsCache.getOrPut(statName) { + mutableMapOf, StatisticResult>() + }[arguments] = value + } + + override fun getStatisticCacheOrNull(statName: String, arguments: Map): StatisticResult? = + statisticsCache[statName]?.get(arguments) } From a88847d08d4279aa49896c7ad72de257d984fd25 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Wed, 7 Jan 2026 17:23:32 +0100 Subject: [PATCH 8/8] introducing ValueColumn.internal breaks ValueColumnWithParent --- .../jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 6aa310108b..947b082552 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -17,8 +17,6 @@ internal interface ValueColumnInternal : ValueColumn { fun getStatisticCacheOrNull(statName: String, arguments: Map): StatisticResult? } -internal fun ValueColumn.internal() = this as ValueColumnInternal - internal open class ValueColumnImpl( values: List, name: String,