From 0163d87f97e54d524cf67fbd7352fd081c4f2ff3 Mon Sep 17 00:00:00 2001 From: Aleksandr Nikolaev Date: Mon, 8 Dec 2025 20:23:46 +0100 Subject: [PATCH 1/2] Fixes to the documentation for the `distinct` and `distinctBy` functions. --- .../kotlinx/dataframe/api/distinct.kt | 49 +++++++++++++++---- .../documentation/DocumentationUrls.kt | 3 ++ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt index e30f087a61..9880ecd43d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt @@ -3,9 +3,11 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload import org.jetbrains.kotlinx.dataframe.annotations.Interpretable import org.jetbrains.kotlinx.dataframe.annotations.Refine +import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.SingleColumn @@ -23,24 +25,27 @@ import kotlin.reflect.KProperty // region DataFrame /** - * ## The Distinct Operation + * ## The {@get NAME Distinct} Operation * - * It removes duplicated rows based on {@get PHRASE_ENDING}. + * {@get DESCRIPTION It removes duplicated rows based on {@get PHRASE_ENDING}}. * - * __NOTE:__ The rows in the resulting [DataFrame] are in the same order as they were in the original [DataFrame]. + * __NOTE:__ The [rows][DataRow] in the resulting [DataFrame] are in the same order + * as they were in the original [DataFrame]. * - * {@get [DISTINCT_PARAM] @param [columns] - * The names of the columns to consider for evaluating distinct rows.} + * {@get [DISTINCT_PARAM]} * - * @return A new DataFrame containing only distinct rows. + * {@get [DISTINCT_RETURN] @return A new [DataFrame] containing only distinct rows.} * * @see [Selecting Columns][SelectSelectingOptions]. * @see {@include [DocumentationUrls.Distinct]} + * @see {@include [DocumentationUrls.DistinctBy]} */ @ExcludeFromSources @Suppress("ClassName") private interface DistinctDocs { interface DISTINCT_PARAM + + interface DISTINCT_RETURN } /** @@ -52,7 +57,10 @@ public fun DataFrame.distinct(): DataFrame = distinctBy { all() } /** * {@include [DistinctDocs]} - * {@set PHRASE_ENDING the specified columns}. + * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select + * and to consider for evaluating distinct rows.} + * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} */ @Refine @Interpretable("Distinct0") @@ -60,7 +68,10 @@ public fun DataFrame.distinct(columns: ColumnsSelector): DataFra /** * {@include [DistinctDocs]} - * {@set PHRASE_ENDING the specified columns}. + * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select + * and to consider for evaluating distinct rows.} + * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload @@ -72,13 +83,19 @@ public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame /** * {@include [DistinctDocs]} - * {@set PHRASE_ENDING the specified columns}. + * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select + * and to consider for evaluating distinct rows.} + * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} */ public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumnSet() } /** * {@include [DistinctDocs]} - * {@set PHRASE_ENDING the specified columns}. + * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select + * and to consider for evaluating distinct rows.} + * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload @@ -87,7 +104,10 @@ public fun DataFrame.distinct(vararg columns: AnyColumnReference): DataFr /** * {@include [DistinctDocs]} + * {@set NAME DistinctBy} * {@set PHRASE_ENDING the specified columns}. + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] + * The names of the columns to consider for evaluating distinct rows.} */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload @@ -96,13 +116,19 @@ public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame< /** * {@include [DistinctDocs]} + * {@set NAME DistinctBy} * {@set PHRASE_ENDING the specified columns}. + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] + * The names of the columns to consider for evaluating distinct rows.} */ public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumnSet() } /** * {@include [DistinctDocs]} + * {@set NAME DistinctBy} * {@set PHRASE_ENDING the specified columns}. + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] + * The names of the columns to consider for evaluating distinct rows.} */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload @@ -111,7 +137,10 @@ public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): Data /** * {@include [DistinctDocs]} + * {@set NAME DistinctBy} * {@set PHRASE_ENDING the specified columns}. + * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] + * The names of the columns to consider for evaluating distinct rows.} */ public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame { val cols = get(columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 006c494270..7f4ad81dbb 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -87,6 +87,9 @@ internal interface DocumentationUrls { /** See `distinct` on the documentation website. */ interface Distinct + /** See `distinctBy` on the documentation website. */ + interface DistinctBy + /** See `flatten` on the documentation website. */ interface Flatten From 2432bf5ed4b817e585e4a3e7c196accad3b658b2 Mon Sep 17 00:00:00 2001 From: Aleksandr Nikolaev Date: Mon, 15 Dec 2025 11:12:09 +0100 Subject: [PATCH 2/2] Fixes to the `distinct` and `distinctBy` KDocs after review. --- .../kotlinx/dataframe/api/distinct.kt | 121 ++++++++---------- .../documentation/DocumentationUrls.kt | 4 +- 2 files changed, 57 insertions(+), 68 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt index 9880ecd43d..332b2c5f4f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt @@ -7,15 +7,19 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload import org.jetbrains.kotlinx.dataframe.annotations.Interpretable import org.jetbrains.kotlinx.dataframe.annotations.Refine +import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DESCRIPTION +import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_PARAM import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN +import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.FUNCTION +import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.PHRASE_ENDING import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.Indent +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet import org.jetbrains.kotlinx.dataframe.indices @@ -25,20 +29,27 @@ import kotlin.reflect.KProperty // region DataFrame /** - * ## The {@get NAME Distinct} Operation + * {@get [DESCRIPTION] Removes duplicated rows based on {@get [PHRASE_ENDING]}}. * - * {@get DESCRIPTION It removes duplicated rows based on {@get PHRASE_ENDING}}. - * - * __NOTE:__ The [rows][DataRow] in the resulting [DataFrame] are in the same order + * The [rows][DataRow] in the resulting [DataFrame] are in the same order * as they were in the original [DataFrame]. * - * {@get [DISTINCT_PARAM]} + * See also {@get [FUNCTION] [distinctBy] that removes duplicated rows based on the specified columns + * and keeps all the columns in the resulting [DataFrame].} + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * See [Selecting Columns][SelectSelectingOptions]. * - * {@get [DISTINCT_RETURN] @return A new [DataFrame] containing only distinct rows.} + * For more information: + * + * {@include [DocumentationUrls.Distinct]} + * + * {@include [DocumentationUrls.DistinctBy]} + * + * {@get [DISTINCT_PARAM]} * - * @see [Selecting Columns][SelectSelectingOptions]. - * @see {@include [DocumentationUrls.Distinct]} - * @see {@include [DocumentationUrls.DistinctBy]} + * @return {@get [DISTINCT_RETURN] A new [DataFrame] containing only distinct rows.} */ @ExcludeFromSources @Suppress("ClassName") @@ -46,33 +57,32 @@ private interface DistinctDocs { interface DISTINCT_PARAM interface DISTINCT_RETURN + + interface DESCRIPTION + + interface PHRASE_ENDING + + interface FUNCTION } /** * {@include [DistinctDocs]} - * {@set PHRASE_ENDING all columns}. - * {@set [DistinctDocs.DISTINCT_PARAM]} + * {@set [PHRASE_ENDING] all columns} + * {@set [DISTINCT_PARAM]} */ public fun DataFrame.distinct(): DataFrame = distinctBy { all() } /** * {@include [DistinctDocs]} - * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select - * and to consider for evaluating distinct rows.} - * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} + * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns} + * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns + * that will be included in the resulting [DataFrame] and considered for evaluating distinct rows.} + * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.} */ @Refine @Interpretable("Distinct0") public fun DataFrame.distinct(columns: ColumnsSelector): DataFrame = select(columns).distinct() -/** - * {@include [DistinctDocs]} - * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select - * and to consider for evaluating distinct rows.} - * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} - */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame = @@ -83,32 +93,18 @@ public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame /** * {@include [DistinctDocs]} - * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select + * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns} + * {@set [DISTINCT_PARAM] @param [columns\] The names of the columns to select * and to consider for evaluating distinct rows.} - * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} + * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.} */ public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumnSet() } -/** - * {@include [DistinctDocs]} - * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns} - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select - * and to consider for evaluating distinct rows.} - * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.} - */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public fun DataFrame.distinct(vararg columns: AnyColumnReference): DataFrame = distinct { columns.toColumnSet() } -/** - * {@include [DistinctDocs]} - * {@set NAME DistinctBy} - * {@set PHRASE_ENDING the specified columns}. - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] - * The names of the columns to consider for evaluating distinct rows.} - */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame = @@ -116,20 +112,15 @@ public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame< /** * {@include [DistinctDocs]} - * {@set NAME DistinctBy} - * {@set PHRASE_ENDING the specified columns}. - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] + * {@set [PHRASE_ENDING] the specified columns} + * {@set [FUNCTION] [distinct] that selects the specified columns + * (if the columns are not specified, selects all columns) + * and keeps only distinct rows based on these selected columns.} + * {@set [DISTINCT_PARAM] @param [columns\] * The names of the columns to consider for evaluating distinct rows.} */ public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumnSet() } -/** - * {@include [DistinctDocs]} - * {@set NAME DistinctBy} - * {@set PHRASE_ENDING the specified columns}. - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] - * The names of the columns to consider for evaluating distinct rows.} - */ @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): DataFrame = @@ -137,10 +128,12 @@ public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): Data /** * {@include [DistinctDocs]} - * {@set NAME DistinctBy} - * {@set PHRASE_ENDING the specified columns}. - * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] - * The names of the columns to consider for evaluating distinct rows.} + * {@set [PHRASE_ENDING] the specified columns} + * {@set [FUNCTION] [distinct] that selects the specified columns + * (if the columns are not specified, selects all columns) + * and keeps only distinct rows based on these selected columns.} + * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns + * that will be considered for evaluating distinct rows.} */ public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame { val cols = get(columns) @@ -153,15 +146,13 @@ public fun DataFrame.distinctBy(columns: ColumnsSelector): DataF // region ColumnsSelectionDsl /** - * ##### Distinct {@include [ColumnsSelectionDslLink]} + * Distinct {@include [ColumnsSelectionDslLink]}. * * See [Grammar] for all functions in this interface. */ public interface DistinctColumnsSelectionDsl { /** - * ## Distinct Grammar - * * @include [DslGrammarTemplate] * {@set [DslGrammarTemplate.DEFINITIONS] * {@include [DslGrammarTemplate.ColumnSetDef]} @@ -181,23 +172,21 @@ public interface DistinctColumnsSelectionDsl { } /** - * ## Distinct * Returns a new [ColumnSet] from [this] [ColumnSet] containing only distinct columns (by path). * This is useful when you've selected the same column multiple times but only want it once. * - * NOTE: This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name. + * This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name. * For this, you'll need to [rename][ColumnsSelectionDsl.named] one of the columns. * - * ### Check out: [Grammar] - * - * #### For Example: - * `df.`[select][DataFrame.select]` { (`[colsOf][SingleColumn.colsOf]`<`[Int][Int]`>() `[and][ColumnsSelectionDsl.and]` age).`[distinct][ColumnSet.distinct]`() }` + * See also [Grammar], [named][ColumnsSelectionDsl.named], [simplify][ColumnsSelectionDsl.simplify]. * - * `df.`[select][DataFrame.select]` { `[colsAtAnyDepth][ColumnsSelectionDsl.colsAtAnyDepth]`().`[nameStartsWith][ColumnsSelectionDsl.nameStartsWith]`("order").`[distinct][ColumnSet.distinct]`() }` + * ### Examples + * ```kotlin + * df.select { (colsOf() and age).distinct() } + * df.select { colsAtAnyDepth().nameStartsWith("order").distinct() } + * ``` * * @return A new [ColumnSet] containing only distinct columns (by path). - * @see ColumnsSelectionDsl.named - * @see ColumnsSelectionDsl.simplify */ public fun ColumnSet.distinct(): ColumnSet = DistinctColumnSet(this) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 7f4ad81dbb..ed612d01c8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -84,10 +84,10 @@ internal interface DocumentationUrls { /** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */ interface Remove - /** See `distinct` on the documentation website. */ + /** [See `distinct` on the documentation website.]({@include [Url]}/distinct.html) */ interface Distinct - /** See `distinctBy` on the documentation website. */ + /** [See `distinctBy` on the documentation website.]({@include [Url]}/distinct.html#distinctby) */ interface DistinctBy /** See `flatten` on the documentation website. */