diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt
index e30f087a61..52864413d2 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt
@@ -3,9 +3,15 @@ package org.jetbrains.kotlinx.dataframe.api
 import org.jetbrains.kotlinx.dataframe.AnyColumnReference
 import org.jetbrains.kotlinx.dataframe.ColumnsSelector
 import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.DataRow
 import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
 import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
 import org.jetbrains.kotlinx.dataframe.annotations.Refine
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DESCRIPTION
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_PARAM
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.FUNCTION
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.PHRASE_ENDING
 import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
 import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
 import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
@@ -14,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
 import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
 import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
 import org.jetbrains.kotlinx.dataframe.documentation.Indent
+import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
 import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
 import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
 import org.jetbrains.kotlinx.dataframe.indices
@@ -23,45 +30,60 @@ import kotlin.reflect.KProperty
 // region DataFrame
 
 /**
- * ## The Distinct Operation
+ * {@get [DESCRIPTION] Removes duplicated rows based on {@get [PHRASE_ENDING]}}.
  *
- * It removes duplicated rows based on {@get PHRASE_ENDING}.
+ * The [rows][DataRow] in the resulting [DataFrame] are in the same order
+ * as they were in the original [DataFrame].
  *
- * __NOTE:__ The rows in the resulting [DataFrame] are in the same order as they were in the original [DataFrame].
+ * See also {@get [FUNCTION] [distinctBy] that removes duplicated rows based on the specified columns
+ * and keeps all the columns in the resulting [DataFrame].}
  *
- * {@get [DISTINCT_PARAM] @param [columns]
- * The names of the columns to consider for evaluating distinct rows.}
+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
+ *
+ * See [Selecting Columns][SelectSelectingOptions].
+ *
+ * For more information:
  *
- * @return A new DataFrame containing only distinct rows.
+ * {@include [DocumentationUrls.Distinct]}
  *
- * @see [Selecting Columns][SelectSelectingOptions].
- * @see {@include [DocumentationUrls.Distinct]}
+ * {@include [DocumentationUrls.DistinctBy]}
+ *
+ * {@get [DISTINCT_PARAM]}
+ *
+ * @return {@get [DISTINCT_RETURN] A new [DataFrame] containing only distinct rows}
  */
 @ExcludeFromSources
 @Suppress("ClassName")
 private interface DistinctDocs {
     interface DISTINCT_PARAM
+
+    interface DISTINCT_RETURN
+
+    interface DESCRIPTION
+
+    interface PHRASE_ENDING
+
+    interface FUNCTION
 }
 
 /**
  * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING all columns}.
- * {@set [DistinctDocs.DISTINCT_PARAM]}
+ * {@set [PHRASE_ENDING] all columns}.
+ * {@set [DISTINCT_PARAM]}
  */
 public fun DataFrame.distinct(): DataFrame = distinctBy { all() }
 
 /**
  * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
+ * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns}
+ * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
+ * that will be included in the resulting [DataFrame] and considered for evaluating distinct rows.}
+ * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.}
  */
 @Refine
 @Interpretable("Distinct0")
 public fun DataFrame.distinct(columns: ColumnsSelector): DataFrame = select(columns).distinct()
 
-/**
- * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame =
@@ -72,23 +94,18 @@ public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame
 
 /**
  * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
+ * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns}
+ * {@set [DISTINCT_PARAM] @param [columns\] The names of the columns to select
+ * and to consider for evaluating distinct rows.}
+ * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.}
  */
 public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumnSet() }
 
-/**
- * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun DataFrame.distinct(vararg columns: AnyColumnReference): DataFrame =
     distinct { columns.toColumnSet() }
 
-/**
- * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame =
@@ -96,14 +113,15 @@ public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame<
 
 /**
  * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
+ * {@set [PHRASE_ENDING] the specified columns}.
+ * {@set [FUNCTION] [distinct] that selects the specified columns
+ * (if the columns are not specified, selects all columns)
+ * and keeps only distinct rows based on these selected columns.}
+ * {@set [DISTINCT_PARAM] @param [columns\]
+ * The names of the columns to consider for evaluating distinct rows.}
  */
 public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumnSet() }
 
-/**
- * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): DataFrame =
@@ -111,7 +129,12 @@ public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): Data
 
 /**
  * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING the specified columns}.
+ * {@set [PHRASE_ENDING] the specified columns}.
+ * {@set [FUNCTION] [distinct] that selects the specified columns
+ * (if the columns are not specified, selects all columns)
+ * and keeps only distinct rows based on these selected columns.}
+ * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
+ * that will be considered for evaluating distinct rows.}
  */
 public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame {
     val cols = get(columns)
@@ -156,7 +179,7 @@ public interface DistinctColumnsSelectionDsl {
      * Returns a new [ColumnSet] from [this] [ColumnSet] containing only distinct columns (by path).
      * This is useful when you've selected the same column multiple times but only want it once.
      *
-     * NOTE: This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
+     * This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
      * For this, you'll need to [rename][ColumnsSelectionDsl.named] one of the columns.
      *
      * ### Check out: [Grammar]
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
index 006c494270..ed612d01c8 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
@@ -84,9 +84,12 @@ internal interface DocumentationUrls {
     /** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */
     interface Remove
 
-    /** See `distinct` on the documentation website. */
+    /** [See `distinct` on the documentation website.]({@include [Url]}/distinct.html) */
     interface Distinct
 
+    /** [See `distinctBy` on the documentation website.]({@include [Url]}/distinct.html#distinctby) */
+    interface DistinctBy
+
     /** See `flatten` on the documentation website. */
     interface Flatten
 