Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@ package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.AnyColumnReference
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DESCRIPTION
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_PARAM
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.FUNCTION
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.PHRASE_ENDING
import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
Expand All @@ -14,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
import org.jetbrains.kotlinx.dataframe.documentation.Indent
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
import org.jetbrains.kotlinx.dataframe.indices
Expand All @@ -23,45 +30,60 @@ import kotlin.reflect.KProperty
// region DataFrame

/**
 * {@get [DESCRIPTION] Removes duplicated rows based on {@get [PHRASE_ENDING]}}.
 *
 * The [rows][DataRow] in the resulting [DataFrame] are in the same order
 * as they were in the original [DataFrame].
 *
 * See also {@get [FUNCTION] [distinctBy] that removes duplicated rows based on the specified columns
 * and keeps all the columns in the resulting [DataFrame].}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * See [Selecting Columns][SelectSelectingOptions].
 *
 * For more information:
 *
 * {@include [DocumentationUrls.Distinct]}
 *
 * {@include [DocumentationUrls.DistinctBy]}
 *
 * {@get [DISTINCT_PARAM]}
 *
 * @return {@get [DISTINCT_RETURN] A new [DataFrame] containing only distinct rows}
 */
@ExcludeFromSources
@Suppress("ClassName")
private interface DistinctDocs {
// Key for the overload-specific `@param` text; the template supplies no default for it.
interface DISTINCT_PARAM

// Key for the `@return` text; the template's default describes a DataFrame with only distinct rows.
interface DISTINCT_RETURN

// Key for the summary sentence; the template's default is built from PHRASE_ENDING.
interface DESCRIPTION

// Key for the tail of the default summary (overloads set "all columns" or "the specified columns").
interface PHRASE_ENDING

// Key for the "See also" cross-reference; the template's default points at distinctBy.
interface FUNCTION
}

/**
 * {@include [DistinctDocs]}
 * {@set [PHRASE_ENDING] all columns}.
 * {@set [DISTINCT_PARAM]}
 */
public fun <T> DataFrame<T>.distinct(): DataFrame<T> {
    // No selector given: every column takes part in the duplicate check.
    return distinctBy { all() }
}

/**
 * {@include [DistinctDocs]}
 * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns}
 * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
 * that will be included in the resulting [DataFrame] and considered for evaluating distinct rows.}
 * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.}
 */
@Refine
@Interpretable("Distinct0")
public fun <T, C> DataFrame<T>.distinct(columns: ColumnsSelector<T, C>): DataFrame<T> {
    // Narrow the frame to the selected columns first, then de-duplicate what is left.
    val selected = select(columns)
    return selected.distinct()
}

/**
* {@include [DistinctDocs]}
* {@set PHRASE_ENDING the specified columns}.
*/
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T> =
Expand All @@ -72,46 +94,47 @@ public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T>

/**
 * {@include [DistinctDocs]}
 * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns}
 * {@set [DISTINCT_PARAM] @param [columns\] The names of the columns to select
 * and to consider for evaluating distinct rows.}
 * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.}
 */
public fun <T> DataFrame<T>.distinct(vararg columns: String): DataFrame<T> {
    // Forward to the selector-based overload, turning the names into a column set.
    return distinct { columns.toColumnSet() }
}

/**
 * {@include [DistinctDocs]}
 * {@set [PHRASE_ENDING] the specified columns}.
 */
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinct(vararg columns: AnyColumnReference): DataFrame<T> {
    // Forward to the selector-based overload, turning the references into a column set.
    return distinct { columns.toColumnSet() }
}

/**
 * {@include [DistinctDocs]}
 * {@set [PHRASE_ENDING] the specified columns}.
 */
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinctBy(vararg columns: KProperty<*>): DataFrame<T> {
    // Forward to the selector-based overload, turning the properties into a column set.
    return distinctBy { columns.toColumnSet() }
}

/**
 * {@include [DistinctDocs]}
 * {@set [PHRASE_ENDING] the specified columns}.
 * {@set [FUNCTION] [distinct] that selects the specified columns
 * (if the columns are not specified, selects all columns)
 * and keeps only distinct rows based on these selected columns.}
 * {@set [DISTINCT_PARAM] @param [columns\]
 * The names of the columns to consider for evaluating distinct rows.}
 */
public fun <T> DataFrame<T>.distinctBy(vararg columns: String): DataFrame<T> {
    // Forward to the selector-based overload, turning the names into a column set.
    return distinctBy { columns.toColumnSet() }
}

/**
 * {@include [DistinctDocs]}
 * {@set [PHRASE_ENDING] the specified columns}.
 */
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinctBy(vararg columns: AnyColumnReference): DataFrame<T> {
    // Forward to the selector-based overload, turning the references into a column set.
    return distinctBy { columns.toColumnSet() }
}

/**
* {@include [DistinctDocs]}
* {@set PHRASE_ENDING the specified columns}.
* {@set [PHRASE_ENDING] the specified columns}.
* {@set [FUNCTION] [distinct] that selects the specified columns
* (if the columns are not specified, selects all columns)
* and keeps only distinct rows based on these selected columns.}
* {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
* that will be considered for evaluating distinct rows.}
*/
public fun <T, C> DataFrame<T>.distinctBy(columns: ColumnsSelector<T, C>): DataFrame<T> {
val cols = get(columns)
Expand Down Expand Up @@ -156,7 +179,7 @@ public interface DistinctColumnsSelectionDsl {
* Returns a new [ColumnSet] from [this] [ColumnSet] containing only distinct columns (by path).
* This is useful when you've selected the same column multiple times but only want it once.
*
* This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
* For this, you'll need to [rename][ColumnsSelectionDsl.named] one of the columns.
*
* ### Check out: [Grammar]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,12 @@ internal interface DocumentationUrls {
/** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */
interface Remove

/** [See `distinct` on the documentation website.]({@include [Url]}/distinct.html) */
interface Distinct

/** [See `distinctBy` on the documentation website.]({@include [Url]}/distinct.html#distinctby) */
interface DistinctBy

/** <a href="{@include [Url]}/flatten.html">See `flatten` on the documentation website.</a> */
interface Flatten

Expand Down