From 8011d33263b4e9968eeffd6056cb147569c37753 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Fri, 22 Aug 2025 15:02:47 +0400 Subject: [PATCH 1/8] small functions tests and kdocs --- .../jetbrains/kotlinx/dataframe/api/any.kt | 28 ++++++++ .../kotlinx/dataframe/api/asIterable.kt | 5 ++ .../kotlinx/dataframe/api/asSequence.kt | 8 +++ .../kotlinx/dataframe/api/associate.kt | 49 +++++++++++++ .../kotlinx/dataframe/api/between.kt | 12 ++++ .../documentation/RowFilterDescription.kt | 8 ++- .../kotlinx/dataframe/examples/plugin/Main.kt | 2 + .../kotlinx/dataframe/api/FunctionsStdTest.kt | 71 +++++++++++++++++++ 8 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt index 745963dfa1..0c021125f4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt @@ -2,18 +2,46 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.Predicate import org.jetbrains.kotlinx.dataframe.RowFilter import org.jetbrains.kotlinx.dataframe.columns.values // region DataColumn +/** + * Returns `true` if at least one element in this [DataColumn] satisfies the given [predicate]. + * + * This is a convenience alias that delegates to [Iterable.any] on the column's [values]. + * + * @param predicate A lambda function that takes a value from the column + * and returns `true` if it matches the condition. + * @return `true` if at least one element matches the [predicate], `false` otherwise. + * @see [DataColumn.all] + * @see [DataColumn.filter] + * @see [DataColumn.count] + */ public fun DataColumn.any(predicate: Predicate): Boolean = values.any(predicate) // endregion // region DataFrame +/** + * Returns `true` if at least one row in this [DataFrame] satisfies the given [predicate]. + * + * {@include [org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription]} + * + * ### Example + * ```kotlin + * // Check if there is at least one row where "age" is greater than 18 + * val hasAdults = df.any { age > 18 } + * ``` + * + * @param predicate A [RowFilter] lambda that takes a [DataRow] (as both `this` and `it`) + * and returns `true` if the row should be considered a match. + * @return `true` if at least one row satisfies the [predicate], `false` otherwise. + */ public inline fun DataFrame.any(predicate: RowFilter): Boolean = rows().any { predicate(it, it) } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt index 2b5c379f4b..a50bc4f111 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt @@ -4,6 +4,11 @@ import org.jetbrains.kotlinx.dataframe.DataColumn // region DataColumn +/** + * Returns an [Iterable] over the values of this [DataColumn]. + * + * @see [asSequence] + */ public fun DataColumn.asIterable(): Iterable = values() // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt index 73af1ef413..abd7a892c7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt @@ -6,12 +6,20 @@ import org.jetbrains.kotlinx.dataframe.DataRow // region DataColumn +/** + * Returns a [Sequence] over the values of this [DataColumn]. + * + * @see [asIterable] + */ public fun DataColumn.asSequence(): Sequence = asIterable().asSequence() // endregion // region DataFrame +/** + * Returns a [Sequence] of [DataRow] over this [DataFrame]. + */ public fun DataFrame.asSequence(): Sequence> = rows().asSequence() // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt index a6dce766ac..0d26dec921 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt @@ -3,12 +3,61 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.documentation.ExtensionPropertiesAPIDocs // region DataFrame +/** + * Builds a [Map] where each key is produced by applying [transform] to a row, + * and the value is the corresponding [DataRow]. + * + * The [transform] is a [RowExpression] — a lambda that receives each [DataRow] + * both as `this` and `it` and is expected to return a key, allowing you to compute keys directly from row values. + * You can also use [extension properties][ExtensionPropertiesAPIDocs] for concise and type-safe access. + * + * If multiple rows produce the same key, the last row for that key is stored, + * consistent with Kotlin's [kotlin.collections.associateBy] behavior. + * + * See also: + * - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys + * and their values as the corresponding map values. + * + * ### Example + * ```kotlin + * // Associate each row by the "id" column + * val map = df.associateBy { id } + * ``` + * + * @param transform A [RowExpression] that returns a key for each row. + * @return A [Map] of keys to corresponding rows. + */ public inline fun DataFrame.associateBy(transform: RowExpression): Map> = rows().associateBy { transform(it, it) } +/** + * Builds a [Map] from key-value [Pair]s produced by applying [transform] to each row. + * + * The [transform] is a [RowExpression] — a lambda that receives each [DataRow] + * both as `this` and `it` and is expected to return a pair, allowing you to generate [Pair]s of keys and values from row contents. + * You can also use [extension properties][ExtensionPropertiesAPIDocs] for concise and type-safe access. + * + * If multiple rows produce the same key, the last value for that key is stored, + * consistent with Kotlin's [kotlin.collections.associate] behavior. + * + * See also: + * - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys + * and their values as the corresponding map values. + * + * ### Example + * ```kotlin + * // Associate rows into a map where key = id, value = name + * val map = df.associate { id to name } + * ``` + * + * @param transform A [RowExpression] that returns a [Pair] of key and value for each row. + * @return A [Map] of keys to values. + */ public inline fun DataFrame.associate(transform: RowExpression>): Map = rows().associate { transform(it, it) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt index 3a37f9e7df..6390a082bb 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt @@ -5,6 +5,18 @@ import org.jetbrains.kotlinx.dataframe.impl.between // region DataColumn +/** + * Returns a [DataColumn] of [Boolean] values indicating whether each element + * lies between [left] and [right]. + * + * If [includeBoundaries] is `true` (default), values equal to [left] or [right] are also considered in range. + * + * @param left The lower boundary of the range. + * @param right The upper boundary of the range. + * @param includeBoundaries Whether to include [left] and [right] values in the range check. Defaults to `true`. + * @return A [DataColumn] of [Boolean] values where each element indicates if the corresponding + * value is within the specified range. + */ public fun > DataColumn.between( left: T, right: T, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt index dcafe287f2..71bfbe9fb4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt @@ -4,8 +4,10 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowFilter /** - * The [predicate] is a [RowFilter] — a lambda that receives each [DataRow] both as `this` and `it`, - * allowing you to define a [Boolean] condition using the row's values, - * including through [extension properties][ExtensionPropertiesAPIDocs] for convenient access. + * The [predicate] is a [RowFilter] — a lambda that receives each [DataRow] as both `this` and `it` + * and is expected to return a [Boolean] value. + * + * It allows you to define conditions using the row's values directly, + * including through [extension properties][ExtensionPropertiesAPIDocs] for convenient and type-safe access. */ internal interface RowFilterDescription diff --git a/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt b/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt index a1ed3145fa..562d700db3 100644 --- a/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt +++ b/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt @@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.convertTo import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.insert import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.api.rename import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase +import org.jetbrains.kotlinx.dataframe.api.under import org.jetbrains.kotlinx.dataframe.api.with import org.jetbrains.kotlinx.dataframe.io.readCsv import org.jetbrains.kotlinx.dataframe.io.writeCsv diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt new file mode 100644 index 0000000000..f1f639ea74 --- /dev/null +++ b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt @@ -0,0 +1,71 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.samples.api.TestBase +import org.junit.Test + +@Suppress("UNCHECKED_CAST") +class FunctionsStdTest : TestBase() { + + @Test + fun `DataColumn any`() { + val ageCol = df["age"] as DataColumn + ageCol.any { it > 40 } shouldBe true + ageCol.any { it > 90 } shouldBe false + } + + @Test + fun `DataFrame any`() { + df.any { "age"() > 40 && "isHappy"() } shouldBe true + df.any { "city"() == "Berlin" } shouldBe false + } + + @Test + fun `DataColumn between`() { + val ages = listOf(15, 45, 20, 40, 30, 20, 30) + val ageCol = df["age"] as DataColumn + ageCol.between(20, 40).toList() shouldBe listOf(false, false, true, true, true, true, true) + ageCol.between(20, 40, includeBoundaries = false).toList() shouldBe listOf(false, false, false, false, true, false, true) + ageCol.toList() shouldBe ages + } + + @Test + fun `DataFrame associateBy`() { + val byFirstName = df.associateBy { "name"["firstName"]() } + val alice = byFirstName["Alice"]!! + val aliceName = alice.getColumnGroup("name") + aliceName["lastName"] shouldBe "Wolf" + alice["age"] shouldBe 20 + + val byCity = df.associateBy { "city"() } + val moscow = byCity["Moscow"]!! + moscow.getColumnGroup("name")["lastName"] shouldBe "Byrd" + } + + @Test + fun `DataFrame associate`() { + val map = df.associate { "name"["lastName"]() to "age"() } + map.size shouldBe 7 + map["Marley"] shouldBe 30 + map["Cooper"] shouldBe 15 + } + + @Test + fun `DataColumn asIterable`() { + val ageCol = df["age"] as DataColumn + ageCol.asIterable().toList() shouldBe listOf(15, 45, 20, 40, 30, 20, 30) + } + + @Test + fun `DataColumn asSequence`() { + val ageCol = df["age"] as DataColumn + ageCol.asSequence().take(2).toList() shouldBe listOf(15, 45) + } + + @Test + fun `DataFrame asSequence`() { + val happyCount = df.asSequence().count { it["isHappy"] as Boolean } + happyCount shouldBe 5 + } +} From c7aab2f94007d657a5776e19770e4b574ffed19d Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Fri, 22 Aug 2025 15:07:41 +0400 Subject: [PATCH 2/8] ktlint format --- .../kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt | 1 - .../org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt index 0d26dec921..1a3264187b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt @@ -3,7 +3,6 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression -import org.jetbrains.kotlinx.dataframe.RowFilter import org.jetbrains.kotlinx.dataframe.documentation.ExtensionPropertiesAPIDocs // region DataFrame diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt index f1f639ea74..3340082c4b 100644 --- a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt +++ b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt @@ -26,7 +26,8 @@ class FunctionsStdTest : TestBase() { val ages = listOf(15, 45, 20, 40, 30, 20, 30) val ageCol = df["age"] as DataColumn ageCol.between(20, 40).toList() shouldBe listOf(false, false, true, true, true, true, true) - ageCol.between(20, 40, includeBoundaries = false).toList() shouldBe listOf(false, false, false, false, true, false, true) + ageCol.between(20, 40, includeBoundaries = false).toList() shouldBe + listOf(false, false, false, false, true, false, true) ageCol.toList() shouldBe ages } From 0e2cacc9b92821834318ab3d5587b17546811b1e Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Sat, 23 Aug 2025 21:55:28 +0400 Subject: [PATCH 3/8] util functions fixes --- .../kotlinx/dataframe/api/chunked.kt | 54 ++++++++++++++++--- .../jetbrains/kotlinx/dataframe/api/length.kt | 3 ++ .../kotlinx/dataframe/api/lowercase.kt | 3 ++ .../kotlinx/dataframe/api/shuffle.kt | 22 ++++++++ .../kotlinx/dataframe/api/uppercase.kt | 3 ++ .../dataframe/util/deprecationMessages.kt | 6 +++ .../dataframe/api/UtilFunctionsTest.kt | 46 +++++++++++++++- .../kotlinx/dataframe/samples/api/Analyze.kt | 6 +-- .../kotlinx/dataframe/samples/api/Modify.kt | 22 ++++---- .../testSets/person/DataFrameTests.kt | 7 +-- 10 files changed, 147 insertions(+), 25 deletions(-) rename tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt => core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt (60%) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt index 9aa6ae504f..116e3bafb7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -12,23 +12,65 @@ import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.type /** - * Creates a [FrameColumn] from [this] by splitting the dataframe into - * smaller ones, with their number of rows at most [size]. + * Splits this [DataFrame] into consecutive chunks of up to [size] rows + * and returns them as a [FrameColumn]. + * + * Each element of the resulting [FrameColumn] is a sub-[DataFrame] containing + * at most [size] rows. Chunks are formed in order, without overlap. + * + * @param [size] Maximum number of rows in each chunk. Must be positive. + * @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`. + * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk. */ public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn = - chunked( - startIndices = 0 until nrow step size, - name = name, - ) + chunked(startIndices = 0 until nrow step size, name = name) +/** + * Splits this [DataFrame] into chunks starting at the given [startIndices]. + * + * The chunk starting at index `i` ends right before the next start index + * or the end of the [DataFrame]. + * Use this overload when you need custom chunk boundaries. + * + * @param [startIndices] Zero-based row indices where each new chunk starts. + * @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`. + * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk. + */ public fun DataFrame.chunked(startIndices: Iterable, name: String = "groups"): FrameColumn = chunkedImpl(startIndices, name) +/** + * Groups consecutive values of this [DataColumn] into lists of at most [size] elements. + * + * This works like [kotlin.collections.chunked], but returns a [ValueColumn] instead of a [List]. + * + * @param [size] Maximum number of elements in each chunk. Must be positive. + * @return A [ValueColumn] whose elements are lists representing chunks of the original values. + */ public fun DataColumn.chunked(size: Int): ValueColumn> { val values = toList().chunked(size) return DataColumn.createValueColumn(name(), values, getListType(type)) } +/** + * Splits this [ColumnGroup] into a [FrameColumn] of sub-dataframes + * with up to [size] rows in each chunk. + * + * The resulting [FrameColumn] inherits the name of this group. + * + * @param [size] Maximum number of rows in each sub-dataframe. Must be positive. + * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk. + */ public fun ColumnGroup.chunked(size: Int): FrameColumn = chunked(size, name()) +/** + * Splits a [DataColumn] of [DataRow] into a [FrameColumn] of sub-dataframes + * with up to [size] rows in each chunk. + * + * This is a convenience overload that treats a [DataColumn] of rows + * as if it were a [ColumnGroup] (see [ColumnGroup.chunked]). + * + * @param [size] Maximum number of rows in each sub-dataframe. Must be positive. + * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk. + */ public fun DataColumn>.chunked(size: Int): FrameColumn = asColumnGroup().chunked(size) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt index 3c9eb65afd..57b1a3cbcb 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt @@ -2,9 +2,12 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.StringCol +import org.jetbrains.kotlinx.dataframe.util.LENGTH_REPLACE +import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT // region StringCol +@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LENGTH_REPLACE), DeprecationLevel.WARNING) public fun StringCol.length(): DataColumn = map { it?.length ?: 0 } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt index 1f10f59911..d6df0da0e5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt @@ -1,9 +1,12 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.StringCol +import org.jetbrains.kotlinx.dataframe.util.LOWERCASE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT // region StringCol +@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LOWERCASE_REPLACE), DeprecationLevel.WARNING) public fun StringCol.lowercase(): StringCol = map { it?.lowercase() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt index dc4f791871..499017607b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt @@ -7,16 +7,38 @@ import kotlin.random.Random // region DataColumn +/** + * Returns a new [DataColumn] with the same values in random order using the provided [random] source. + * + * @param [random] Source of randomness to ensure reproducible shuffles when needed. + * @return A new [DataColumn] with values reordered randomly. + */ public fun DataColumn.shuffle(random: Random): DataColumn = get(indices.shuffled(random)) +/** + * Returns a new [DataColumn] with values in random order using the default randomness. + * + * @return A new [DataColumn] with values reordered randomly. + */ public fun DataColumn.shuffle(): DataColumn = get(indices.shuffled()) // endregion // region DataFrame +/** + * Returns a new [DataFrame] with rows reordered randomly using the provided [random] source. + * + * @param [random] Source of randomness to ensure reproducible shuffles when needed. + * @return A new [DataFrame] with rows in random order. + */ public fun DataFrame.shuffle(random: Random): DataFrame = getRows(indices.shuffled(random)) +/** + * Returns a new [DataFrame] with rows in random order using the default randomness. + * + * @return A new [DataFrame] with rows in random order. + */ public fun DataFrame.shuffle(): DataFrame = getRows(indices.shuffled()) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt index e2595534af..f9c8d93b73 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt @@ -1,9 +1,12 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.StringCol +import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT +import org.jetbrains.kotlinx.dataframe.util.UPPERCASE_REPLACE // region StringCol +@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(UPPERCASE_REPLACE), DeprecationLevel.WARNING) public fun StringCol.uppercase(): StringCol = map { it?.uppercase() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index 5ba002a1de..e003e59cf1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -227,6 +227,12 @@ internal const val COL_TYPE_DEPRECATED_INSTANT = "kotlinx.datetime.Instant is deprecated in favor of kotlin.time.Instant. Migrate to kotlin.time.Instant and use Coltype.StdlibInstant at your own pace. $MESSAGE_1_1" internal const val COL_TYPE_DEPRECATED_INSTANT_REPLACE = "ColType.StdlibInstant" +internal const val MESSAGE_SHORTCUT = "This shortcut is deprecated. $MESSAGE_1_1" + +internal const val LENGTH_REPLACE = "this.map { it?.length ?: 0 }" +internal const val LOWERCASE_REPLACE = "this.map { it?.lowercase() }" +internal const val UPPERCASE_REPLACE = "this.map { it?.uppercase() }" + // endregion // region keep across releases diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt similarity index 60% rename from tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt rename to core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt index 3340082c4b..7fd29fdcf5 100644 --- a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/FunctionsStdTest.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt @@ -3,10 +3,13 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.samples.api.TestBase +import org.jetbrains.kotlinx.dataframe.size import org.junit.Test +import kotlin.collections.map +import kotlin.random.Random @Suppress("UNCHECKED_CAST") -class FunctionsStdTest : TestBase() { +class UtilFunctionsTest: TestBase() { @Test fun `DataColumn any`() { @@ -69,4 +72,45 @@ class FunctionsStdTest : TestBase() { val happyCount = df.asSequence().count { it["isHappy"] as Boolean } happyCount shouldBe 5 } + + @Test + fun `DataFrame chunked`() { + val groups = df.chunked(3) + groups.size shouldBe 3 + groups.name() shouldBe "groups" + groups[0].rowsCount() shouldBe 3 + groups[1].rowsCount() shouldBe 3 + groups[2].rowsCount() shouldBe 1 + } + + @Test + fun `DataColumn chunked`() { + val ageCol = df["age"] as DataColumn + val chunked = ageCol.chunked(4) + chunked.size shouldBe 2 + chunked.name() shouldBe "age" + // Check chunk contents + chunked[0] shouldBe listOf(15, 45, 20, 40) + chunked[1] shouldBe listOf(30, 20, 30) + } + + @Test + fun `DataFrame shuffle`() { + val rnd = Random(123) + val shuffledDf = df.shuffle(rnd) + // Compute expected order via indices.shuffled with same seed + val ages = (df["age"] as DataColumn).toList() + val expectedAges = ages.indices.shuffled(Random(123)).map { ages[it] } + shuffledDf.rows().map { it["age"] as Int } shouldBe expectedAges + } + + @Test + fun `DataColumn shuffle`() { + val rnd = Random(123) + val ageCol = df["age"] as DataColumn + val shuffled = ageCol.shuffle(rnd) + val values = ageCol.toList() + val expected = values.indices.shuffled(Random(123)).map { values[it] } + shuffled.toList() shouldBe expected + } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt index a7779c3b92..8726e7d362 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt @@ -23,7 +23,7 @@ import org.jetbrains.kotlinx.dataframe.api.groupBy import org.jetbrains.kotlinx.dataframe.api.groupByOther import org.jetbrains.kotlinx.dataframe.api.head import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.length +import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.matches import org.jetbrains.kotlinx.dataframe.api.max import org.jetbrains.kotlinx.dataframe.api.maxBy @@ -644,7 +644,7 @@ class Analyze : TestBase() { df.groupBy { city }.sum("total weight") { weight } // sum of weights into column "total weight" df.groupBy { city }.count() // number of rows into column "count" df.groupBy { city } - .max { name.firstName.length() and name.lastName.length() } // maximum length of firstName or lastName into column "max" + .max { name.firstName.map { it.length } and name.lastName.map { it.length } } // maximum length of firstName or lastName into column "max" df.groupBy { city } .medianFor { age and weight } // median age into column "age", median weight into column "weight" df.groupBy { city } @@ -663,7 +663,7 @@ class Analyze : TestBase() { df.groupBy("city").sum("weight", name = "total weight") // sum of weights into column "total weight" df.groupBy("city").count() // number of rows into column "count" df.groupBy("city").max { - "name"["firstName"]().length() and "name"["lastName"]().length() + "name"["firstName"]().map { it.length } and "name"["lastName"]().map { it.length } } // maximum length of firstName or lastName into column "max" df.groupBy("city") .medianFor("age", "weight") // median age into column "age", median weight into column "weight" diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index 1fb498d172..d23c33fdc1 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -10,9 +10,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.after -import org.jetbrains.kotlinx.dataframe.api.and import org.jetbrains.kotlinx.dataframe.api.asColumn -import org.jetbrains.kotlinx.dataframe.api.to import org.jetbrains.kotlinx.dataframe.api.asFrame import org.jetbrains.kotlinx.dataframe.api.asGroupBy import org.jetbrains.kotlinx.dataframe.api.at @@ -51,8 +49,6 @@ import org.jetbrains.kotlinx.dataframe.api.intoList import org.jetbrains.kotlinx.dataframe.api.intoRows import org.jetbrains.kotlinx.dataframe.api.inward import org.jetbrains.kotlinx.dataframe.api.keysInto -import org.jetbrains.kotlinx.dataframe.api.length -import org.jetbrains.kotlinx.dataframe.api.lowercase import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.mapKeys import org.jetbrains.kotlinx.dataframe.api.mapToColumn @@ -88,11 +84,12 @@ import org.jetbrains.kotlinx.dataframe.api.sortByDesc import org.jetbrains.kotlinx.dataframe.api.sortWith import org.jetbrains.kotlinx.dataframe.api.split import org.jetbrains.kotlinx.dataframe.api.sum +import org.jetbrains.kotlinx.dataframe.api.to import org.jetbrains.kotlinx.dataframe.api.toColumn import org.jetbrains.kotlinx.dataframe.api.toFloat -import org.jetbrains.kotlinx.dataframe.api.toStart import org.jetbrains.kotlinx.dataframe.api.toMap import org.jetbrains.kotlinx.dataframe.api.toPath +import org.jetbrains.kotlinx.dataframe.api.toStart import org.jetbrains.kotlinx.dataframe.api.toTop import org.jetbrains.kotlinx.dataframe.api.under import org.jetbrains.kotlinx.dataframe.api.unfold @@ -115,7 +112,8 @@ import org.junit.Ignore import org.junit.Test import java.net.URL import java.time.format.DateTimeFormatter -import java.util.* +import java.util.Locale +import java.util.Random import java.util.stream.Collectors @Suppress("ktlint:standard:chain-method-continuation", "ktlint:standard:argument-list-wrapping") @@ -197,7 +195,7 @@ class Modify : TestBase() { // SampleStart df.convert { age }.to() df.convert { colsOf() }.to() - df.convert { name.firstName and name.lastName }.asColumn { it.length() } + df.convert { name.firstName and name.lastName }.asColumn { col -> col.map { it.length } } df.convert { weight }.toFloat() // SampleEnd } @@ -287,7 +285,7 @@ class Modify : TestBase() { fun replace() { // SampleStart df.replace { name }.with { name.firstName } - df.replace { colsOf() }.with { it.lowercase() } + df.replace { colsOf() }.with { col -> col.map { it?.lowercase() } } df.replace { age }.with { 2021 - age named "year" } // SampleEnd } @@ -923,7 +921,7 @@ class Modify : TestBase() { "year of birth" from 2021 - age age gt 18 into "is adult" "details" { - name.lastName.length() into "last name length" + name.lastName.map { it.length } into "last name length" "full name" from { name.firstName + " " + name.lastName } } } @@ -938,7 +936,7 @@ class Modify : TestBase() { "year of birth" from 2021 - "age"() "age"() gt 18 into "is adult" "details" { - "name"["lastName"]().length() into "last name length" + "name"["lastName"]().map { it.length } into "last name length" "full name" from { "name"["firstName"]() + " " + "name"["lastName"]() } } } @@ -992,7 +990,7 @@ class Modify : TestBase() { df.mapToFrame { "year of birth" from 2021 - age age gt 18 into "is adult" - name.lastName.length() into "last name length" + name.lastName.map { it.length } into "last name length" "full name" from { name.firstName + " " + name.lastName } +city } @@ -1006,7 +1004,7 @@ class Modify : TestBase() { df.mapToFrame { "year of birth" from 2021 - "age"() "age"() gt 18 into "is adult" - "name"["lastName"]().length() into "last name length" + "name"["lastName"]().map { it.length } into "last name length" "full name" from { "name"["firstName"]() + " " + "name"["lastName"]() } +"city" } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index 435018c912..7b94c6fcbf 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -88,7 +88,6 @@ import org.jetbrains.kotlinx.dataframe.api.isNumber import org.jetbrains.kotlinx.dataframe.api.keysInto import org.jetbrains.kotlinx.dataframe.api.last import org.jetbrains.kotlinx.dataframe.api.leftJoin -import org.jetbrains.kotlinx.dataframe.api.lowercase import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.mapToFrame import org.jetbrains.kotlinx.dataframe.api.match @@ -1124,7 +1123,7 @@ class DataFrameTests : BaseTest() { @Test fun `pivot matches with conversion`() { val filtered = typed.dropNulls { city } - val res = filtered.pivot(inward = false) { city.lowercase() }.groupBy { name and age }.matches() + val res = filtered.pivot(inward = false) { city.map { it?.lowercase() } }.groupBy { name and age }.matches() val cities = filtered.city.toList().map { it!!.lowercase() } val gathered = res.gather { colsOf { cities.contains(it.name()) } }.where { it }.keysInto("city") @@ -2061,7 +2060,9 @@ class DataFrameTests : BaseTest() { typed .groupBy { name.map { it.lowercase() } }.toDataFrame() .name.values() shouldBe - typed.name.distinct().lowercase().values() + run { + typed.name.distinct().map { it.lowercase() }.values() + } } @Test From ecc5b0ae3891e709b8c4b13c2fea2af8b58e61c7 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Sat, 23 Aug 2025 21:55:59 +0400 Subject: [PATCH 4/8] ktlint format --- .../org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt index 7fd29fdcf5..a29c37dffd 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/UtilFunctionsTest.kt @@ -9,7 +9,7 @@ import kotlin.collections.map import kotlin.random.Random @Suppress("UNCHECKED_CAST") -class UtilFunctionsTest: TestBase() { +class UtilFunctionsTest : TestBase() { @Test fun `DataColumn any`() { From 5ce2a7d9293085ece52d3f7c5cf0bac7093fd9d0 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Tue, 26 Aug 2025 14:13:00 +0400 Subject: [PATCH 5/8] deprecate ColumnReferenceApi --- .../dataframe/api/ColumnReferenceApi.kt | 11 +++++++++++ .../jetbrains/kotlinx/dataframe/api/gather.kt | 4 ++-- .../kotlinx/dataframe/api/reverse.kt | 2 +- .../kotlinx/dataframe/samples/api/Create.kt | 6 +++--- .../testSets/person/DataFrameTests.kt | 19 ------------------- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt index 40c90f36d3..3b79e04a93 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt @@ -4,26 +4,37 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API import kotlin.reflect.typeOf internal val ColumnReference<*>.name: String get() = name() +@Suppress("DEPRECATION_ERROR") +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public inline fun ColumnReference.withValues(vararg values: T): ValueColumn = withValues(values.asIterable()) +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public inline fun ColumnReference.withValues(values: Iterable): ValueColumn = DataColumn.createValueColumn(name(), values.asList(), typeOf()) +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public infix fun > ColumnReference.gt(value: C): ColumnReference = map { it > value } +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public infix fun > ColumnReference.lt(value: C): ColumnReference = map { it < value } +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public infix fun ColumnReference.eq(value: C): ColumnReference = map { it == value } +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public infix fun ColumnReference.neq(value: C): ColumnReference = map { it != value } +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public fun ColumnReference.length(): ColumnReference = map { it?.length ?: 0 } +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public fun ColumnReference.lowercase(): ColumnReference = map { it?.lowercase() } +@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) public fun ColumnReference.uppercase(): ColumnReference = map { it?.uppercase() } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index d9594d0878..59885d4b92 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -106,8 +106,8 @@ class GatherTests { val dataRows = cols.map { it[0] } val newDf = listOf( - name.withValues(List(cols.size) { name[0] }), - mode.withValues(cols.map { it.name() }), + List(cols.size) { name[0] }.toColumn("name"), + cols.map { it.name() }.toColumn("mode"), dataRows.map { it.getValueOrNull("c1") }.toColumn("c1"), dataRows.map { it.getValueOrNull("c2") }.toColumn("c2"), dataRows.map { it.getValueOrNull("c3") }.toColumn("c3"), diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt index 39f351db9f..227584b6f8 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt @@ -14,7 +14,7 @@ class ReverseTests { @Test fun column() { val col by columnOf(1, 2, 3) - col.reverse() shouldBe col.withValues(listOf(3, 2, 1)) + col.reverse() shouldBe listOf(3, 2, 1).toColumn("col") } @Test diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt index 2335cde5b0..9dfa164049 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt @@ -25,7 +25,6 @@ import org.jetbrains.kotlinx.dataframe.api.toColumn import org.jetbrains.kotlinx.dataframe.api.toColumnOf import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.api.value -import org.jetbrains.kotlinx.dataframe.api.withValues import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions import org.jetbrains.kotlinx.dataframe.kind @@ -79,7 +78,8 @@ class Create : TestBase() { // SampleEnd } - @Test + // Not used anymore + /*@Test @TransformDataFrameExpressions fun columnAccessorToColumn() { // SampleStart @@ -89,7 +89,7 @@ class Create : TestBase() { // SampleEnd ageCol2.size() shouldBe 10 - } + }*/ @Test @TransformDataFrameExpressions diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index 7b94c6fcbf..68eba9ec32 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -158,7 +158,6 @@ import org.jetbrains.kotlinx.dataframe.api.valuesNotNull import org.jetbrains.kotlinx.dataframe.api.where import org.jetbrains.kotlinx.dataframe.api.with import org.jetbrains.kotlinx.dataframe.api.withNull -import org.jetbrains.kotlinx.dataframe.api.withValues import org.jetbrains.kotlinx.dataframe.api.withZero import org.jetbrains.kotlinx.dataframe.api.xs import org.jetbrains.kotlinx.dataframe.columns.ColumnKind @@ -247,24 +246,6 @@ class DataFrameTests : BaseTest() { val d = dataFrameOf(a, b) } - @Test - fun `create column reference`() { - val name by column() - val col = name.withValues("Alice", "Bob") - val df = col.toDataFrame() - df.nrow shouldBe 2 - df.columnNames() shouldBe listOf("name") - } - - @Test - fun `add values to column reference`() { - val name by column() - val values = listOf("Alice", "Bob") - val col1 = name.withValues(values) - val col2 = values.toColumn(name) - col1 shouldBe col2 - } - @Test fun `create from map`() { val data = mapOf("name" to listOf("Alice", "Bob"), "age" to listOf(15, null)) From 5ec6fd21803ce143803fb578cb0b3639e3014a34 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Tue, 26 Aug 2025 14:37:56 +0400 Subject: [PATCH 6/8] deprecate tail --- .../main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt | 3 +++ .../jetbrains/kotlinx/dataframe/util/deprecationMessages.kt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt index 1228924f45..c2b652e977 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt @@ -1,9 +1,12 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.util.TAIL +import org.jetbrains.kotlinx.dataframe.util.TAIL_REPLACE // region DataFrame +@Deprecated(TAIL, ReplaceWith(TAIL_REPLACE), DeprecationLevel.ERROR) public fun DataFrame.tail(numRows: Int = 5): DataFrame = takeLast(numRows) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index e003e59cf1..7d1ff25477 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -137,6 +137,9 @@ internal const val COL_TYPE_INSTANT = "kotlinx.datetime.Instant is deprecated in favor of kotlin.time.Instant. Either migrate to kotlin.time.Instant and use ColType.StdlibInstant or use ColType.DeprecatedInstant. $MESSAGE_1_0 and migrated to kotlin.time.Instant in 1.1." internal const val COL_TYPE_INSTANT_REPLACE = "ColType.DeprecatedInstant" +internal const val TAIL = "This function will be removed in favor of `takeLast()`. $MESSAGE_1_0" +internal const val TAIL_REPLACE = "this.takeLast(numRows)" + // endregion // region WARNING in 1.0, ERROR in 1.1 From c11d7704c09aeb320074cfe34192da51eb13209d Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Wed, 27 Aug 2025 13:16:59 +0400 Subject: [PATCH 7/8] wine notebook update tail -> takeLast --- examples/notebooks/dev/wine/WineNetWIthKotlinDL.ipynb | 6 ++---- examples/notebooks/wine/WineNetWIthKotlinDL.ipynb | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/notebooks/dev/wine/WineNetWIthKotlinDL.ipynb b/examples/notebooks/dev/wine/WineNetWIthKotlinDL.ipynb index 8bab36888d..7d154e292f 100644 --- a/examples/notebooks/dev/wine/WineNetWIthKotlinDL.ipynb +++ b/examples/notebooks/dev/wine/WineNetWIthKotlinDL.ipynb @@ -2294,9 +2294,7 @@ "start_time": "2025-08-04T19:11:29.971573Z" } }, - "source": [ - "trainHist.epochHistory.toDataFrame().tail()" - ], + "source": "trainHist.epochHistory.toDataFrame().takeLast()", "outputs": [ { "data": { @@ -3251,7 +3249,7 @@ }, "source": [ "val trainHist = model2.fit(train, batchSize = 500, epochs = 2000)\n", - "trainHist.epochHistory.toDataFrame().tail()" + "trainHist.epochHistory.toDataFrame().takeLast()" ], "outputs": [ { diff --git a/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb b/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb index 9b9738c78b..4b809be639 100644 --- a/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb +++ b/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb @@ -2291,9 +2291,7 @@ "start_time": "2025-05-28T10:59:34.101283Z" } }, - "source": [ - "trainHist.epochHistory.toDataFrame().tail()" - ], + "source": "trainHist.epochHistory.toDataFrame().takeLast()", "outputs": [ { "data": { @@ -3248,7 +3246,7 @@ }, "source": [ "val trainHist = model2.fit(train, batchSize = 500, epochs = 2000)\n", - "trainHist.epochHistory.toDataFrame().tail()" + "trainHist.epochHistory.toDataFrame().takeLast()" ], "outputs": [ { From 86a46307ac27309b7289ecebb9073161c2040a69 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Wed, 27 Aug 2025 18:22:27 +0400 Subject: [PATCH 8/8] util functions fix --- .../org/jetbrains/kotlinx/dataframe/api/any.kt | 1 + .../org/jetbrains/kotlinx/dataframe/api/between.kt | 2 +- .../org/jetbrains/kotlinx/dataframe/api/tail.kt | 11 ++++++++--- .../kotlinx/dataframe/util/deprecationMessages.kt | 3 --- .../kotlinx/dataframe/samples/api/Create.kt | 13 ------------- .../kotlinx/dataframe/examples/plugin/Main.kt | 2 -- 6 files changed, 10 insertions(+), 22 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt index 0c021125f4..1f7902f5d2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt @@ -41,6 +41,7 @@ public fun DataColumn.any(predicate: Predicate): Boolean = values.any( * @param predicate A [RowFilter] lambda that takes a [DataRow] (as both `this` and `it`) * and returns `true` if the row should be considered a match. * @return `true` if at least one row satisfies the [predicate], `false` otherwise. + * @see [DataFrame.all] */ public inline fun DataFrame.any(predicate: RowFilter): Boolean = rows().any { predicate(it, it) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt index 6390a082bb..4280f67d3f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt @@ -7,7 +7,7 @@ import org.jetbrains.kotlinx.dataframe.impl.between /** * Returns a [DataColumn] of [Boolean] values indicating whether each element - * lies between [left] and [right]. + * in this column lies between [left] and [right]. * * If [includeBoundaries] is `true` (default), values equal to [left] or [right] are also considered in range. * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt index c2b652e977..c775970e10 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt @@ -1,12 +1,17 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.util.TAIL -import org.jetbrains.kotlinx.dataframe.util.TAIL_REPLACE // region DataFrame -@Deprecated(TAIL, ReplaceWith(TAIL_REPLACE), DeprecationLevel.ERROR) +/** + * Returns a DataFrame containing the last [numRows] rows. + * + * Equivalent to [takeLast]. + * + * @param numRows The number of rows to return from the end of the DataFrame. Defaults to 5. + * @return A DataFrame containing the last [numRows] rows. + */ public fun DataFrame.tail(numRows: Int = 5): DataFrame = takeLast(numRows) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index 4a7dcb4b88..21d5e44b40 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -139,9 +139,6 @@ internal const val COL_TYPE_INSTANT = "kotlinx.datetime.Instant is deprecated in favor of kotlin.time.Instant. Either migrate to kotlin.time.Instant and use ColType.StdlibInstant or use ColType.DeprecatedInstant. $MESSAGE_1_0 and migrated to kotlin.time.Instant in 1.1." internal const val COL_TYPE_INSTANT_REPLACE = "ColType.DeprecatedInstant" -internal const val TAIL = "This function will be removed in favor of `takeLast()`. $MESSAGE_1_0" -internal const val TAIL_REPLACE = "this.takeLast(numRows)" - // endregion // region WARNING in 1.0, ERROR in 1.1 diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt index 9dfa164049..860c48062e 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt @@ -78,19 +78,6 @@ class Create : TestBase() { // SampleEnd } - // Not used anymore - /*@Test - @TransformDataFrameExpressions - fun columnAccessorToColumn() { - // SampleStart - val age by column() - val ageCol1 = age.withValues(15, 20) - val ageCol2 = age.withValues(1..10) - // SampleEnd - - ageCol2.size() shouldBe 10 - }*/ - @Test @TransformDataFrameExpressions fun columnAccessorMap() { diff --git a/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt b/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt index 562d700db3..a1ed3145fa 100644 --- a/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt +++ b/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/Main.kt @@ -6,11 +6,9 @@ import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.convertTo import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.insert import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.api.rename import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase -import org.jetbrains.kotlinx.dataframe.api.under import org.jetbrains.kotlinx.dataframe.api.with import org.jetbrains.kotlinx.dataframe.io.readCsv import org.jetbrains.kotlinx.dataframe.io.writeCsv