Kotlin · AndreiKingsley · Aug 30, 2025 · Aug 22, 2025 · Aug 22, 2025 · Aug 23, 2025
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt
@@ -4,26 +4,37 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
 import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
 import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
 import org.jetbrains.kotlinx.dataframe.impl.asList
+import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
 import kotlin.reflect.typeOf
 
 internal val ColumnReference<*>.name: String get() = name()
 
+@Suppress("DEPRECATION_ERROR") // required: the body calls the Iterable overload, which is itself deprecated at ERROR level
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
 public inline fun <reified T> ColumnReference<T>.withValues(vararg values: T): ValueColumn<T> =
     withValues(values.asIterable())
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // builds a value column named name() from the values; ERROR level turns usages into compile errors
 public inline fun <reified T> ColumnReference<T>.withValues(values: Iterable<T>): ValueColumn<T> =
     DataColumn.createValueColumn(name(), values.asList(), typeOf<T>())
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // Boolean reference from `it > value`; ERROR level turns usages into compile errors
 public infix fun <C : Comparable<C>> ColumnReference<C>.gt(value: C): ColumnReference<Boolean> = map { it > value }
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // Boolean reference from `it < value`; ERROR level turns usages into compile errors
 public infix fun <C : Comparable<C>> ColumnReference<C>.lt(value: C): ColumnReference<Boolean> = map { it < value }
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // Boolean reference from `it == value`; ERROR level turns usages into compile errors
 public infix fun <C> ColumnReference<C>.eq(value: C): ColumnReference<Boolean> = map { it == value }
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // Boolean reference from `it != value`; ERROR level turns usages into compile errors
 public infix fun <C> ColumnReference<C>.neq(value: C): ColumnReference<Boolean> = map { it != value }
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // null strings map to length 0; ERROR level turns usages into compile errors
 public fun ColumnReference<String?>.length(): ColumnReference<Int> = map { it?.length ?: 0 }
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // null values stay null; ERROR level turns usages into compile errors
 public fun ColumnReference<String?>.lowercase(): ColumnReference<String?> = map { it?.lowercase() }
 
+@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR) // null values stay null; ERROR level turns usages into compile errors
 public fun ColumnReference<String?>.uppercase(): ColumnReference<String?> = map { it?.uppercase() }
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt
@@ -2,18 +2,47 @@ package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.DataColumn
 import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.DataRow
 import org.jetbrains.kotlinx.dataframe.Predicate
 import org.jetbrains.kotlinx.dataframe.RowFilter
 import org.jetbrains.kotlinx.dataframe.columns.values
 
 // region DataColumn
 
+/**
+ * Returns `true` if at least one element in this [DataColumn] satisfies the given [predicate].
+ *
+ * This is a convenience alias that delegates to [Iterable.any] on the column's [values].
+ *
+ * @param predicate A lambda function that takes a value from the column
+ * and returns `true` if it matches the condition.
+ * @return `true` if at least one element matches the [predicate], `false` otherwise (including for an empty column).
+ * @see [DataColumn.all]
+ * @see [DataColumn.filter]
+ * @see [DataColumn.count]
+ */
 public fun <T> DataColumn<T>.any(predicate: Predicate<T>): Boolean = values.any(predicate)
 
 // endregion
 
 // region DataFrame
 
+/**
+ * Returns `true` if at least one row in this [DataFrame] satisfies the given [predicate].
+ *
+ * {@include [org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription]}
+ *
+ * ### Example
+ * ```kotlin
+ * // Check if there is at least one row where "age" is greater than 18
+ * val hasAdults = df.any { age > 18 }
+ * ```
+ *
+ * @param predicate A [RowFilter] lambda that takes a [DataRow] (as both `this` and `it`)
+ * and returns `true` if the row should be considered a match.
+ * @return `true` if at least one row satisfies the [predicate], `false` otherwise (including for a [DataFrame] without rows).
+ * @see [DataFrame.all]
+ */
 public inline fun <T> DataFrame<T>.any(predicate: RowFilter<T>): Boolean = rows().any { predicate(it, it) }
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt
@@ -4,6 +4,11 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
 
 // region DataColumn
 
+/**
+ * Returns the values of this [DataColumn] as an [Iterable] (the result of `values()`).
+ *
+ * @see [asSequence]
+ */
 public fun <T> DataColumn<T>.asIterable(): Iterable<T> = values()
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt
@@ -6,12 +6,20 @@ import org.jetbrains.kotlinx.dataframe.DataRow
 
 // region DataColumn
 
+/**
+ * Returns a [Sequence] over the values of this [DataColumn], created from [asIterable].
+ *
+ * @see [asIterable]
+ */
 public fun <T> DataColumn<T>.asSequence(): Sequence<T> = asIterable().asSequence()
 
 // endregion
 
 // region DataFrame
 
+/**
+ * Returns the rows of this [DataFrame] as a [Sequence] of [DataRow].
+ */
 public fun <T> DataFrame<T>.asSequence(): Sequence<DataRow<T>> = rows().asSequence()
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt
@@ -3,12 +3,60 @@ package org.jetbrains.kotlinx.dataframe.api
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.DataRow
 import org.jetbrains.kotlinx.dataframe.RowExpression
+import org.jetbrains.kotlinx.dataframe.documentation.ExtensionPropertiesAPIDocs
 
 // region DataFrame
 
+/**
+ * Builds a [Map] where each key is produced by applying [transform] to a row,
+ * and the value is the corresponding [DataRow].
+ *
+ * The [transform] is a [RowExpression] — a lambda that receives each [DataRow] both as `this` and `it`
+ * and is expected to return a key, allowing you to compute keys directly from row values.
+ * You can also use [extension properties][ExtensionPropertiesAPIDocs] for concise and type-safe access.
+ *
+ * If multiple rows produce the same key, the last row for that key is stored,
+ * consistent with Kotlin's [kotlin.collections.associateBy] behavior.
+ *
+ * See also:
+ * - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys
+ *   and their values as the corresponding map values.
+ *
+ * ### Example
+ * ```kotlin
+ * // Associate each row by the "id" column
+ * val map = df.associateBy { id }
+ * ```
+ *
+ * @param transform A [RowExpression] that returns a key for each row.
+ * @return A [Map] of keys to corresponding rows that preserves the iteration order of the rows.
+ */
 public inline fun <T, V> DataFrame<T>.associateBy(transform: RowExpression<T, V>): Map<V, DataRow<T>> =
     rows().associateBy { transform(it, it) }
 
+/**
+ * Builds a [Map] from key-value [Pair]s produced by applying [transform] to each row.
+ *
+ * The [transform] is a [RowExpression] — a lambda that receives each [DataRow] both as `this` and `it`
+ * and is expected to return a pair, allowing you to generate [Pair]s of keys and values from row contents.
+ * You can also use [extension properties][ExtensionPropertiesAPIDocs] for concise and type-safe access.
+ *
+ * If multiple rows produce the same key, the last value for that key is stored,
+ * consistent with Kotlin's [kotlin.collections.associate] behavior.
+ *
+ * See also:
+ * - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys
+ *   and their values as the corresponding map values.
+ *
+ * ### Example
+ * ```kotlin
+ * // Associate rows into a map where key = id, value = name
+ * val map = df.associate { id to name }
+ * ```
+ *
+ * @param transform A [RowExpression] that returns a [Pair] of key and value for each row.
+ * @return A [Map] of keys to values that preserves the iteration order of the rows.
+ */
 public inline fun <T, K, V> DataFrame<T>.associate(transform: RowExpression<T, Pair<K, V>>): Map<K, V> =
     rows().associate { transform(it, it) }
 

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt
@@ -5,6 +5,18 @@ import org.jetbrains.kotlinx.dataframe.impl.between
 
 // region DataColumn
 
+/**
+ * Returns a [DataColumn] of [Boolean] values indicating whether each element
+ * in this column lies between [left] and [right].
+ *
+ * If [includeBoundaries] is `true` (default), values equal to [left] or [right] are also considered in range.
+ *
+ * @param left The lower boundary of the range.
+ * @param right The upper boundary of the range.
+ * @param includeBoundaries Whether to include [left] and [right] values in the range check. Defaults to `true`.
+ * @return A [DataColumn] of [Boolean] values where each element indicates if the corresponding
+ *         value is within the specified range.
+ */
 public fun <T : Comparable<T>> DataColumn<T>.between(
     left: T,
     right: T,

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt
@@ -12,23 +12,65 @@ import org.jetbrains.kotlinx.dataframe.nrow
 import org.jetbrains.kotlinx.dataframe.type
 
 /**
- * Creates a [FrameColumn] from [this] by splitting the dataframe into
- * smaller ones, with their number of rows at most [size].
+ * Splits this [DataFrame] into consecutive chunks of up to [size] rows
+ * and returns them as a [FrameColumn].
+ *
+ * Each element of the resulting [FrameColumn] is a sub-[DataFrame] containing
+ * at most [size] rows. Chunks are formed in order, without overlap.
+ *
+ * @param [size] Maximum number of rows in each chunk. Must be positive — it is the `step` of the start indices, and a non-positive step throws [IllegalArgumentException].
+ * @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`.
+ * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
  */
 public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> =
-    chunked(
-        startIndices = 0 until nrow step size,
-        name = name,
-    )
+    chunked(startIndices = 0 until nrow step size, name = name)
 
+/**
+ * Splits this [DataFrame] into chunks starting at the given [startIndices].
+ *
+ * The chunk starting at index `i` ends right before the next start index
+ * or the end of the [DataFrame].
+ * Use this overload when you need custom chunk boundaries (the `size`-based overload passes `0 until nrow step size`).
+ *
+ * @param [startIndices] Zero-based row indices where each new chunk starts.
+ * @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`.
+ * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
+ */
 public fun <T> DataFrame<T>.chunked(startIndices: Iterable<Int>, name: String = "groups"): FrameColumn<T> =
     chunkedImpl(startIndices, name)
 
+/**
+ * Groups consecutive values of this [DataColumn] into lists of at most [size] elements.
+ *
+ * This works like [kotlin.collections.chunked], but returns a [ValueColumn] instead of a [List].
+ *
+ * @param [size] Maximum number of elements in each chunk. Must be positive; the list `chunked` call rejects other sizes with [IllegalArgumentException].
+ * @return A [ValueColumn] with the same name as this column, whose elements are lists representing chunks of the original values.
+ */
 public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {
     val values = toList().chunked(size)
     return DataColumn.createValueColumn(name(), values, getListType(type))
 }
 
+/**
+ * Splits this [ColumnGroup] into a [FrameColumn] of sub-dataframes
+ * with up to [size] rows in each chunk.
+ *
+ * The resulting [FrameColumn] inherits the name of this group (it is passed on as the `name` argument).
+ *
+ * @param [size] Maximum number of rows in each sub-dataframe. Must be positive.
+ * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
+ */
 public fun <T> ColumnGroup<T>.chunked(size: Int): FrameColumn<T> = chunked(size, name())
 
+/**
+ * Splits a [DataColumn] of [DataRow] into a [FrameColumn] of sub-dataframes
+ * with up to [size] rows in each chunk.
+ *
+ * This is a convenience overload that converts the column with `asColumnGroup()`
+ * and then delegates to [ColumnGroup.chunked].
+ *
+ * @param [size] Maximum number of rows in each sub-dataframe. Must be positive.
+ * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
+ */
 public fun <T> DataColumn<DataRow<T>>.chunked(size: Int): FrameColumn<T> = asColumnGroup().chunked(size)
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt
@@ -2,9 +2,12 @@ package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.DataColumn
 import org.jetbrains.kotlinx.dataframe.StringCol
+import org.jetbrains.kotlinx.dataframe.util.LENGTH_REPLACE
+import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT
 
 // region StringCol
 
+@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LENGTH_REPLACE), DeprecationLevel.WARNING) // WARNING level: usages still compile; null strings map to length 0
 public fun StringCol.length(): DataColumn<Int> = map { it?.length ?: 0 }
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt
@@ -1,9 +1,12 @@
 package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.StringCol
+import org.jetbrains.kotlinx.dataframe.util.LOWERCASE_REPLACE
+import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT
 
 // region StringCol
 
+@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LOWERCASE_REPLACE), DeprecationLevel.WARNING) // WARNING level: usages still compile; null values stay null
 public fun StringCol.lowercase(): StringCol = map { it?.lowercase() }
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt
@@ -7,16 +7,38 @@ import kotlin.random.Random
 
 // region DataColumn
 
+/**
+ * Returns a new [DataColumn] with the same values in random order using the provided [random] source.
+ *
+ * @param [random] Source of randomness; pass a seeded [Random] to get reproducible shuffles.
+ * @return A new [DataColumn] with values reordered randomly.
+ */
 public fun <T> DataColumn<T>.shuffle(random: Random): DataColumn<T> = get(indices.shuffled(random))
 
+/**
+ * Returns a new [DataColumn] with values in random order using the default randomness of `shuffled()`; use the overload taking a [Random] for reproducible results.
+ *
+ * @return A new [DataColumn] with values reordered randomly.
+ */
 public fun <T> DataColumn<T>.shuffle(): DataColumn<T> = get(indices.shuffled())
 
 // endregion
 
 // region DataFrame
 
+/**
+ * Returns a new [DataFrame] with rows reordered randomly using the provided [random] source.
+ *
+ * @param [random] Source of randomness; pass a seeded [Random] to get reproducible shuffles.
+ * @return A new [DataFrame] with rows in random order.
+ */
 public fun <T> DataFrame<T>.shuffle(random: Random): DataFrame<T> = getRows(indices.shuffled(random))
 
+/**
+ * Returns a new [DataFrame] with rows in random order using the default randomness of `shuffled()`; use the overload taking a [Random] for reproducible results.
+ *
+ * @return A new [DataFrame] with rows in random order.
+ */
 public fun <T> DataFrame<T>.shuffle(): DataFrame<T> = getRows(indices.shuffled())
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/tail.kt
@@ -4,6 +4,14 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
 
 // region DataFrame
 
+/**
+ * Returns a [DataFrame] containing the last [numRows] rows.
+ *
+ * Equivalent to [takeLast].
+ *
+ * @param numRows The number of rows to return from the end of the [DataFrame]. Defaults to 5.
+ * @return A [DataFrame] containing the last [numRows] rows.
+ */
 public fun <T> DataFrame<T>.tail(numRows: Int = 5): DataFrame<T> = takeLast(numRows)
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/uppercase.kt
@@ -1,9 +1,12 @@
 package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.StringCol
+import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT
+import org.jetbrains.kotlinx.dataframe.util.UPPERCASE_REPLACE
 
 // region StringCol
 
+@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(UPPERCASE_REPLACE), DeprecationLevel.WARNING) // WARNING level: usages still compile; null values stay null
 public fun StringCol.uppercase(): StringCol = map { it?.uppercase() }
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt
@@ -4,8 +4,10 @@ import org.jetbrains.kotlinx.dataframe.DataRow
 import org.jetbrains.kotlinx.dataframe.RowFilter
 
 /**
- * The [predicate] is a [RowFilter] — a lambda that receives each [DataRow] both as `this` and `it`,
- * allowing you to define a [Boolean] condition using the row's values,
- * including through [extension properties][ExtensionPropertiesAPIDocs] for convenient access.
+ * The [predicate] is a [RowFilter] — a lambda that receives each [DataRow] as both `this` and `it`
+ * and is expected to return a [Boolean] value (`true` when the row satisfies the condition).
+ *
+ * It allows you to define conditions using the row's values directly,
+ * including through [extension properties][ExtensionPropertiesAPIDocs] for convenient and type-safe access.
  */
 internal interface RowFilterDescription
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt
@@ -233,6 +233,12 @@ internal const val COL_TYPE_DEPRECATED_INSTANT =
     "kotlinx.datetime.Instant is deprecated in favor of kotlin.time.Instant. Migrate to kotlin.time.Instant and use Coltype.StdlibInstant at your own pace. $MESSAGE_1_1"
 internal const val COL_TYPE_DEPRECATED_INSTANT_REPLACE = "ColType.StdlibInstant"
 
+internal const val MESSAGE_SHORTCUT = "This shortcut is deprecated. $MESSAGE_1_1" // message shared by the deprecated StringCol.length/lowercase/uppercase shortcuts
+
+internal const val LENGTH_REPLACE = "this.map { it?.length ?: 0 }" // ReplaceWith expression for StringCol.length(); mirrors its body
+internal const val LOWERCASE_REPLACE = "this.map { it?.lowercase() }" // ReplaceWith expression for StringCol.lowercase(); mirrors its body
+internal const val UPPERCASE_REPLACE = "this.map { it?.uppercase() }" // ReplaceWith expression for StringCol.uppercase(); mirrors its body
+
 // endregion
 
 // region keep across releases