Skip to content

Commit f915165

Browse files
Merge pull request #1412 from Kotlin/util_functions_fixes
Util functions fixes
2 parents 295fe51 + 84796f1 commit f915165

File tree

23 files changed

+351
-70
lines changed

23 files changed

+351
-70
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnReferenceApi.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,37 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
44
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
55
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
66
import org.jetbrains.kotlinx.dataframe.impl.asList
7+
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
78
import kotlin.reflect.typeOf
89

910
internal val ColumnReference<*>.name: String get() = name()
1011

12+
@Suppress("DEPRECATION_ERROR")
13+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
1114
public inline fun <reified T> ColumnReference<T>.withValues(vararg values: T): ValueColumn<T> =
1215
withValues(values.asIterable())
1316

17+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
1418
public inline fun <reified T> ColumnReference<T>.withValues(values: Iterable<T>): ValueColumn<T> =
1519
DataColumn.createValueColumn(name(), values.asList(), typeOf<T>())
1620

21+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
1722
public infix fun <C : Comparable<C>> ColumnReference<C>.gt(value: C): ColumnReference<Boolean> = map { it > value }
1823

24+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
1925
public infix fun <C : Comparable<C>> ColumnReference<C>.lt(value: C): ColumnReference<Boolean> = map { it < value }
2026

27+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
2128
public infix fun <C> ColumnReference<C>.eq(value: C): ColumnReference<Boolean> = map { it == value }
2229

30+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
2331
public infix fun <C> ColumnReference<C>.neq(value: C): ColumnReference<Boolean> = map { it != value }
2432

33+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
2534
public fun ColumnReference<String?>.length(): ColumnReference<Int> = map { it?.length ?: 0 }
2635

36+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
2737
public fun ColumnReference<String?>.lowercase(): ColumnReference<String?> = map { it?.lowercase() }
2838

39+
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
2940
public fun ColumnReference<String?>.uppercase(): ColumnReference<String?> = map { it?.uppercase() }

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,47 @@ package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
44
import org.jetbrains.kotlinx.dataframe.DataFrame
5+
import org.jetbrains.kotlinx.dataframe.DataRow
56
import org.jetbrains.kotlinx.dataframe.Predicate
67
import org.jetbrains.kotlinx.dataframe.RowFilter
78
import org.jetbrains.kotlinx.dataframe.columns.values
89

910
// region DataColumn
1011

12+
/**
13+
* Returns `true` if at least one element in this [DataColumn] satisfies the given [predicate].
14+
*
15+
* This is a convenience alias that delegates to [Iterable.any] on the column's [values].
16+
*
17+
* @param predicate A lambda function that takes a value from the column
18+
* and returns `true` if it matches the condition.
19+
* @return `true` if at least one element matches the [predicate], `false` otherwise.
20+
* @see [DataColumn.all]
21+
* @see [DataColumn.filter]
22+
* @see [DataColumn.count]
23+
*/
1124
public fun <T> DataColumn<T>.any(predicate: Predicate<T>): Boolean = values.any(predicate)
1225

1326
// endregion
1427

1528
// region DataFrame
1629

30+
/**
31+
* Returns `true` if at least one row in this [DataFrame] satisfies the given [predicate].
32+
*
33+
* {@include [org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription]}
34+
*
35+
* ### Example
36+
* ```kotlin
37+
* // Check if there is at least one row where "age" is greater than 18
38+
* val hasAdults = df.any { age > 18 }
39+
* ```
40+
*
41+
* @param predicate A [RowFilter] lambda that takes a [DataRow] (as both `this` and `it`)
42+
* and returns `true` if the row should be considered a match.
43+
* @return `true` if at least one row satisfies the [predicate], `false` otherwise.
44+
* @see [DataFrame.all]
45+
*/
1746
public inline fun <T> DataFrame<T>.any(predicate: RowFilter<T>): Boolean = rows().any { predicate(it, it) }
1847

1948
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asIterable.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
44

55
// region DataColumn
66

7+
/**
8+
* Returns an [Iterable] over the values of this [DataColumn].
9+
*
10+
* @see [asSequence]
11+
*/
712
public fun <T> DataColumn<T>.asIterable(): Iterable<T> = values()
813

914
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/asSequence.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,20 @@ import org.jetbrains.kotlinx.dataframe.DataRow
66

77
// region DataColumn
88

9+
/**
10+
* Returns a [Sequence] over the values of this [DataColumn].
11+
*
12+
* @see [asIterable]
13+
*/
914
public fun <T> DataColumn<T>.asSequence(): Sequence<T> = asIterable().asSequence()
1015

1116
// endregion
1217

1318
// region DataFrame
1419

20+
/**
21+
* Returns a [Sequence] over the rows ([DataRow]) of this [DataFrame].
22+
*/
1523
public fun <T> DataFrame<T>.asSequence(): Sequence<DataRow<T>> = rows().asSequence()
1624

1725
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/associate.kt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,60 @@ package org.jetbrains.kotlinx.dataframe.api
33
import org.jetbrains.kotlinx.dataframe.DataFrame
44
import org.jetbrains.kotlinx.dataframe.DataRow
55
import org.jetbrains.kotlinx.dataframe.RowExpression
6+
import org.jetbrains.kotlinx.dataframe.documentation.ExtensionPropertiesAPIDocs
67

78
// region DataFrame
89

10+
/**
11+
* Builds a [Map] where each key is produced by applying [transform] to a row,
12+
* and the value is the corresponding [DataRow].
13+
*
14+
* The [transform] is a [RowExpression] — a lambda that receives each [DataRow]
15+
* both as `this` and `it` and is expected to return a key, allowing you to compute keys directly from row values.
16+
* You can also use [extension properties][ExtensionPropertiesAPIDocs] for concise and type-safe access.
17+
*
18+
* If multiple rows produce the same key, the last row for that key is stored,
19+
* consistent with Kotlin's [kotlin.collections.associateBy] behavior.
20+
*
21+
* See also:
22+
* - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys
23+
* and their values as the corresponding map values.
24+
*
25+
* ### Example
26+
* ```kotlin
27+
* // Associate each row by the "id" column
28+
* val map = df.associateBy { id }
29+
* ```
30+
*
31+
* @param transform A [RowExpression] that returns a key for each row.
32+
* @return A [Map] of keys to corresponding rows.
33+
*/
934
public inline fun <T, V> DataFrame<T>.associateBy(transform: RowExpression<T, V>): Map<V, DataRow<T>> =
1035
rows().associateBy { transform(it, it) }
1136

37+
/**
38+
* Builds a [Map] from key-value [Pair]s produced by applying [transform] to each row.
39+
*
40+
* The [transform] is a [RowExpression] — a lambda that receives each [DataRow]
41+
* both as `this` and `it` and is expected to return a pair, allowing you to generate [Pair]s of keys and values from row contents.
42+
* You can also use [extension properties][ExtensionPropertiesAPIDocs] for concise and type-safe access.
43+
*
44+
* If multiple rows produce the same key, the last value for that key is stored,
45+
* consistent with Kotlin's [kotlin.collections.associate] behavior.
46+
*
47+
* See also:
48+
* - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys
49+
* and their values as the corresponding map values.
50+
*
51+
* ### Example
52+
* ```kotlin
53+
* // Associate rows into a map where key = id, value = name
54+
* val map = df.associate { id to name }
55+
* ```
56+
*
57+
* @param transform A [RowExpression] that returns a [Pair] of key and value for each row.
58+
* @return A [Map] of keys to values.
59+
*/
1260
public inline fun <T, K, V> DataFrame<T>.associate(transform: RowExpression<T, Pair<K, V>>): Map<K, V> =
1361
rows().associate { transform(it, it) }
1462

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/between.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@ import org.jetbrains.kotlinx.dataframe.impl.between
55

66
// region DataColumn
77

8+
/**
9+
* Returns a [DataColumn] of [Boolean] values indicating whether each element
10+
* in this column lies between [left] and [right].
11+
*
12+
* If [includeBoundaries] is `true` (default), values equal to [left] or [right] are also considered in range.
13+
*
14+
* @param left The lower boundary of the range.
15+
* @param right The upper boundary of the range.
16+
* @param includeBoundaries Whether to include [left] and [right] values in the range check. Defaults to `true`.
17+
* @return A [DataColumn] of [Boolean] values where each element indicates if the corresponding
18+
* value is within the specified range.
19+
*/
820
public fun <T : Comparable<T>> DataColumn<T>.between(
921
left: T,
1022
right: T,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,65 @@ import org.jetbrains.kotlinx.dataframe.nrow
1212
import org.jetbrains.kotlinx.dataframe.type
1313

1414
/**
15-
* Creates a [FrameColumn] from [this] by splitting the dataframe into
16-
* smaller ones, with their number of rows at most [size].
15+
* Splits this [DataFrame] into consecutive chunks of up to [size] rows
16+
* and returns them as a [FrameColumn].
17+
*
18+
* Each element of the resulting [FrameColumn] is a sub-[DataFrame] containing
19+
* at most [size] rows. Chunks are formed in order, without overlap.
20+
*
21+
* @param [size] Maximum number of rows in each chunk. Must be positive.
22+
* @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`.
23+
* @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
1724
*/
1825
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> =
19-
chunked(
20-
startIndices = 0 until nrow step size,
21-
name = name,
22-
)
26+
chunked(startIndices = 0 until nrow step size, name = name)
2327

28+
/**
29+
* Splits this [DataFrame] into chunks starting at the given [startIndices].
30+
*
31+
* The chunk starting at index `i` ends right before the next start index
32+
* or the end of the [DataFrame].
33+
* Use this overload when you need custom chunk boundaries.
34+
*
35+
* @param [startIndices] Zero-based row indices where each new chunk starts.
36+
* @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`.
37+
* @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
38+
*/
2439
public fun <T> DataFrame<T>.chunked(startIndices: Iterable<Int>, name: String = "groups"): FrameColumn<T> =
2540
chunkedImpl(startIndices, name)
2641

42+
/**
43+
* Groups consecutive values of this [DataColumn] into lists of at most [size] elements.
44+
*
45+
* This works like [kotlin.collections.chunked], but returns a [ValueColumn] of lists instead of a [List] of lists.
46+
*
47+
* @param [size] Maximum number of elements in each chunk. Must be positive.
48+
* @return A [ValueColumn] whose elements are lists representing chunks of the original values.
49+
*/
2750
public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {
2851
val values = toList().chunked(size)
2952
return DataColumn.createValueColumn(name(), values, getListType(type))
3053
}
3154

55+
/**
56+
* Splits this [ColumnGroup] into a [FrameColumn] of sub-dataframes
57+
* with up to [size] rows in each chunk.
58+
*
59+
* The resulting [FrameColumn] inherits the name of this group.
60+
*
61+
* @param [size] Maximum number of rows in each sub-dataframe. Must be positive.
62+
* @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
63+
*/
3264
public fun <T> ColumnGroup<T>.chunked(size: Int): FrameColumn<T> = chunked(size, name())
3365

66+
/**
67+
* Splits a [DataColumn] of [DataRow] into a [FrameColumn] of sub-dataframes
68+
* with up to [size] rows in each chunk.
69+
*
70+
* This is a convenience overload that treats a [DataColumn] of rows
71+
* as if it were a [ColumnGroup] (see [ColumnGroup.chunked]).
72+
*
73+
* @param [size] Maximum number of rows in each sub-dataframe. Must be positive.
74+
* @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
75+
*/
3476
public fun <T> DataColumn<DataRow<T>>.chunked(size: Int): FrameColumn<T> = asColumnGroup().chunked(size)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/length.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@ package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
44
import org.jetbrains.kotlinx.dataframe.StringCol
5+
import org.jetbrains.kotlinx.dataframe.util.LENGTH_REPLACE
6+
import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT
57

68
// region StringCol
79

10+
@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LENGTH_REPLACE), DeprecationLevel.WARNING)
811
public fun StringCol.length(): DataColumn<Int> = map { it?.length ?: 0 }
912

1013
// endregion
core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/lowercase.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.StringCol
4+
import org.jetbrains.kotlinx.dataframe.util.LOWERCASE_REPLACE
5+
import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT
46

57
// region StringCol
68

9+
@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LOWERCASE_REPLACE), DeprecationLevel.WARNING)
710
public fun StringCol.lowercase(): StringCol = map { it?.lowercase() }
811

912
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/shuffle.kt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,38 @@ import kotlin.random.Random
77

88
// region DataColumn
99

10+
/**
11+
* Returns a new [DataColumn] with the same values in random order using the provided [random] source.
12+
*
13+
* @param [random] Source of randomness; pass a seeded [Random] to make the shuffle reproducible.
14+
* @return A new [DataColumn] with values reordered randomly.
15+
*/
1016
public fun <T> DataColumn<T>.shuffle(random: Random): DataColumn<T> = get(indices.shuffled(random))
1117

18+
/**
19+
* Returns a new [DataColumn] with the same values in random order, using a default source of randomness.
20+
*
21+
* @return A new [DataColumn] with values reordered randomly.
22+
*/
1223
public fun <T> DataColumn<T>.shuffle(): DataColumn<T> = get(indices.shuffled())
1324

1425
// endregion
1526

1627
// region DataFrame
1728

29+
/**
30+
* Returns a new [DataFrame] with rows reordered randomly using the provided [random] source.
31+
*
32+
* @param [random] Source of randomness; pass a seeded [Random] to make the shuffle reproducible.
33+
* @return A new [DataFrame] with rows in random order.
34+
*/
1835
public fun <T> DataFrame<T>.shuffle(random: Random): DataFrame<T> = getRows(indices.shuffled(random))
1936

37+
/**
38+
* Returns a new [DataFrame] with rows in random order using the default randomness.
39+
*
40+
* @return A new [DataFrame] with rows in random order.
41+
*/
2042
public fun <T> DataFrame<T>.shuffle(): DataFrame<T> = getRows(indices.shuffled())
2143

2244
// endregion

0 commit comments

Comments
 (0)