Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,37 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.typeOf

/** Property-style shorthand for calling `name()` on this [ColumnReference]. */
internal val ColumnReference<*>.name: String
    get() = name()

/**
 * Creates a [ValueColumn] from the given [values], delegating to the [Iterable]-based `withValues` overload.
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 *
 * @param values The values of the new column.
 * @return The [ValueColumn] produced by the [Iterable]-based overload.
 */
@Suppress("DEPRECATION_ERROR")
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public inline fun <reified T> ColumnReference<T>.withValues(vararg values: T): ValueColumn<T> {
    // The delegate is itself deprecated at error level, hence the suppression above.
    return withValues(values.asIterable())
}

/**
 * Creates a [ValueColumn] that takes its name from this [ColumnReference],
 * its contents from [values] and its element type from the reified [T].
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 *
 * @param values The values of the new column.
 * @return A [ValueColumn] built with [DataColumn.createValueColumn].
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public inline fun <reified T> ColumnReference<T>.withValues(values: Iterable<T>): ValueColumn<T> {
    val columnName = name()
    return DataColumn.createValueColumn(columnName, values.asList(), typeOf<T>())
}

/**
 * Maps this reference to a [Boolean] reference that is `true`
 * where the referenced value is strictly greater than [value].
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public infix fun <C : Comparable<C>> ColumnReference<C>.gt(value: C): ColumnReference<Boolean> =
    map { candidate -> candidate > value }

/**
 * Maps this reference to a [Boolean] reference that is `true`
 * where the referenced value is strictly less than [value].
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public infix fun <C : Comparable<C>> ColumnReference<C>.lt(value: C): ColumnReference<Boolean> =
    map { candidate -> candidate < value }

/**
 * Maps this reference to a [Boolean] reference that is `true`
 * where the referenced value equals [value] (compared with `==`).
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public infix fun <C> ColumnReference<C>.eq(value: C): ColumnReference<Boolean> =
    map { candidate -> candidate == value }

/**
 * Maps this reference to a [Boolean] reference that is `true`
 * where the referenced value differs from [value] (compared with `!=`).
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public infix fun <C> ColumnReference<C>.neq(value: C): ColumnReference<Boolean> =
    map { candidate -> candidate != value }

/**
 * Maps each referenced string to its length; a `null` string is mapped to `0`.
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public fun ColumnReference<String?>.length(): ColumnReference<Int> =
    map { text -> text?.length ?: 0 }

/**
 * Maps each referenced string to its lowercase form; `null` values stay `null`.
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public fun ColumnReference<String?>.lowercase(): ColumnReference<String?> =
    map { text -> text?.lowercase() }

/**
 * Maps each referenced string to its uppercase form; `null` values stay `null`.
 *
 * Deprecated at error level; see [DEPRECATED_ACCESS_API] for the message.
 */
@Deprecated(DEPRECATED_ACCESS_API, level = DeprecationLevel.ERROR)
public fun ColumnReference<String?>.uppercase(): ColumnReference<String?> =
    map { text -> text?.uppercase() }
29 changes: 29 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/any.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,47 @@ package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.Predicate
import org.jetbrains.kotlinx.dataframe.RowFilter
import org.jetbrains.kotlinx.dataframe.columns.values

// region DataColumn

/**
* Returns `true` if at least one element in this [DataColumn] satisfies the given [predicate].
*
* This is a convenience alias that delegates to [Iterable.any] on the column's [values].
*
* @param predicate A lambda function that takes a value from the column
* and returns `true` if it matches the condition.
* @return `true` if at least one element matches the [predicate], `false` otherwise.
* @see [DataColumn.all]
* @see [DataColumn.filter]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think filter and count are very relevant, only all

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? IMHO these functions are really related.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since they all operate on column values with a Boolean condition.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with Andrew here

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, so do takeWhile and dropWhile, and I can think of some more.

I would only group all {} and any {} since they both reduce to a single boolean:
Column<T>.function(predicate: (T) -> Boolean): Boolean
(none {} would be here too, if we had it... do we?)

* @see [DataColumn.count]
*/
public fun <T> DataColumn<T>.any(predicate: Predicate<T>): Boolean {
    // Straight delegation to the stdlib `any` over the column's values.
    return values.any(predicate)
}

// endregion

// region DataFrame

/**
 * Checks whether at least one row of this [DataFrame] satisfies the given [predicate].
 *
 * {@include [org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription]}
 *
 * ### Example
 * ```kotlin
 * // Is there any row whose "age" exceeds 18?
 * val hasAdults = df.any { age > 18 }
 * ```
 *
 * @param predicate A [RowFilter] invoked with each [DataRow] as both `this` and `it`;
 * it returns `true` when the row counts as a match.
 * @return `true` if the [predicate] holds for at least one row, `false` otherwise.
 * @see [DataFrame.all]
 */
public inline fun <T> DataFrame<T>.any(predicate: RowFilter<T>): Boolean =
    rows().any { row -> predicate(row, row) }

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ import org.jetbrains.kotlinx.dataframe.DataColumn

// region DataColumn

/**
 * Exposes the values of this [DataColumn] as an [Iterable].
 *
 * @return The result of calling `values()` on this column.
 * @see [asSequence]
 */
public fun <T> DataColumn<T>.asIterable(): Iterable<T> {
    return values()
}

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@ import org.jetbrains.kotlinx.dataframe.DataRow

// region DataColumn

/**
 * Exposes the values of this [DataColumn] as a [Sequence].
 *
 * The sequence is obtained from [asIterable].
 *
 * @see [asIterable]
 */
public fun <T> DataColumn<T>.asSequence(): Sequence<T> {
    val iterable = asIterable()
    return iterable.asSequence()
}

// endregion

// region DataFrame

/**
 * Exposes the rows of this [DataFrame] as a [Sequence] of [DataRow].
 *
 * @return A [Sequence] obtained from `rows()`.
 */
public fun <T> DataFrame<T>.asSequence(): Sequence<DataRow<T>> {
    val allRows = rows()
    return allRows.asSequence()
}

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,60 @@ package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowExpression
import org.jetbrains.kotlinx.dataframe.documentation.ExtensionPropertiesAPIDocs

// region DataFrame

/**
 * Creates a [Map] from the rows of this [DataFrame]: the key of every entry is the result
 * of [transform] for a row, and the value is that [DataRow] itself.
 *
 * [transform] is a [RowExpression], i.e. a lambda invoked with each [DataRow] as both `this` and `it`,
 * so keys can be computed directly from row values.
 * [Extension properties][ExtensionPropertiesAPIDocs] can be used for concise and type-safe access.
 *
 * When several rows yield the same key, the last of them wins,
 * matching Kotlin's [kotlin.collections.associateBy].
 *
 * See also:
 * - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys
 * and their values as the corresponding map values.
 *
 * ### Example
 * ```kotlin
 * // Index the rows by their "id" column
 * val map = df.associateBy { id }
 * ```
 *
 * @param transform A [RowExpression] producing the key for each row.
 * @return A [Map] from keys to the corresponding rows.
 */
public inline fun <T, V> DataFrame<T>.associateBy(transform: RowExpression<T, V>): Map<V, DataRow<T>> =
    rows().associateBy { row -> transform(row, row) }

/**
 * Creates a [Map] from the key-value [Pair]s that [transform] produces for each row of this [DataFrame].
 *
 * [transform] is a [RowExpression], i.e. a lambda invoked with each [DataRow] as both `this` and `it`,
 * which returns the [Pair] of key and value derived from that row.
 * [Extension properties][ExtensionPropertiesAPIDocs] can be used for concise and type-safe access.
 *
 * When several rows yield the same key, the last value for that key wins,
 * matching Kotlin's [kotlin.collections.associate].
 *
 * See also:
 * - [toMap] — converts a [DataFrame] into a [Map] by using column names as keys
 * and their values as the corresponding map values.
 *
 * ### Example
 * ```kotlin
 * // Build a map with key = id and value = name
 * val map = df.associate { id to name }
 * ```
 *
 * @param transform A [RowExpression] producing a [Pair] of key and value for each row.
 * @return A [Map] from keys to values.
 */
public inline fun <T, K, V> DataFrame<T>.associate(transform: RowExpression<T, Pair<K, V>>): Map<K, V> =
    rows().associate { row -> transform(row, row) }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ import org.jetbrains.kotlinx.dataframe.impl.between

// region DataColumn

/**
* Returns a [DataColumn] of [Boolean] values indicating whether each element
* in this column lies between [left] and [right].
*
* If [includeBoundaries] is `true` (default), values equal to [left] or [right] are also considered in range.
*
* @param left The lower boundary of the range.
* @param right The upper boundary of the range.
* @param includeBoundaries Whether to include [left] and [right] values in the range check. Defaults to `true`.
* @return A [DataColumn] of [Boolean] values where each element indicates if the corresponding
* value is within the specified range.
*/
public fun <T : Comparable<T>> DataColumn<T>.between(
left: T,
right: T,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,65 @@ import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.type

/**
 * Splits this [DataFrame] into consecutive chunks of up to [size] rows
 * and returns them as a [FrameColumn].
 *
 * Each element of the resulting [FrameColumn] is a sub-[DataFrame] containing
 * at most [size] rows. Chunks are formed in order, without overlap.
 *
 * @param [size] Maximum number of rows in each chunk. Must be positive.
 * @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`.
 * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
 * @throws IllegalArgumentException if [size] is not positive (raised by the `step` progression).
 */
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> =
    // Chunk starts are 0, size, 2 * size, ... below nrow; the index-based overload does the splitting.
    chunked(startIndices = 0 until nrow step size, name = name)

/**
 * Cuts this [DataFrame] into chunks that begin at the given [startIndices].
 *
 * A chunk that starts at index `i` runs up to (but not including) the next start index,
 * or to the end of the [DataFrame] for the last chunk.
 * Prefer this overload when chunk boundaries have to be chosen manually.
 *
 * @param [startIndices] Zero-based row indices at which a new chunk begins.
 * @param [name] Name of the resulting [FrameColumn]. Defaults to `"groups"`.
 * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
 */
public fun <T> DataFrame<T>.chunked(startIndices: Iterable<Int>, name: String = "groups"): FrameColumn<T> {
    return chunkedImpl(startIndices, name)
}

/**
 * Collects consecutive values of this [DataColumn] into lists holding at most [size] elements each.
 *
 * Behaves like [kotlin.collections.chunked], except that the result is a [ValueColumn] rather than a [List].
 * The resulting column keeps the name of this column.
 *
 * @param [size] Maximum number of elements in each chunk. Must be positive.
 * @return A [ValueColumn] whose elements are lists representing chunks of the original values.
 */
public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> =
    toList()
        .chunked(size)
        .let { chunks -> DataColumn.createValueColumn(name(), chunks, getListType(type)) }

/**
 * Cuts this [ColumnGroup] into sub-dataframes of up to [size] rows each
 * and returns them as a [FrameColumn].
 *
 * The name of this group is passed on as the name of the resulting [FrameColumn].
 *
 * @param [size] Maximum number of rows in each sub-dataframe. Must be positive.
 * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
 */
public fun <T> ColumnGroup<T>.chunked(size: Int): FrameColumn<T> {
    return chunked(size, name())
}

/**
 * Cuts a [DataColumn] of [DataRow] into sub-dataframes of up to [size] rows each
 * and returns them as a [FrameColumn].
 *
 * Convenience overload: the column is first viewed as a [ColumnGroup]
 * and then chunked via [ColumnGroup.chunked].
 *
 * @param [size] Maximum number of rows in each sub-dataframe. Must be positive.
 * @return A [FrameColumn] where each value is a sub-[DataFrame] chunk.
 */
public fun <T> DataColumn<DataRow<T>>.chunked(size: Int): FrameColumn<T> {
    val group = asColumnGroup()
    return group.chunked(size)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@ package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.StringCol
import org.jetbrains.kotlinx.dataframe.util.LENGTH_REPLACE
import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT

// region StringCol

/**
 * Maps every string of this [StringCol] to its length; `null` values are mapped to `0`.
 *
 * Deprecated shortcut: the suggested replacement is the expression held in [LENGTH_REPLACE].
 */
@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LENGTH_REPLACE), DeprecationLevel.WARNING)
public fun StringCol.length(): DataColumn<Int> =
    map { text -> text?.length ?: 0 }

// endregion
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.StringCol
import org.jetbrains.kotlinx.dataframe.util.LOWERCASE_REPLACE
import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT

// region StringCol

/**
 * Maps every string of this [StringCol] to its lowercase form; `null` values stay `null`.
 *
 * Deprecated shortcut: the suggested replacement is the expression held in [LOWERCASE_REPLACE].
 */
@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(LOWERCASE_REPLACE), DeprecationLevel.WARNING)
public fun StringCol.lowercase(): StringCol =
    map { text -> text?.lowercase() }

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,38 @@ import kotlin.random.Random

// region DataColumn

/**
 * Produces a [DataColumn] holding the values of this column in a random order
 * drawn from the supplied [random] source.
 *
 * @param [random] Source of randomness used to shuffle the indices of this column.
 * @return A new [DataColumn] with values reordered randomly.
 */
public fun <T> DataColumn<T>.shuffle(random: Random): DataColumn<T> {
    val order = indices.shuffled(random)
    return get(order)
}

/**
 * Produces a [DataColumn] holding the values of this column in a random order,
 * using the default randomness of `shuffled()`.
 *
 * @return A new [DataColumn] with values reordered randomly.
 */
public fun <T> DataColumn<T>.shuffle(): DataColumn<T> {
    val order = indices.shuffled()
    return get(order)
}

// endregion

// region DataFrame

/**
 * Produces a [DataFrame] holding the rows of this dataframe in a random order
 * drawn from the supplied [random] source.
 *
 * @param [random] Source of randomness used to shuffle the row indices.
 * @return A new [DataFrame] with rows in random order.
 */
public fun <T> DataFrame<T>.shuffle(random: Random): DataFrame<T> {
    val order = indices.shuffled(random)
    return getRows(order)
}

/**
 * Produces a [DataFrame] holding the rows of this dataframe in a random order,
 * using the default randomness of `shuffled()`.
 *
 * @return A new [DataFrame] with rows in random order.
 */
public fun <T> DataFrame<T>.shuffle(): DataFrame<T> {
    val order = indices.shuffled()
    return getRows(order)
}

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ import org.jetbrains.kotlinx.dataframe.DataFrame

// region DataFrame

/**
 * Returns the trailing [numRows] rows of this DataFrame.
 *
 * This is an alias for [takeLast].
 *
 * @param numRows How many rows to take from the end of the DataFrame. Defaults to 5.
 * @return A DataFrame containing the last [numRows] rows.
 */
public fun <T> DataFrame<T>.tail(numRows: Int = 5): DataFrame<T> {
    return takeLast(numRows)
}

// endregion
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.StringCol
import org.jetbrains.kotlinx.dataframe.util.MESSAGE_SHORTCUT
import org.jetbrains.kotlinx.dataframe.util.UPPERCASE_REPLACE

// region StringCol

/**
 * Maps every string of this [StringCol] to its uppercase form; `null` values stay `null`.
 *
 * Deprecated shortcut: the suggested replacement is the expression held in [UPPERCASE_REPLACE].
 */
@Deprecated(MESSAGE_SHORTCUT, ReplaceWith(UPPERCASE_REPLACE), DeprecationLevel.WARNING)
public fun StringCol.uppercase(): StringCol =
    map { text -> text?.uppercase() }

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowFilter

// Documentation-only marker: the KDoc below is pulled into other KDocs
// via `{@include [RowFilterDescription]}`, so it must contain the description exactly once.
/**
 * The [predicate] is a [RowFilter] — a lambda that receives each [DataRow] as both `this` and `it`
 * and is expected to return a [Boolean] value.
 *
 * It allows you to define conditions using the row's values directly,
 * including through [extension properties][ExtensionPropertiesAPIDocs] for convenient and type-safe access.
 */
internal interface RowFilterDescription
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ internal const val COL_TYPE_DEPRECATED_INSTANT =
"kotlinx.datetime.Instant is deprecated in favor of kotlin.time.Instant. Migrate to kotlin.time.Instant and use ColType.StdlibInstant at your own pace. $MESSAGE_1_1"
internal const val COL_TYPE_DEPRECATED_INSTANT_REPLACE = "ColType.StdlibInstant"

// Deprecation message used by the deprecated `StringCol` shortcut functions (length, lowercase, uppercase).
internal const val MESSAGE_SHORTCUT = "This shortcut is deprecated. $MESSAGE_1_1"

// `ReplaceWith` expressions for those shortcuts; each one spells out the `map` call the shortcut performs.
internal const val LENGTH_REPLACE = "this.map { it?.length ?: 0 }"
internal const val LOWERCASE_REPLACE = "this.map { it?.lowercase() }"
internal const val UPPERCASE_REPLACE = "this.map { it?.uppercase() }"

// endregion

// region keep across releases
Expand Down
Loading