Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/drop.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.documentation.CommonTakeAndDropDocs
import org.jetbrains.kotlinx.dataframe.documentation.CommonTakeAndDropWhileDocs
import org.jetbrains.kotlinx.dataframe.documentation.TakeAndDropColumnsSelectionDslGrammar
import org.jetbrains.kotlinx.dataframe.impl.api.GroupByEntryImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle
import org.jetbrains.kotlinx.dataframe.index
Expand Down Expand Up @@ -73,6 +74,42 @@ public inline fun <T> DataFrame<T>.dropWhile(predicate: RowFilter<T>): DataFrame

// endregion

// region GroupBy

/**
 * Returns an adjusted [GroupBy] containing only the entries that do **not** satisfy the given [predicate].
 *
 * This is the negation of [GroupBy.filterEntries]: the [predicate] is evaluated with the entry
 * as both receiver and argument, and entries for which it returns `true` are removed.
 */
public inline fun <T, G> GroupBy<T, G>.dropEntries(crossinline predicate: GroupByEntryFilter<T, G>): GroupBy<T, G> =
    filterEntries { entry -> !predicate(entry, entry) }

/**
 * Returns an adjusted [GroupBy] without its first [n] entries.
 *
 * The entries are dropped on the [DataFrame] form of this [GroupBy], which is then
 * turned back into a [GroupBy] using the name of the current groups column.
 *
 * @param n the number of leading entries to drop; must not be negative.
 * @throws IllegalArgumentException if [n] is negative.
 */
public fun <T, G> GroupBy<T, G>.dropEntries(n: Int): GroupBy<T, G> {
    require(n >= 0) { "Requested rows count $n is less than zero." }
    val remaining = toDataFrame().drop(n)
    return remaining.asGroupBy(groups.name()).cast()
}

/**
 * Returns an adjusted [GroupBy] containing all entries except the last [n] entries.
 *
 * The entries are dropped from the end of the [DataFrame] form of this [GroupBy], which is then
 * turned back into a [GroupBy] using the name of the current groups column.
 *
 * @param n the number of trailing entries to drop; must not be negative.
 * @throws IllegalArgumentException if [n] is negative.
 */
public fun <T, G> GroupBy<T, G>.dropLastEntries(n: Int): GroupBy<T, G> {
    require(n >= 0) { "Requested rows count $n is less than zero." }
    // `dropLast`, not `drop`: `drop(n)` would remove the FIRST n entries and make this
    // function behave exactly like `dropEntries(n)`.
    return toDataFrame().dropLast(n).asGroupBy(groups.name()).cast()
}

/**
 * Returns an adjusted [GroupBy] in which the leading run of entries satisfying [predicate] has been removed.
 *
 * Each row of the [DataFrame] form of this [GroupBy] is wrapped in a [GroupByEntry] together with
 * the current groups column before it is handed to the [predicate] (as both receiver and argument).
 */
public inline fun <T, G> GroupBy<T, G>.dropEntriesWhile(predicate: GroupByEntryFilter<T, G>): GroupBy<T, G> =
    toDataFrame()
        .dropWhile { row ->
            GroupByEntryImpl(row, groups).let { predicate(it, it) }
        }
        .asGroupBy(groups.name())
        .cast()

// endregion

// region ColumnsSelectionDsl

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.RowExpression
import org.jetbrains.kotlinx.dataframe.columns.values

// region DataColumn

Expand All @@ -21,10 +20,16 @@ public inline fun <T> DataFrame<T>.forEach(action: RowExpression<T, Unit>): Unit

// region GroupBy

/**
 * Invokes [body] once per key row of this [GroupBy], passing a [GroupBy.Entry] that pairs
 * the key row with the group found at the same index in the groups column.
 *
 * Deprecated in favour of [forEachEntry].
 */
@Deprecated(
    "Replaced with forEachEntry",
    ReplaceWith("forEachEntry { val key = it\nval group = it.group()\nbody(key, group) }"),
)
public inline fun <T, G> GroupBy<T, G>.forEach(body: (GroupBy.Entry<T, G>) -> Unit): Unit =
    keys.forEach { keyRow -> body(GroupBy.Entry(keyRow, groups[keyRow.index()])) }

/**
 * Invokes [body] for every keys+group [entry][GroupByEntry] of this [GroupBy],
 * in the order produced by [entriesAsSequence].
 */
public inline fun <T, G> GroupBy<T, G>.forEachEntry(body: (GroupByEntry<T, G>) -> Unit) {
    for (entry in entriesAsSequence()) {
        body(entry)
    }
}
// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotImpl
import org.jetbrains.kotlinx.dataframe.impl.api.GroupByEntryImpl
import org.jetbrains.kotlinx.dataframe.impl.api.getPivotColumnPaths
import org.jetbrains.kotlinx.dataframe.impl.api.groupByImpl
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
Expand Down Expand Up @@ -71,17 +72,46 @@ public fun <T> Pivot<T>.groupByOther(): PivotGroupBy<T> {

// endregion

/**
 * Legacy selector over a [GroupedDataRow]: the row is available both as receiver and as argument.
 * Superseded by [GroupByEntrySelector].
 */
@Deprecated("Replaced by GroupByEntrySelector")
public typealias GroupedRowSelector<T, G, R> = GroupedDataRow<T, G>.(GroupedDataRow<T, G>) -> R

/**
 * Legacy [GroupedRowSelector] that yields a [Boolean]. Superseded by [GroupByEntryFilter].
 */
@Deprecated("Replaced by GroupByEntryFilter")
public typealias GroupedRowFilter<T, G> = GroupedRowSelector<T, G, Boolean>

/**
 * Legacy view of a [DataRow] that additionally exposes an associated group via [group].
 * Superseded by [GroupByEntry].
 */
@Deprecated("Replaced by GroupByEntry")
public interface GroupedDataRow<out T, out G> : DataRow<T> {

/** Returns the [DataFrame] holding the group associated with this row. */
public fun group(): DataFrame<G>
}

/** Property-style shortcut for [GroupedDataRow.group]. */
public val <T, G> GroupedDataRow<T, G>.group: DataFrame<G>
    get() = group()

/**
 * A single keys+group pair of a [GroupBy] instance.
 *
 * The entry itself is a [DataRow] holding the keys of the group, and [group()][group]
 * gives access to the [DataFrame] that belongs to those keys.
 *
 * For example:
 * ```kotlin
 * df.groupBy { name and age }.forEachEntry { // this|it: GroupByEntry<T, G> ->
 *     println("There are \${group().rowsCount()} instances of \$name")
 * }
 * ```
 */
public interface GroupByEntry<out T, out G> : DataRow<T> {

    /** Returns the [DataFrame] with the group that belongs to the keys of this entry. */
    public fun group(): DataFrame<G>

    /**
     * Returns the keys of this entry as a [Map], obtained by calling `toMap()` on this row.
     */
    // TODO? (carried over from the original: this member is still marked as undecided)
    public fun keys(): Map<String, Any?> = toMap()
}

/**
 * A function evaluated on a [GroupByEntry] that produces an [R].
 * The entry is available both as receiver (`this`) and as argument (`it`).
 */
public typealias GroupByEntrySelector<T, G, R> = GroupByEntry<T, G>.(GroupByEntry<T, G>) -> R
/** A [GroupByEntrySelector] that yields a [Boolean], used to decide per [GroupByEntry]. */
public typealias GroupByEntryFilter<T, G> = GroupByEntrySelector<T, G, Boolean>

/**
 * Legacy pairing of a [key] row with its [group]. Superseded by [GroupByEntry].
 *
 * @property key the row holding the keys of the group.
 * @property group the [DataFrame] holding the group.
 */
@Deprecated("Replaced by GroupByEntry")
public data class GroupWithKey<T, G>(
    val key: DataRow<T>,
    val group: DataFrame<G>,
)

public interface GroupBy<out T, out G> : Grouped<G> {
Expand All @@ -92,12 +122,16 @@ public interface GroupBy<out T, out G> : Grouped<G> {

/**
 * Returns a [GroupBy] whose groups are the result of applying [transform] to each current group.
 *
 * @param transform called with a group [DataFrame] as both receiver and argument.
 */
public fun <R> updateGroups(transform: Selector<DataFrame<G>, DataFrame<R>>): GroupBy<T, R>

/**
 * Legacy filtering based on [GroupedRowFilter]. Superseded by [filterEntries].
 */
@Deprecated("Replaced by filterEntries", ReplaceWith("filterEntries(predicate)"))
public fun filter(predicate: GroupedRowFilter<T, G>): GroupBy<T, G>

/**
 * Returns a [GroupBy] that keeps only the entries for which [predicate] returns `true`.
 *
 * @param predicate evaluated per [GroupByEntry], which is passed as both receiver and argument.
 */
public fun filterEntries(predicate: GroupByEntryFilter<T, G>): GroupBy<T, G>

/**
 * Returns the [DataFrame] form of this [GroupBy].
 *
 * @param groupedColumnName optional name for the groups column in the result. In `GroupByImpl`,
 *   `null` or the current groups column name returns the underlying frame as is; any other
 *   value renames the groups column.
 */
@Refine
@Interpretable("GroupByToDataFrame")
public fun toDataFrame(groupedColumnName: String? = null): DataFrame<T>

/**
 * Legacy pairing of a [key] row with its [group]. Superseded by [GroupByEntry].
 *
 * @property key the row holding the keys of the group.
 * @property group the [DataFrame] holding the group.
 */
// The deprecation message was empty, which leaves users without migration guidance;
// it now matches the message used on `GroupWithKey`.
@Deprecated("Replaced by GroupByEntry")
public data class Entry<T, G>(val key: DataRow<T>, val group: DataFrame<G>)

public companion object {
Expand All @@ -117,3 +151,25 @@ public class ReducedGroupBy<T, G>(
/**
 * Pairs this [GroupBy] with the given [reducer] in a [ReducedGroupBy].
 *
 * @param reducer selects, for a group [DataFrame], a single row of it (or `null`).
 */
@PublishedApi
internal fun <T, G> GroupBy<T, G>.reduce(reducer: Selector<DataFrame<G>, DataRow<G>?>): ReducedGroupBy<T, G> {
    return ReducedGroupBy(this, reducer)
}

/**
 * Returns the number of rows of this [GroupBy]-[DataFrame], i.e. the size of its groups column.
 *
 * @return the size of the groups column of this [GroupBy].
 */
public fun GroupBy<*, *>.rowsCount(): Int {
    return groups.size()
}

/**
 * Collects all keys+group [entries][GroupByEntry] of this [GroupBy]-[DataFrame] into a [List].
 *
 * This is the eagerly materialized counterpart of [entriesAsSequence].
 * @see entriesAsSequence
 */
public fun <T, G> GroupBy<T, G>.entries(): List<GroupByEntry<T, G>> {
    val lazyEntries = entriesAsSequence()
    return lazyEntries.toList()
}

/**
 * Exposes all keys+group [entries][GroupByEntry] of this [GroupBy]-[DataFrame] as a [Sequence].
 *
 * Every element wraps one row of [GroupBy.keys] together with the groups column.
 * @see entries
 */
public fun <T, G> GroupBy<T, G>.entriesAsSequence(): Sequence<GroupByEntry<T, G>> =
    keys.asSequence().map { keyRow -> GroupByEntryImpl(keyRow, groups) }
26 changes: 26 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.impl.api.GroupByEntryImpl
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference
import org.jetbrains.kotlinx.dataframe.impl.columns.newColumn
Expand Down Expand Up @@ -141,17 +142,42 @@ public inline fun <T> DataFrame<T>.mapToFrame(body: AddDsl<T>.() -> Unit): AnyFr

// region GroupBy

/**
 * Applies [body] to a [GroupWithKey] for every key row (paired with the group at the same index)
 * and collects the non-null results.
 *
 * Deprecated in favour of [mapEntries].
 */
@Deprecated(
    "Replaced by mapEntries",
    ReplaceWith("mapEntries { val key = it\nval group = it.group()\nbody(key, group) }"),
)
public inline fun <T, G, R> GroupBy<T, G>.map(body: Selector<GroupWithKey<T, G>, R>): List<R> =
    keys.rows().mapIndexedNotNull { position, keyRow ->
        GroupWithKey(keyRow, groups[position]).let { body(it, it) }
    }

/**
 * Maps every [GroupWithKey] to a row (or `null`) using [body] and concatenates the results
 * into a [DataFrame].
 *
 * Deprecated in favour of [mapEntriesToRows].
 */
@Deprecated(
    "Replaced by mapEntriesToRows",
    ReplaceWith("mapEntriesToRows { val key = it\nval group = it.group()\nbody(key, group) }"),
)
public fun <T, G> GroupBy<T, G>.mapToRows(body: Selector<GroupWithKey<T, G>, DataRow<G>?>): DataFrame<G> {
    val selectedRows = map(body)
    return selectedRows.concat()
}

/**
 * Maps every [GroupWithKey] to a [DataFrame] using [body] and wraps the results in a
 * [FrameColumn] named after the current groups column.
 *
 * Deprecated in favour of [mapEntriesToFrames].
 */
@Deprecated(
    "Replaced by mapEntriesToFrames",
    ReplaceWith("mapEntriesToFrames { val key = it\nval group = it.group()\nbody(key, group) }"),
)
public fun <T, G> GroupBy<T, G>.mapToFrames(body: Selector<GroupWithKey<T, G>, DataFrame<G>>): FrameColumn<G> {
    val mappedGroups = map(body)
    return DataColumn.createFrameColumn(groups.name, mappedGroups)
}

/**
 * Applies [body] to every keys+group [entry][GroupByEntry] of this [GroupBy] and returns
 * the non-null results as a [List].
 *
 * @param body evaluated with the entry as both receiver and argument; `null` results are skipped.
 */
public inline fun <T, G, R> GroupBy<T, G>.mapEntries(body: GroupByEntrySelector<T, G, R>): List<R & Any> =
    keys.rows().mapNotNull { keyRow ->
        GroupByEntryImpl(keyRow, groups).let { body(it, it) }
    }

/**
 * Maps every [entry][GroupByEntry] to a row (or `null`) using [body] and concatenates
 * the non-null rows into a [DataFrame].
 */
public fun <T, G, R : Any> GroupBy<T, G>.mapEntriesToRows(body: GroupByEntrySelector<T, G, DataRow<R>?>): DataFrame<R> {
    val selectedRows = mapEntries(body)
    return selectedRows.concat()
}

/**
 * Maps every [entry][GroupByEntry] to a [DataFrame] using [body] and wraps the results in a
 * [FrameColumn] named after the current groups column.
 */
public fun <T, G, R : Any> GroupBy<T, G>.mapEntriesToFrames(
    body: GroupByEntrySelector<T, G, DataFrame<R>>,
): FrameColumn<R> {
    val mappedGroups = mapEntries(body)
    return DataColumn.createFrameColumn(groups.name, mappedGroups)
}

// endregion
36 changes: 34 additions & 2 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/take.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowFilter
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
Expand All @@ -16,9 +15,9 @@ import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.documentation.CommonTakeAndDropDocs
import org.jetbrains.kotlinx.dataframe.documentation.CommonTakeAndDropWhileDocs
import org.jetbrains.kotlinx.dataframe.documentation.TakeAndDropColumnsSelectionDslGrammar
import org.jetbrains.kotlinx.dataframe.impl.api.GroupByEntryImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle
import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KProperty
Expand Down Expand Up @@ -66,6 +65,39 @@ public inline fun <T> DataFrame<T>.takeWhile(predicate: RowFilter<T>): DataFrame

// endregion

// region GroupBy

/**
 * Returns an adjusted [GroupBy] consisting of the first [n] entries.
 *
 * The entries are taken from the [DataFrame] form of this [GroupBy], which is then
 * turned back into a [GroupBy] using the name of the current groups column.
 *
 * @param n the number of leading entries to keep; must not be negative.
 * @throws IllegalArgumentException if [n] is negative.
 */
public fun <T, G> GroupBy<T, G>.takeEntries(n: Int): GroupBy<T, G> {
    require(n >= 0) { "Requested rows count $n is less than zero." }
    val leading = toDataFrame().take(n)
    return leading.asGroupBy(groups.name()).cast()
}

/**
 * Returns an adjusted [GroupBy] consisting of the last [n] entries.
 *
 * The entries are taken from the end of the [DataFrame] form of this [GroupBy], which is then
 * turned back into a [GroupBy] using the name of the current groups column.
 *
 * @param n the number of trailing entries to keep; must not be negative.
 * @throws IllegalArgumentException if [n] is negative.
 */
public fun <T, G> GroupBy<T, G>.takeLastEntries(n: Int): GroupBy<T, G> {
    require(n >= 0) { "Requested rows count $n is less than zero." }
    val trailing = toDataFrame().takeLast(n)
    return trailing.asGroupBy(groups.name()).cast()
}

/**
 * Returns an adjusted [GroupBy] consisting of the leading run of entries that satisfy [predicate].
 *
 * Each row of the [DataFrame] form of this [GroupBy] is wrapped in a [GroupByEntry] together with
 * the current groups column before it is handed to the [predicate] (as both receiver and argument).
 */
public inline fun <T, G> GroupBy<T, G>.takeEntriesWhile(predicate: GroupByEntryFilter<T, G>): GroupBy<T, G> =
    toDataFrame()
        .takeWhile { row ->
            GroupByEntryImpl(row, groups).let { predicate(it, it) }
        }
        .asGroupBy(groups.name())
        .cast()

// endregion

// region ColumnsSelectionDsl

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.Selector
import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody
import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.GroupByEntryFilter
import org.jetbrains.kotlinx.dataframe.api.GroupedRowFilter
import org.jetbrains.kotlinx.dataframe.api.asGroupBy
import org.jetbrains.kotlinx.dataframe.api.concat
Expand All @@ -18,11 +19,13 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.pathOf
import org.jetbrains.kotlinx.dataframe.api.remove
import org.jetbrains.kotlinx.dataframe.api.rename
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.impl.aggregation.AggregatableInternal
import org.jetbrains.kotlinx.dataframe.impl.aggregation.GroupByReceiverImpl
import org.jetbrains.kotlinx.dataframe.impl.api.AggregatedPivot
import org.jetbrains.kotlinx.dataframe.impl.api.ColumnToInsert
import org.jetbrains.kotlinx.dataframe.impl.api.GroupByEntryImpl
import org.jetbrains.kotlinx.dataframe.impl.api.GroupedDataRowImpl
import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl
import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl
Expand All @@ -41,29 +44,40 @@ internal class GroupByImpl<T, G>(
) : GroupBy<T, G>,
AggregatableInternal<G> {

// Key columns of this GroupBy: `df` with the `groups` column removed, computed lazily on first access.
// NOTE(review): two declarations appear here because this span shows both sides of a diff hunk;
// presumably only the selector form (`remove { groups }`) is the current one — confirm against the real file.
override val keys by lazy { df.remove(groups) }
override val keys by lazy { df.remove { groups } }

// Converts the `groups` column of `df` by applying `transform` to every group (passed as both
// receiver and argument) and regroups the result under the same groups column name.
// NOTE(review): the first two lines and the remaining four look like the old and new side of a
// diff hunk for the same member — confirm which one the real file contains.
override fun <R> updateGroups(transform: Selector<DataFrame<G>, DataFrame<R>>) =
df.convert(groups) { transform(it, it) }.asGroupBy(groups.name()) as GroupBy<T, R>
@Suppress("UNCHECKED_CAST")
override fun <R> updateGroups(transform: Selector<DataFrame<G>, DataFrame<R>>): GroupBy<T, R> =
df.convert { groups }.with { transform(it, it) }
.asGroupBy { frameCol<R>(groups.name()) }

/** Renders this [GroupBy] exactly like its underlying `df`. */
override fun toString(): String = df.toString()

/**
 * Builds a selector for all columns except those in `keyColumnsInGroups`.
 */
override fun remainingColumnsSelector(): ColumnsSelector<*, *> {
    // Resolve the excluded column set once, outside of the returned selector.
    val excludedKeyColumns = keyColumnsInGroups.toColumnSet()
    return { all().except(excludedKeyColumns) }
}

// Legacy filter: keeps the rows of `df` for which `predicate` returns true. Each row is wrapped in a
// GroupedDataRowImpl (row + groups column) and passed as both receiver and argument.
// NOTE(review): the duplicated `val row = …` and `return …` lines look like the old and new side
// of a diff hunk — confirm which ones the real file contains.
@Deprecated("Replaced by filterEntries")
override fun filter(predicate: GroupedRowFilter<T, G>): GroupBy<T, G> {
val indices = (0 until df.nrow).filter {
val row = GroupedDataRowImpl(df.get(it), groups)
val row = GroupedDataRowImpl(df[it], groups)
predicate(row, row)
}
return df[indices].asGroupBy(groups)
return df[indices].asGroupBy { frameCol<G>(groups.name()) }
}

/**
 * Keeps only the entries for which [predicate] returns `true`.
 *
 * Every row of `df` is wrapped in a [GroupByEntryImpl] together with the groups column and passed
 * to [predicate] as both receiver and argument; the matching rows are regrouped under the
 * current groups column name.
 */
override fun filterEntries(predicate: GroupByEntryFilter<T, G>): GroupBy<T, G> {
    val matchingIndices = (0 until df.nrow).filter { rowIndex ->
        GroupByEntryImpl(df[rowIndex], groups).let { predicate(it, it) }
    }
    val kept = df[matchingIndices]
    return kept.asGroupBy { frameCol<G>(groups.name()) }
}

// Returns `df` unchanged when no name is requested or the requested name already equals the
// groups column name; otherwise returns `df` with the groups column renamed to `groupedColumnName`.
// NOTE(review): the two `df.rename…` lines look like the old and new side of a diff hunk —
// confirm which one the real file contains.
override fun toDataFrame(groupedColumnName: String?): DataFrame<T> =
if (groupedColumnName == null || groupedColumnName == groups.name()) {
df
} else {
df.rename(groups).into(groupedColumnName)
df.rename { groups }.into(groupedColumnName)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.GroupByEntry
import org.jetbrains.kotlinx.dataframe.api.GroupedDataRow
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
Expand All @@ -13,14 +14,25 @@ import org.jetbrains.kotlinx.dataframe.api.pathOf
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.impl.GroupByImpl
import org.jetbrains.kotlinx.dataframe.impl.nameGenerator
import org.jetbrains.kotlinx.dataframe.io.renderToString

/**
 * Legacy [GroupedDataRow] implementation: delegates all [DataRow] behaviour to [row] and looks up
 * the group in [frameCol] at the index of [row]. Superseded by [GroupByEntryImpl].
 */
@Deprecated("Replaced by GroupByEntryImpl")
internal class GroupedDataRowImpl<T, G>(
    private val row: DataRow<T>,
    private val frameCol: FrameColumn<G>,
) : GroupedDataRow<T, G>,
    DataRow<T> by row {

    /** Returns the frame stored in [frameCol] at the index of the wrapped [row]. */
    override fun group(): DataFrame<G> = frameCol[row.index()]
}

/**
 * Default [GroupByEntry] implementation: delegates all [DataRow] behaviour to [keysRow] and looks up
 * the group in [allGroups] at the index of [keysRow].
 */
@PublishedApi
internal class GroupByEntryImpl<T, G>(
    private val keysRow: DataRow<T>,
    internal val allGroups: FrameColumn<G>,
) : GroupByEntry<T, G>,
    DataRow<T> by keysRow {

    /** Returns the frame stored in [allGroups] at the index of the wrapped [keysRow]. */
    override fun group(): DataFrame<G> = allGroups[keysRow.index()]

    /** Renders both the row (via `renderToString()`) and its group. */
    override fun toString(): String {
        return "GroupByEntry(keysRow=${renderToString()}, group()=${group()})"
    }
}

@PublishedApi
internal fun <T> DataFrame<T>.groupByImpl(moveToTop: Boolean, columns: ColumnsSelector<T, *>): GroupBy<T, T> {
val nameGenerator = nameGenerator(GroupBy.groupedColumnAccessor.name())
Expand Down
Loading